jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	# coding: utf-8
	3
	4	from __future__ import unicode_literals
	5
	6	import asyncio
	7	import atexit
	8	import base64
	9	import binascii
	10	import calendar
	11	import codecs
	12	import collections
	13	import contextlib
	14	import ctypes
	15	import datetime
	16	import email.utils
	17	import email.header
	18	import errno
	19	import functools
	20	import gzip
	21	import hashlib
	22	import hmac
	23	import importlib.util
	24	import io
	25	import itertools
	26	import json
	27	import locale
	28	import math
	29	import operator
	30	import os
	31	import platform
	32	import random
	33	import re
	34	import socket
	35	import ssl
	36	import subprocess
	37	import sys
	38	import tempfile
	39	import time
	40	import traceback
	41	import xml.etree.ElementTree
	42	import zlib
	43	import mimetypes
	44
	45	from .compat import (
	46	compat_HTMLParseError,
	47	compat_HTMLParser,
	48	compat_HTTPError,
	49	compat_basestring,
	50	compat_chr,
	51	compat_cookiejar,
	52	compat_ctypes_WINFUNCTYPE,
	53	compat_etree_fromstring,
	54	compat_expanduser,
	55	compat_html_entities,
	56	compat_html_entities_html5,
	57	compat_http_client,
	58	compat_integer_types,
	59	compat_numeric_types,
	60	compat_kwargs,
	61	compat_os_name,
	62	compat_parse_qs,
	63	compat_shlex_split,
	64	compat_shlex_quote,
	65	compat_str,
	66	compat_struct_pack,
	67	compat_struct_unpack,
	68	compat_urllib_error,
	69	compat_urllib_parse,
	70	compat_urllib_parse_urlencode,
	71	compat_urllib_parse_urlparse,
	72	compat_urllib_parse_urlunparse,
	73	compat_urllib_parse_quote,
	74	compat_urllib_parse_quote_plus,
	75	compat_urllib_parse_unquote_plus,
	76	compat_urllib_request,
	77	compat_urlparse,
	78	compat_websockets,
	79	compat_xpath,
	80	)
	81
	82	from .socks import (
	83	ProxyType,
	84	sockssocket,
	85	)
	86
	87
	88	def register_socks_protocols():
	89	# "Register" SOCKS protocols
	90	# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
	91	# URLs with protocols not in urlparse.uses_netloc are not handled correctly
	92	for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
	93	if scheme not in compat_urlparse.uses_netloc:
	94	compat_urlparse.uses_netloc.append(scheme)
	95
	96
	97	# This is not clearly defined otherwise
	98	compiled_regex_type = type(re.compile(''))
	99
	100
	101	def random_user_agent():
	102	_USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
	103	_CHROME_VERSIONS = (
	104	'90.0.4430.212',
	105	'90.0.4430.24',
	106	'90.0.4430.70',
	107	'90.0.4430.72',
	108	'90.0.4430.85',
	109	'90.0.4430.93',
	110	'91.0.4472.101',
	111	'91.0.4472.106',
	112	'91.0.4472.114',
	113	'91.0.4472.124',
	114	'91.0.4472.164',
	115	'91.0.4472.19',
	116	'91.0.4472.77',
	117	'92.0.4515.107',
	118	'92.0.4515.115',
	119	'92.0.4515.131',
	120	'92.0.4515.159',
	121	'92.0.4515.43',
	122	'93.0.4556.0',
	123	'93.0.4577.15',
	124	'93.0.4577.63',
	125	'93.0.4577.82',
	126	'94.0.4606.41',
	127	'94.0.4606.54',
	128	'94.0.4606.61',
	129	'94.0.4606.71',
	130	'94.0.4606.81',
	131	'94.0.4606.85',
	132	'95.0.4638.17',
	133	'95.0.4638.50',
	134	'95.0.4638.54',
	135	'95.0.4638.69',
	136	'95.0.4638.74',
	137	'96.0.4664.18',
	138	'96.0.4664.45',
	139	'96.0.4664.55',
	140	'96.0.4664.93',
	141	'97.0.4692.20',
	142	)
	143	return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
	144
	145
	146	std_headers = {
	147	'User-Agent': random_user_agent(),
	148	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	149	'Accept-Encoding': 'gzip, deflate',
	150	'Accept-Language': 'en-us,en;q=0.5',
	151	'Sec-Fetch-Mode': 'navigate',
	152	}
	153
	154
	155	USER_AGENTS = {
	156	'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
	157	}
	158
	159
	160	NO_DEFAULT = object()
	161
	162	ENGLISH_MONTH_NAMES = [
	163	'January', 'February', 'March', 'April', 'May', 'June',
	164	'July', 'August', 'September', 'October', 'November', 'December']
	165
	166	MONTH_NAMES = {
	167	'en': ENGLISH_MONTH_NAMES,
	168	'fr': [
	169	'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
	170	'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
	171	}
	172
	173	KNOWN_EXTENSIONS = (
	174	'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
	175	'flv', 'f4v', 'f4a', 'f4b',
	176	'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
	177	'mkv', 'mka', 'mk3d',
	178	'avi', 'divx',
	179	'mov',
	180	'asf', 'wmv', 'wma',
	181	'3gp', '3g2',
	182	'mp3',
	183	'flac',
	184	'ape',
	185	'wav',
	186	'f4f', 'f4m', 'm3u8', 'smil')
	187
	188	# needed for sanitizing filenames in restricted mode
	189	ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
	190	itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
	191	'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
	192
	193	DATE_FORMATS = (
	194	'%d %B %Y',
	195	'%d %b %Y',
	196	'%B %d %Y',
	197	'%B %dst %Y',
	198	'%B %dnd %Y',
	199	'%B %drd %Y',
	200	'%B %dth %Y',
	201	'%b %d %Y',
	202	'%b %dst %Y',
	203	'%b %dnd %Y',
	204	'%b %drd %Y',
	205	'%b %dth %Y',
	206	'%b %dst %Y %I:%M',
	207	'%b %dnd %Y %I:%M',
	208	'%b %drd %Y %I:%M',
	209	'%b %dth %Y %I:%M',
	210	'%Y %m %d',
	211	'%Y-%m-%d',
	212	'%Y.%m.%d.',
	213	'%Y/%m/%d',
	214	'%Y/%m/%d %H:%M',
	215	'%Y/%m/%d %H:%M:%S',
	216	'%Y%m%d%H%M',
	217	'%Y%m%d%H%M%S',
	218	'%Y%m%d',
	219	'%Y-%m-%d %H:%M',
	220	'%Y-%m-%d %H:%M:%S',
	221	'%Y-%m-%d %H:%M:%S.%f',
	222	'%Y-%m-%d %H:%M:%S:%f',
	223	'%d.%m.%Y %H:%M',
	224	'%d.%m.%Y %H.%M',
	225	'%Y-%m-%dT%H:%M:%SZ',
	226	'%Y-%m-%dT%H:%M:%S.%fZ',
	227	'%Y-%m-%dT%H:%M:%S.%f0Z',
	228	'%Y-%m-%dT%H:%M:%S',
	229	'%Y-%m-%dT%H:%M:%S.%f',
	230	'%Y-%m-%dT%H:%M',
	231	'%b %d %Y at %H:%M',
	232	'%b %d %Y at %H:%M:%S',
	233	'%B %d %Y at %H:%M',
	234	'%B %d %Y at %H:%M:%S',
	235	'%H:%M %d-%b-%Y',
	236	)
	237
	238	DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
	239	DATE_FORMATS_DAY_FIRST.extend([
	240	'%d-%m-%Y',
	241	'%d.%m.%Y',
	242	'%d.%m.%y',
	243	'%d/%m/%Y',
	244	'%d/%m/%y',
	245	'%d/%m/%Y %H:%M:%S',
	246	])
	247
	248	DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
	249	DATE_FORMATS_MONTH_FIRST.extend([
	250	'%m-%d-%Y',
	251	'%m.%d.%Y',
	252	'%m/%d/%Y',
	253	'%m/%d/%y',
	254	'%m/%d/%Y %H:%M:%S',
	255	])
	256
	257	PACKED_CODES_RE = r"}$'(.+)',(\d+),(\d+),'([^']+)'\.split\('\\|'$"
	258	JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
	259
	260
	261	def preferredencoding():
	262	"""Get preferred encoding.
	263
	264	Returns the best encoding scheme for the system, based on
	265	locale.getpreferredencoding() and some further tweaks.
	266	"""
	267	try:
	268	pref = locale.getpreferredencoding()
	269	'TEST'.encode(pref)
	270	except Exception:
	271	pref = 'UTF-8'
	272
	273	return pref
	274
	275
	276	def write_json_file(obj, fn):
	277	""" Encode obj as JSON and write it to fn, atomically if possible """
	278
	279	fn = encodeFilename(fn)
	280	if sys.version_info < (3, 0) and sys.platform != 'win32':
	281	encoding = get_filesystem_encoding()
	282	# os.path.basename returns a bytes object, but NamedTemporaryFile
	283	# will fail if the filename contains non ascii characters unless we
	284	# use a unicode object
	285	path_basename = lambda f: os.path.basename(fn).decode(encoding)
	286	# the same for os.path.dirname
	287	path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
	288	else:
	289	path_basename = os.path.basename
	290	path_dirname = os.path.dirname
	291
	292	args = {
	293	'suffix': '.tmp',
	294	'prefix': path_basename(fn) + '.',
	295	'dir': path_dirname(fn),
	296	'delete': False,
	297	}
	298
	299	# In Python 2.x, json.dump expects a bytestream.
	300	# In Python 3.x, it writes to a character stream
	301	if sys.version_info < (3, 0):
	302	args['mode'] = 'wb'
	303	else:
	304	args.update({
	305	'mode': 'w',
	306	'encoding': 'utf-8',
	307	})
	308
	309	tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
	310
	311	try:
	312	with tf:
	313	json.dump(obj, tf, ensure_ascii=False)
	314	if sys.platform == 'win32':
	315	# Need to remove existing file on Windows, else os.rename raises
	316	# WindowsError or FileExistsError.
	317	try:
	318	os.unlink(fn)
	319	except OSError:
	320	pass
	321	try:
	322	mask = os.umask(0)
	323	os.umask(mask)
	324	os.chmod(tf.name, 0o666 & ~mask)
	325	except OSError:
	326	pass
	327	os.rename(tf.name, fn)
	328	except Exception:
	329	try:
	330	os.remove(tf.name)
	331	except OSError:
	332	pass
	333	raise
	334
	335
	336	if sys.version_info >= (2, 7):
	337	def find_xpath_attr(node, xpath, key, val=None):
	338	""" Find the xpath xpath[@key=val] """
	339	assert re.match(r'^[a-zA-Z_-]+$', key)
	340	expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
	341	return node.find(expr)
	342	else:
	343	def find_xpath_attr(node, xpath, key, val=None):
	344	for f in node.findall(compat_xpath(xpath)):
	345	if key not in f.attrib:
	346	continue
	347	if val is None or f.attrib.get(key) == val:
	348	return f
	349	return None
	350
	351	# On python2.6 the xml.etree.ElementTree.Element methods don't support
	352	# the namespace parameter
	353
	354
	355	def xpath_with_ns(path, ns_map):
	356	components = [c.split(':') for c in path.split('/')]
	357	replaced = []
	358	for c in components:
	359	if len(c) == 1:
	360	replaced.append(c[0])
	361	else:
	362	ns, tag = c
	363	replaced.append('{%s}%s' % (ns_map[ns], tag))
	364	return '/'.join(replaced)
	365
	366
	367	def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
	368	def _find_xpath(xpath):
	369	return node.find(compat_xpath(xpath))
	370
	371	if isinstance(xpath, (str, compat_str)):
	372	n = _find_xpath(xpath)
	373	else:
	374	for xp in xpath:
	375	n = _find_xpath(xp)
	376	if n is not None:
	377	break
	378
	379	if n is None:
	380	if default is not NO_DEFAULT:
	381	return default
	382	elif fatal:
	383	name = xpath if name is None else name
	384	raise ExtractorError('Could not find XML element %s' % name)
	385	else:
	386	return None
	387	return n
	388
	389
	390	def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
	391	n = xpath_element(node, xpath, name, fatal=fatal, default=default)
	392	if n is None or n == default:
	393	return n
	394	if n.text is None:
	395	if default is not NO_DEFAULT:
	396	return default
	397	elif fatal:
	398	name = xpath if name is None else name
	399	raise ExtractorError('Could not find XML element\'s text %s' % name)
	400	else:
	401	return None
	402	return n.text
	403
	404
	405	def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
	406	n = find_xpath_attr(node, xpath, key)
	407	if n is None:
	408	if default is not NO_DEFAULT:
	409	return default
	410	elif fatal:
	411	name = '%s[@%s]' % (xpath, key) if name is None else name
	412	raise ExtractorError('Could not find XML attribute %s' % name)
	413	else:
	414	return None
	415	return n.attrib[key]
	416
	417
	418	def get_element_by_id(id, html):
	419	"""Return the content of the tag with the specified ID in the passed HTML document"""
	420	return get_element_by_attribute('id', id, html)
	421
	422
	423	def get_element_html_by_id(id, html):
	424	"""Return the html of the tag with the specified ID in the passed HTML document"""
	425	return get_element_html_by_attribute('id', id, html)
	426
	427
	428	def get_element_by_class(class_name, html):
	429	"""Return the content of the first tag with the specified class in the passed HTML document"""
	430	retval = get_elements_by_class(class_name, html)
	431	return retval[0] if retval else None
	432
	433
	434	def get_element_html_by_class(class_name, html):
	435	"""Return the html of the first tag with the specified class in the passed HTML document"""
	436	retval = get_elements_html_by_class(class_name, html)
	437	return retval[0] if retval else None
	438
	439
	440	def get_element_by_attribute(attribute, value, html, escape_value=True):
	441	retval = get_elements_by_attribute(attribute, value, html, escape_value)
	442	return retval[0] if retval else None
	443
	444
	445	def get_element_html_by_attribute(attribute, value, html, escape_value=True):
	446	retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
	447	return retval[0] if retval else None
	448
	449
	450	def get_elements_by_class(class_name, html):
	451	"""Return the content of all tags with the specified class in the passed HTML document as a list"""
	452	return get_elements_by_attribute(
	453	'class', r'[^\'"]\b%s\b[^\'"]' % re.escape(class_name),
	454	html, escape_value=False)
	455
	456
	457	def get_elements_html_by_class(class_name, html):
	458	"""Return the html of all tags with the specified class in the passed HTML document as a list"""
	459	return get_elements_html_by_attribute(
	460	'class', r'[^\'"]\b%s\b[^\'"]' % re.escape(class_name),
	461	html, escape_value=False)
	462
	463
	464	def get_elements_by_attribute(args, *kwargs):
	465	"""Return the content of the tag with the specified attribute in the passed HTML document"""
	466	return [content for content, _ in get_elements_text_and_html_by_attribute(args, *kwargs)]
	467
	468
	469	def get_elements_html_by_attribute(args, *kwargs):
	470	"""Return the html of the tag with the specified attribute in the passed HTML document"""
	471	return [whole for _, whole in get_elements_text_and_html_by_attribute(args, *kwargs)]
	472
	473
	474	def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
	475	"""
	476	Return the text (content) and the html (whole) of the tag with the specified
	477	attribute in the passed HTML document
	478	"""
	479
	480	value_quote_optional = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
	481
	482	value = re.escape(value) if escape_value else value
	483
	484	partial_element_re = r'''(?x)
	485	<(?P<tag>[a-zA-Z0-9:._-]+)
	486	(?:\s(?:[^>"']\|"[^"]"\|'[^']')*)?
	487	\s%(attribute)s\s=\s(?P<_q>['"]%(vqo)s)(?-x:%(value)s)(?P=_q)
	488	''' % {'attribute': re.escape(attribute), 'value': value, 'vqo': value_quote_optional}
	489
	490	for m in re.finditer(partial_element_re, html):
	491	content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
	492
	493	yield (
	494	unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
	495	whole
	496	)
	497
	498
	499	class HTMLBreakOnClosingTagParser(compat_HTMLParser):
	500	"""
	501	HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
	502	closing tag for the first opening tag it has encountered, and can be used
	503	as a context manager
	504	"""
	505
	506	class HTMLBreakOnClosingTagException(Exception):
	507	pass
	508
	509	def __init__(self):
	510	self.tagstack = collections.deque()
	511	compat_HTMLParser.__init__(self)
	512
	513	def __enter__(self):
	514	return self
	515
	516	def __exit__(self, *_):
	517	self.close()
	518
	519	def close(self):
	520	# handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
	521	# so data remains buffered; we no longer have any interest in it, thus
	522	# override this method to discard it
	523	pass
	524
	525	def handle_starttag(self, tag, _):
	526	self.tagstack.append(tag)
	527
	528	def handle_endtag(self, tag):
	529	if not self.tagstack:
	530	raise compat_HTMLParseError('no tags in the stack')
	531	while self.tagstack:
	532	inner_tag = self.tagstack.pop()
	533	if inner_tag == tag:
	534	break
	535	else:
	536	raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
	537	if not self.tagstack:
	538	raise self.HTMLBreakOnClosingTagException()
	539
	540
	541	def get_element_text_and_html_by_tag(tag, html):
	542	"""
	543	For the first element with the specified tag in the passed HTML document
	544	return its' content (text) and the whole element (html)
	545	"""
	546	def find_or_raise(haystack, needle, exc):
	547	try:
	548	return haystack.index(needle)
	549	except ValueError:
	550	raise exc
	551	closing_tag = f'</{tag}>'
	552	whole_start = find_or_raise(
	553	html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
	554	content_start = find_or_raise(
	555	html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
	556	content_start += whole_start + 1
	557	with HTMLBreakOnClosingTagParser() as parser:
	558	parser.feed(html[whole_start:content_start])
	559	if not parser.tagstack or parser.tagstack[0] != tag:
	560	raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
	561	offset = content_start
	562	while offset < len(html):
	563	next_closing_tag_start = find_or_raise(
	564	html[offset:], closing_tag,
	565	compat_HTMLParseError(f'closing {tag} tag not found'))
	566	next_closing_tag_end = next_closing_tag_start + len(closing_tag)
	567	try:
	568	parser.feed(html[offset:offset + next_closing_tag_end])
	569	offset += next_closing_tag_end
	570	except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
	571	return html[content_start:offset + next_closing_tag_start], \
	572	html[whole_start:offset + next_closing_tag_end]
	573	raise compat_HTMLParseError('unexpected end of html')
	574
	575
	576	class HTMLAttributeParser(compat_HTMLParser):
	577	"""Trivial HTML parser to gather the attributes for a single element"""
	578
	579	def __init__(self):
	580	self.attrs = {}
	581	compat_HTMLParser.__init__(self)
	582
	583	def handle_starttag(self, tag, attrs):
	584	self.attrs = dict(attrs)
	585
	586
	587	class HTMLListAttrsParser(compat_HTMLParser):
	588	"""HTML parser to gather the attributes for the elements of a list"""
	589
	590	def __init__(self):
	591	compat_HTMLParser.__init__(self)
	592	self.items = []
	593	self._level = 0
	594
	595	def handle_starttag(self, tag, attrs):
	596	if tag == 'li' and self._level == 0:
	597	self.items.append(dict(attrs))
	598	self._level += 1
	599
	600	def handle_endtag(self, tag):
	601	self._level -= 1
	602
	603
	604	def extract_attributes(html_element):
	605	"""Given a string for an HTML element such as
	606	<el
	607	a="foo" B="bar" c="&98;az" d=boz
	608	empty= noval entity="&"
	609	sq='"' dq="'"
	610	>
	611	Decode and return a dictionary of attributes.
	612	{
	613	'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
	614	'empty': '', 'noval': None, 'entity': '&',
	615	'sq': '"', 'dq': '\''
	616	}.
	617	NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
	618	but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
	619	"""
	620	parser = HTMLAttributeParser()
	621	try:
	622	parser.feed(html_element)
	623	parser.close()
	624	# Older Python may throw HTMLParseError in case of malformed HTML
	625	except compat_HTMLParseError:
	626	pass
	627	return parser.attrs
	628
	629
	630	def parse_list(webpage):
	631	"""Given a string for an series of HTML <li> elements,
	632	return a dictionary of their attributes"""
	633	parser = HTMLListAttrsParser()
	634	parser.feed(webpage)
	635	parser.close()
	636	return parser.items
	637
	638
	639	def clean_html(html):
	640	"""Clean an HTML snippet into a readable string"""
	641
	642	if html is None: # Convenience for sanitizing descriptions etc.
	643	return html
	644
	645	html = re.sub(r'\s+', ' ', html)
	646	html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
	647	html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
	648	# Strip html tags
	649	html = re.sub('<.*?>', '', html)
	650	# Replace html entities
	651	html = unescapeHTML(html)
	652	return html.strip()
	653
	654
	655	def sanitize_open(filename, open_mode):
	656	"""Try to open the given filename, and slightly tweak it if this fails.
	657
	658	Attempts to open the given filename. If this fails, it tries to change
	659	the filename slightly, step by step, until it's either able to open it
	660	or it fails and raises a final exception, like the standard open()
	661	function.
	662
	663	It returns the tuple (stream, definitive_file_name).
	664	"""
	665	try:
	666	if filename == '-':
	667	if sys.platform == 'win32':
	668	import msvcrt
	669	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	670	return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
	671	stream = locked_file(filename, open_mode, block=False).open()
	672	return (stream, filename)
	673	except (IOError, OSError) as err:
	674	if err.errno in (errno.EACCES,):
	675	raise
	676
	677	# In case of error, try to remove win32 forbidden chars
	678	alt_filename = sanitize_path(filename)
	679	if alt_filename == filename:
	680	raise
	681	else:
	682	# An exception here should be caught in the caller
	683	stream = locked_file(filename, open_mode, block=False).open()
	684	return (stream, alt_filename)
	685
	686
	687	def timeconvert(timestr):
	688	"""Convert RFC 2822 defined time string into system timestamp"""
	689	timestamp = None
	690	timetuple = email.utils.parsedate_tz(timestr)
	691	if timetuple is not None:
	692	timestamp = email.utils.mktime_tz(timetuple)
	693	return timestamp
	694
	695
	696	def sanitize_filename(s, restricted=False, is_id=False):
	697	"""Sanitizes a string so it could be used as part of a filename.
	698	If restricted is set, use a stricter subset of allowed characters.
	699	Set is_id if this is not an arbitrary string, but an ID that should be kept
	700	if possible.
	701	"""
	702	def replace_insane(char):
	703	if restricted and char in ACCENT_CHARS:
	704	return ACCENT_CHARS[char]
	705	elif not restricted and char == '\n':
	706	return ' '
	707	elif char == '?' or ord(char) < 32 or ord(char) == 127:
	708	return ''
	709	elif char == '"':
	710	return '' if restricted else '\''
	711	elif char == ':':
	712	return '_-' if restricted else ' -'
	713	elif char in '\\/\|*<>':
	714	return '_'
	715	if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace()):
	716	return '_'
	717	if restricted and ord(char) > 127:
	718	return '_'
	719	return char
	720
	721	if s == '':
	722	return ''
	723	# Handle timestamps
	724	s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)
	725	result = ''.join(map(replace_insane, s))
	726	if not is_id:
	727	while '__' in result:
	728	result = result.replace('__', '_')
	729	result = result.strip('_')
	730	# Common case of "Foreign band name - English song title"
	731	if restricted and result.startswith('-_'):
	732	result = result[2:]
	733	if result.startswith('-'):
	734	result = '_' + result[len('-'):]
	735	result = result.lstrip('.')
	736	if not result:
	737	result = '_'
	738	return result
	739
	740
	741	def sanitize_path(s, force=False):
	742	"""Sanitizes and normalizes path on Windows"""
	743	if sys.platform == 'win32':
	744	force = False
	745	drive_or_unc, _ = os.path.splitdrive(s)
	746	if sys.version_info < (2, 7) and not drive_or_unc:
	747	drive_or_unc, _ = os.path.splitunc(s)
	748	elif force:
	749	drive_or_unc = ''
	750	else:
	751	return s
	752
	753	norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
	754	if drive_or_unc:
	755	norm_path.pop(0)
	756	sanitized_path = [
	757	path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\\|\\?\*]\|[\s.]$)', '#', path_part)
	758	for path_part in norm_path]
	759	if drive_or_unc:
	760	sanitized_path.insert(0, drive_or_unc + os.path.sep)
	761	elif force and s[0] == os.path.sep:
	762	sanitized_path.insert(0, os.path.sep)
	763	return os.path.join(*sanitized_path)
	764
	765
	766	def sanitize_url(url):
	767	# Prepend protocol-less URLs with `http:` scheme in order to mitigate
	768	# the number of unwanted failures due to missing protocol
	769	if url.startswith('//'):
	770	return 'http:%s' % url
	771	# Fix some common typos seen so far
	772	COMMON_TYPOS = (
	773	# https://github.com/ytdl-org/youtube-dl/issues/15649
	774	(r'^httpss://', r'https://'),
	775	# https://bx1.be/lives/direct-tv/
	776	(r'^rmtp([es]?)://', r'rtmp\1://'),
	777	)
	778	for mistake, fixup in COMMON_TYPOS:
	779	if re.match(mistake, url):
	780	return re.sub(mistake, fixup, url)
	781	return url
	782
	783
	784	def extract_basic_auth(url):
	785	parts = compat_urlparse.urlsplit(url)
	786	if parts.username is None:
	787	return url, None
	788	url = compat_urlparse.urlunsplit(parts._replace(netloc=(
	789	parts.hostname if parts.port is None
	790	else '%s:%d' % (parts.hostname, parts.port))))
	791	auth_payload = base64.b64encode(
	792	('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
	793	return url, 'Basic ' + auth_payload.decode('utf-8')
	794
	795
	796	def sanitized_Request(url, args, *kwargs):
	797	url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
	798	if auth_header is not None:
	799	headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
	800	headers['Authorization'] = auth_header
	801	return compat_urllib_request.Request(url, args, *kwargs)
	802
	803
	804	def expand_path(s):
	805	"""Expand shell variables and ~"""
	806	return os.path.expandvars(compat_expanduser(s))
	807
	808
	809	def orderedSet(iterable):
	810	""" Remove all duplicates from the input iterable """
	811	res = []
	812	for el in iterable:
	813	if el not in res:
	814	res.append(el)
	815	return res
	816
	817
	818	def _htmlentity_transform(entity_with_semicolon):
	819	"""Transforms an HTML entity to a character."""
	820	entity = entity_with_semicolon[:-1]
	821
	822	# Known non-numeric HTML entity
	823	if entity in compat_html_entities.name2codepoint:
	824	return compat_chr(compat_html_entities.name2codepoint[entity])
	825
	826	# TODO: HTML5 allows entities without a semicolon. For example,
	827	# '&Eacuteric' should be decoded as 'Éric'.
	828	if entity_with_semicolon in compat_html_entities_html5:
	829	return compat_html_entities_html5[entity_with_semicolon]
	830
	831	mobj = re.match(r'#(x[0-9a-fA-F]+\|[0-9]+)', entity)
	832	if mobj is not None:
	833	numstr = mobj.group(1)
	834	if numstr.startswith('x'):
	835	base = 16
	836	numstr = '0%s' % numstr
	837	else:
	838	base = 10
	839	# See https://github.com/ytdl-org/youtube-dl/issues/7518
	840	try:
	841	return compat_chr(int(numstr, base))
	842	except ValueError:
	843	pass
	844
	845	# Unknown entity in name, return its literal representation
	846	return '&%s;' % entity
	847
	848
	849	def unescapeHTML(s):
	850	if s is None:
	851	return None
	852	assert type(s) == compat_str
	853
	854	return re.sub(
	855	r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
	856
	857
	858	def escapeHTML(text):
	859	return (
	860	text
	861	.replace('&', '&')
	862	.replace('<', '<')
	863	.replace('>', '>')
	864	.replace('"', '"')
	865	.replace("'", ''')
	866	)
	867
	868
	869	def process_communicate_or_kill(p, args, *kwargs):
	870	try:
	871	return p.communicate(args, *kwargs)
	872	except BaseException: # Including KeyboardInterrupt
	873	p.kill()
	874	p.wait()
	875	raise
	876
	877
	878	class Popen(subprocess.Popen):
	879	if sys.platform == 'win32':
	880	_startupinfo = subprocess.STARTUPINFO()
	881	_startupinfo.dwFlags \|= subprocess.STARTF_USESHOWWINDOW
	882	else:
	883	_startupinfo = None
	884
	885	def __init__(self, args, *kwargs):
	886	super(Popen, self).__init__(args, *kwargs, startupinfo=self._startupinfo)
	887
	888	def communicate_or_kill(self, args, *kwargs):
	889	return process_communicate_or_kill(self, args, *kwargs)
	890
	891
	892	def get_subprocess_encoding():
	893	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
	894	# For subprocess calls, encode with locale encoding
	895	# Refer to http://stackoverflow.com/a/9951851/35070
	896	encoding = preferredencoding()
	897	else:
	898	encoding = sys.getfilesystemencoding()
	899	if encoding is None:
	900	encoding = 'utf-8'
	901	return encoding
	902
	903
	904	def encodeFilename(s, for_subprocess=False):
	905	"""
	906	@param s The name of the file
	907	"""
	908
	909	assert type(s) == compat_str
	910
	911	# Python 3 has a Unicode API
	912	if sys.version_info >= (3, 0):
	913	return s
	914
	915	# Pass '' directly to use Unicode APIs on Windows 2000 and up
	916	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	917	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	918	if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
	919	return s
	920
	921	# Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
	922	if sys.platform.startswith('java'):
	923	return s
	924
	925	return s.encode(get_subprocess_encoding(), 'ignore')
	926
	927
	928	def decodeFilename(b, for_subprocess=False):
	929
	930	if sys.version_info >= (3, 0):
	931	return b
	932
	933	if not isinstance(b, bytes):
	934	return b
	935
	936	return b.decode(get_subprocess_encoding(), 'ignore')
	937
	938
	939	def encodeArgument(s):
	940	if not isinstance(s, compat_str):
	941	# Legacy code that uses byte strings
	942	# Uncomment the following line after fixing all post processors
	943	# assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
	944	s = s.decode('ascii')
	945	return encodeFilename(s, True)
	946
	947
	948	def decodeArgument(b):
	949	return decodeFilename(b, True)
	950
	951
	952	def decodeOption(optval):
	953	if optval is None:
	954	return optval
	955	if isinstance(optval, bytes):
	956	optval = optval.decode(preferredencoding())
	957
	958	assert isinstance(optval, compat_str)
	959	return optval
	960
	961
	962	_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
	963
	964
	965	def timetuple_from_msec(msec):
	966	secs, msec = divmod(msec, 1000)
	967	mins, secs = divmod(secs, 60)
	968	hrs, mins = divmod(mins, 60)
	969	return _timetuple(hrs, mins, secs, msec)
	970
	971
	972	def formatSeconds(secs, delim=':', msec=False):
	973	time = timetuple_from_msec(secs * 1000)
	974	if time.hours:
	975	ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
	976	elif time.minutes:
	977	ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
	978	else:
	979	ret = '%d' % time.seconds
	980	return '%s.%03d' % (ret, time.milliseconds) if msec else ret
	981
	982
	983	def _ssl_load_windows_store_certs(ssl_context, storename):
	984	# Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
	985	try:
	986	certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
	987	if encoding == 'x509_asn' and (
	988	trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
	989	except PermissionError:
	990	return
	991	for cert in certs:
	992	try:
	993	ssl_context.load_verify_locations(cadata=cert)
	994	except ssl.SSLError:
	995	pass
	996
	997
	998	def make_HTTPS_handler(params, **kwargs):
	999	opts_check_certificate = not params.get('nocheckcertificate')
	1000	context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
	1001	context.check_hostname = opts_check_certificate
	1002	if params.get('legacyserverconnect'):
	1003	context.options \|= 4 # SSL_OP_LEGACY_SERVER_CONNECT
	1004	context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
	1005	if opts_check_certificate:
	1006	try:
	1007	context.load_default_certs()
	1008	# Work around the issue in load_default_certs when there are bad certificates. See:
	1009	# https://github.com/yt-dlp/yt-dlp/issues/1060,
	1010	# https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
	1011	except ssl.SSLError:
	1012	# enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
	1013	if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
	1014	# Create a new context to discard any certificates that were already loaded
	1015	context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
	1016	context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
	1017	for storename in ('CA', 'ROOT'):
	1018	_ssl_load_windows_store_certs(context, storename)
	1019	context.set_default_verify_paths()
	1020	return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
	1021
	1022
	1023	def bug_reports_message(before=';'):
	1024	msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , '
	1025	'filling out the "Broken site" issue template properly. '
	1026	'Confirm you are on the latest version using -U')
	1027
	1028	before = before.rstrip()
	1029	if not before or before.endswith(('.', '!', '?')):
	1030	msg = msg[0].title() + msg[1:]
	1031
	1032	return (before + ' ' if before else '') + msg
	1033
	1034
	1035	class YoutubeDLError(Exception):
	1036	"""Base exception for YoutubeDL errors."""
	1037	msg = None
	1038
	1039	def __init__(self, msg=None):
	1040	if msg is not None:
	1041	self.msg = msg
	1042	elif self.msg is None:
	1043	self.msg = type(self).__name__
	1044	super().__init__(self.msg)
	1045
	1046
	1047	network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
	1048	if hasattr(ssl, 'CertificateError'):
	1049	network_exceptions.append(ssl.CertificateError)
	1050	network_exceptions = tuple(network_exceptions)
	1051
	1052
	1053	class ExtractorError(YoutubeDLError):
	1054	"""Error during info extraction."""
	1055
	1056	def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
	1057	""" tb, if given, is the original traceback (so that it can be printed out).
	1058	If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
	1059	"""
	1060	if sys.exc_info()[0] in network_exceptions:
	1061	expected = True
	1062
	1063	self.orig_msg = str(msg)
	1064	self.traceback = tb
	1065	self.expected = expected
	1066	self.cause = cause
	1067	self.video_id = video_id
	1068	self.ie = ie
	1069	self.exc_info = sys.exc_info() # preserve original exception
	1070
	1071	super(ExtractorError, self).__init__(''.join((
	1072	format_field(ie, template='[%s] '),
	1073	format_field(video_id, template='%s: '),
	1074	msg,
	1075	format_field(cause, template=' (caused by %r)'),
	1076	'' if expected else bug_reports_message())))
	1077
	1078	def format_traceback(self):
	1079	return join_nonempty(
	1080	self.traceback and ''.join(traceback.format_tb(self.traceback)),
	1081	self.cause and ''.join(traceback.format_exception(self.cause)[1:]),
	1082	delim='\n') or None
	1083
	1084
	1085	class UnsupportedError(ExtractorError):
	1086	def __init__(self, url):
	1087	super(UnsupportedError, self).__init__(
	1088	'Unsupported URL: %s' % url, expected=True)
	1089	self.url = url
	1090
	1091
	1092	class RegexNotFoundError(ExtractorError):
	1093	"""Error when a regex didn't match"""
	1094	pass
	1095
	1096
	1097	class GeoRestrictedError(ExtractorError):
	1098	"""Geographic restriction Error exception.
	1099
	1100	This exception may be thrown when a video is not available from your
	1101	geographic location due to geographic restrictions imposed by a website.
	1102	"""
	1103
	1104	def __init__(self, msg, countries=None, **kwargs):
	1105	kwargs['expected'] = True
	1106	super(GeoRestrictedError, self).__init__(msg, **kwargs)
	1107	self.countries = countries
	1108
	1109
	1110	class DownloadError(YoutubeDLError):
	1111	"""Download Error exception.
	1112
	1113	This exception may be thrown by FileDownloader objects if they are not
	1114	configured to continue on errors. They will contain the appropriate
	1115	error message.
	1116	"""
	1117
	1118	def __init__(self, msg, exc_info=None):
	1119	""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
	1120	super(DownloadError, self).__init__(msg)
	1121	self.exc_info = exc_info
	1122
	1123
	1124	class EntryNotInPlaylist(YoutubeDLError):
	1125	"""Entry not in playlist exception.
	1126
	1127	This exception will be thrown by YoutubeDL when a requested entry
	1128	is not found in the playlist info_dict
	1129	"""
	1130	msg = 'Entry not found in info'
	1131
	1132
	1133	class SameFileError(YoutubeDLError):
	1134	"""Same File exception.
	1135
	1136	This exception will be thrown by FileDownloader objects if they detect
	1137	multiple files would have to be downloaded to the same file on disk.
	1138	"""
	1139	msg = 'Fixed output name but more than one file to download'
	1140
	1141	def __init__(self, filename=None):
	1142	if filename is not None:
	1143	self.msg += f': {filename}'
	1144	super().__init__(self.msg)
	1145
	1146
	1147	class PostProcessingError(YoutubeDLError):
	1148	"""Post Processing exception.
	1149
	1150	This exception may be raised by PostProcessor's .run() method to
	1151	indicate an error in the postprocessing task.
	1152	"""
	1153
	1154
	1155	class DownloadCancelled(YoutubeDLError):
	1156	""" Exception raised when the download queue should be interrupted """
	1157	msg = 'The download was cancelled'
	1158
	1159
	1160	class ExistingVideoReached(DownloadCancelled):
	1161	""" --break-on-existing triggered """
	1162	msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
	1163
	1164
	1165	class RejectedVideoReached(DownloadCancelled):
	1166	""" --break-on-reject triggered """
	1167	msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
	1168
	1169
	1170	class MaxDownloadsReached(DownloadCancelled):
	1171	""" --max-downloads limit has been reached. """
	1172	msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
	1173
	1174
	1175	class ReExtractInfo(YoutubeDLError):
	1176	""" Video info needs to be re-extracted. """
	1177
	1178	def __init__(self, msg, expected=False):
	1179	super().__init__(msg)
	1180	self.expected = expected
	1181
	1182
	1183	class ThrottledDownload(ReExtractInfo):
	1184	""" Download speed below --throttled-rate. """
	1185	msg = 'The download speed is below throttle limit'
	1186
	1187	def __init__(self):
	1188	super().__init__(self.msg, expected=False)
	1189
	1190
	1191	class UnavailableVideoError(YoutubeDLError):
	1192	"""Unavailable Format exception.
	1193
	1194	This exception will be thrown when a video is requested
	1195	in a format that is not available for that video.
	1196	"""
	1197	msg = 'Unable to download video'
	1198
	1199	def __init__(self, err=None):
	1200	if err is not None:
	1201	self.msg += f': {err}'
	1202	super().__init__(self.msg)
	1203
	1204
	1205	class ContentTooShortError(YoutubeDLError):
	1206	"""Content Too Short exception.
	1207
	1208	This exception may be raised by FileDownloader objects when a file they
	1209	download is too small for what the server announced first, indicating
	1210	the connection was probably interrupted.
	1211	"""
	1212
	1213	def __init__(self, downloaded, expected):
	1214	super(ContentTooShortError, self).__init__(
	1215	'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
	1216	)
	1217	# Both in bytes
	1218	self.downloaded = downloaded
	1219	self.expected = expected
	1220
	1221
	1222	class XAttrMetadataError(YoutubeDLError):
	1223	def __init__(self, code=None, msg='Unknown error'):
	1224	super(XAttrMetadataError, self).__init__(msg)
	1225	self.code = code
	1226	self.msg = msg
	1227
	1228	# Parsing code and msg
	1229	if (self.code in (errno.ENOSPC, errno.EDQUOT)
	1230	or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
	1231	self.reason = 'NO_SPACE'
	1232	elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
	1233	self.reason = 'VALUE_TOO_LONG'
	1234	else:
	1235	self.reason = 'NOT_SUPPORTED'
	1236
	1237
	1238	class XAttrUnavailableError(YoutubeDLError):
	1239	pass
	1240
	1241
	1242	def _create_http_connection(ydl_handler, http_class, is_https, args, *kwargs):
	1243	# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
	1244	# expected HTTP responses to meet HTTP/1.0 or later (see also
	1245	# https://github.com/ytdl-org/youtube-dl/issues/6727)
	1246	if sys.version_info < (3, 0):
	1247	kwargs['strict'] = True
	1248	hc = http_class(args, *compat_kwargs(kwargs))
	1249	source_address = ydl_handler._params.get('source_address')
	1250
	1251	if source_address is not None:
	1252	# This is to workaround _create_connection() from socket where it will try all
	1253	# address data from getaddrinfo() including IPv6. This filters the result from
	1254	# getaddrinfo() based on the source_address value.
	1255	# This is based on the cpython socket.create_connection() function.
	1256	# https://github.com/python/cpython/blob/master/Lib/socket.py#L691
	1257	def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
	1258	host, port = address
	1259	err = None
	1260	addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
	1261	af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
	1262	ip_addrs = [addr for addr in addrs if addr[0] == af]
	1263	if addrs and not ip_addrs:
	1264	ip_version = 'v4' if af == socket.AF_INET else 'v6'
	1265	raise socket.error(
	1266	"No remote IP%s addresses available for connect, can't use '%s' as source address"
	1267	% (ip_version, source_address[0]))
	1268	for res in ip_addrs:
	1269	af, socktype, proto, canonname, sa = res
	1270	sock = None
	1271	try:
	1272	sock = socket.socket(af, socktype, proto)
	1273	if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
	1274	sock.settimeout(timeout)
	1275	sock.bind(source_address)
	1276	sock.connect(sa)
	1277	err = None # Explicitly break reference cycle
	1278	return sock
	1279	except socket.error as _:
	1280	err = _
	1281	if sock is not None:
	1282	sock.close()
	1283	if err is not None:
	1284	raise err
	1285	else:
	1286	raise socket.error('getaddrinfo returns an empty list')
	1287	if hasattr(hc, '_create_connection'):
	1288	hc._create_connection = _create_connection
	1289	sa = (source_address, 0)
	1290	if hasattr(hc, 'source_address'): # Python 2.7+
	1291	hc.source_address = sa
	1292	else: # Python 2.6
	1293	def _hc_connect(self, args, *kwargs):
	1294	sock = _create_connection(
	1295	(self.host, self.port), self.timeout, sa)
	1296	if is_https:
	1297	self.sock = ssl.wrap_socket(
	1298	sock, self.key_file, self.cert_file,
	1299	ssl_version=ssl.PROTOCOL_TLSv1)
	1300	else:
	1301	self.sock = sock
	1302	hc.connect = functools.partial(_hc_connect, hc)
	1303
	1304	return hc
	1305
	1306
	1307	def handle_youtubedl_headers(headers):
	1308	filtered_headers = headers
	1309
	1310	if 'Youtubedl-no-compression' in filtered_headers:
	1311	filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
	1312	del filtered_headers['Youtubedl-no-compression']
	1313
	1314	return filtered_headers
	1315
	1316
	1317	class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
	1318	"""Handler for HTTP requests and responses.
	1319
	1320	This class, when installed with an OpenerDirector, automatically adds
	1321	the standard headers to every HTTP request and handles gzipped and
	1322	deflated responses from web servers. If compression is to be avoided in
	1323	a particular request, the original request in the program code only has
	1324	to include the HTTP header "Youtubedl-no-compression", which will be
	1325	removed before making the real request.
	1326
	1327	Part of this code was copied from:
	1328
	1329	http://techknack.net/python-urllib2-handlers/
	1330
	1331	Andrew Rowls, the author of that code, agreed to release it to the
	1332	public domain.
	1333	"""
	1334
	1335	def __init__(self, params, args, *kwargs):
	1336	compat_urllib_request.HTTPHandler.__init__(self, args, *kwargs)
	1337	self._params = params
	1338
	1339	def http_open(self, req):
	1340	conn_class = compat_http_client.HTTPConnection
	1341
	1342	socks_proxy = req.headers.get('Ytdl-socks-proxy')
	1343	if socks_proxy:
	1344	conn_class = make_socks_conn_class(conn_class, socks_proxy)
	1345	del req.headers['Ytdl-socks-proxy']
	1346
	1347	return self.do_open(functools.partial(
	1348	_create_http_connection, self, conn_class, False),
	1349	req)
	1350
	1351	@staticmethod
	1352	def deflate(data):
	1353	if not data:
	1354	return data
	1355	try:
	1356	return zlib.decompress(data, -zlib.MAX_WBITS)
	1357	except zlib.error:
	1358	return zlib.decompress(data)
	1359
	1360	def http_request(self, req):
	1361	# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
	1362	# always respected by websites, some tend to give out URLs with non percent-encoded
	1363	# non-ASCII characters (see telemb.py, ard.py [#3412])
	1364	# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
	1365	# To work around aforementioned issue we will replace request's original URL with
	1366	# percent-encoded one
	1367	# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
	1368	# the code of this workaround has been moved here from YoutubeDL.urlopen()
	1369	url = req.get_full_url()
	1370	url_escaped = escape_url(url)
	1371
	1372	# Substitute URL if any change after escaping
	1373	if url != url_escaped:
	1374	req = update_Request(req, url=url_escaped)
	1375
	1376	for h, v in self._params.get('http_headers', std_headers).items():
	1377	# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
	1378	# The dict keys are capitalized because of this bug by urllib
	1379	if h.capitalize() not in req.headers:
	1380	req.add_header(h, v)
	1381
	1382	req.headers = handle_youtubedl_headers(req.headers)
	1383
	1384	if sys.version_info < (2, 7) and '#' in req.get_full_url():
	1385	# Python 2.6 is brain-dead when it comes to fragments
	1386	req._Request__original = req._Request__original.partition('#')[0]
	1387	req._Request__r_type = req._Request__r_type.partition('#')[0]
	1388
	1389	return req
	1390
	1391	def http_response(self, req, resp):
	1392	old_resp = resp
	1393	# gzip
	1394	if resp.headers.get('Content-encoding', '') == 'gzip':
	1395	content = resp.read()
	1396	gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
	1397	try:
	1398	uncompressed = io.BytesIO(gz.read())
	1399	except IOError as original_ioerror:
	1400	# There may be junk add the end of the file
	1401	# See http://stackoverflow.com/q/4928560/35070 for details
	1402	for i in range(1, 1024):
	1403	try:
	1404	gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
	1405	uncompressed = io.BytesIO(gz.read())
	1406	except IOError:
	1407	continue
	1408	break
	1409	else:
	1410	raise original_ioerror
	1411	resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
	1412	resp.msg = old_resp.msg
	1413	del resp.headers['Content-encoding']
	1414	# deflate
	1415	if resp.headers.get('Content-encoding', '') == 'deflate':
	1416	gz = io.BytesIO(self.deflate(resp.read()))
	1417	resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
	1418	resp.msg = old_resp.msg
	1419	del resp.headers['Content-encoding']
	1420	# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
	1421	# https://github.com/ytdl-org/youtube-dl/issues/6457).
	1422	if 300 <= resp.code < 400:
	1423	location = resp.headers.get('Location')
	1424	if location:
	1425	# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
	1426	if sys.version_info >= (3, 0):
	1427	location = location.encode('iso-8859-1').decode('utf-8')
	1428	else:
	1429	location = location.decode('utf-8')
	1430	location_escaped = escape_url(location)
	1431	if location != location_escaped:
	1432	del resp.headers['Location']
	1433	if sys.version_info < (3, 0):
	1434	location_escaped = location_escaped.encode('utf-8')
	1435	resp.headers['Location'] = location_escaped
	1436	return resp
	1437
	1438	https_request = http_request
	1439	https_response = http_response
	1440
	1441
	1442	def make_socks_conn_class(base_class, socks_proxy):
	1443	assert issubclass(base_class, (
	1444	compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
	1445
	1446	url_components = compat_urlparse.urlparse(socks_proxy)
	1447	if url_components.scheme.lower() == 'socks5':
	1448	socks_type = ProxyType.SOCKS5
	1449	elif url_components.scheme.lower() in ('socks', 'socks4'):
	1450	socks_type = ProxyType.SOCKS4
	1451	elif url_components.scheme.lower() == 'socks4a':
	1452	socks_type = ProxyType.SOCKS4A
	1453
	1454	def unquote_if_non_empty(s):
	1455	if not s:
	1456	return s
	1457	return compat_urllib_parse_unquote_plus(s)
	1458
	1459	proxy_args = (
	1460	socks_type,
	1461	url_components.hostname, url_components.port or 1080,
	1462	True, # Remote DNS
	1463	unquote_if_non_empty(url_components.username),
	1464	unquote_if_non_empty(url_components.password),
	1465	)
	1466
	1467	class SocksConnection(base_class):
	1468	def connect(self):
	1469	self.sock = sockssocket()
	1470	self.sock.setproxy(*proxy_args)
	1471	if type(self.timeout) in (int, float):
	1472	self.sock.settimeout(self.timeout)
	1473	self.sock.connect((self.host, self.port))
	1474
	1475	if isinstance(self, compat_http_client.HTTPSConnection):
	1476	if hasattr(self, '_context'): # Python > 2.6
	1477	self.sock = self._context.wrap_socket(
	1478	self.sock, server_hostname=self.host)
	1479	else:
	1480	self.sock = ssl.wrap_socket(self.sock)
	1481
	1482	return SocksConnection
	1483
	1484
	1485	class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
	1486	def __init__(self, params, https_conn_class=None, args, *kwargs):
	1487	compat_urllib_request.HTTPSHandler.__init__(self, args, *kwargs)
	1488	self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
	1489	self._params = params
	1490
	1491	def https_open(self, req):
	1492	kwargs = {}
	1493	conn_class = self._https_conn_class
	1494
	1495	if hasattr(self, '_context'): # python > 2.6
	1496	kwargs['context'] = self._context
	1497	if hasattr(self, '_check_hostname'): # python 3.x
	1498	kwargs['check_hostname'] = self._check_hostname
	1499
	1500	socks_proxy = req.headers.get('Ytdl-socks-proxy')
	1501	if socks_proxy:
	1502	conn_class = make_socks_conn_class(conn_class, socks_proxy)
	1503	del req.headers['Ytdl-socks-proxy']
	1504
	1505	return self.do_open(functools.partial(
	1506	_create_http_connection, self, conn_class, True),
	1507	req, **kwargs)
	1508
	1509
	1510	class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
	1511	"""
	1512	See [1] for cookie file format.
	1513
	1514	1. https://curl.haxx.se/docs/http-cookies.html
	1515	"""
	1516	_HTTPONLY_PREFIX = '#HttpOnly_'
	1517	_ENTRY_LEN = 7
	1518	_HEADER = '''# Netscape HTTP Cookie File
	1519	# This file is generated by yt-dlp. Do not edit.
	1520
	1521	'''
	1522	_CookieFileEntry = collections.namedtuple(
	1523	'CookieFileEntry',
	1524	('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
	1525
	1526	def save(self, filename=None, ignore_discard=False, ignore_expires=False):
	1527	"""
	1528	Save cookies to a file.
	1529
	1530	Most of the code is taken from CPython 3.8 and slightly adapted
	1531	to support cookie files with UTF-8 in both python 2 and 3.
	1532	"""
	1533	if filename is None:
	1534	if self.filename is not None:
	1535	filename = self.filename
	1536	else:
	1537	raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
	1538
	1539	# Store session cookies with `expires` set to 0 instead of an empty
	1540	# string
	1541	for cookie in self:
	1542	if cookie.expires is None:
	1543	cookie.expires = 0
	1544
	1545	with io.open(filename, 'w', encoding='utf-8') as f:
	1546	f.write(self._HEADER)
	1547	now = time.time()
	1548	for cookie in self:
	1549	if not ignore_discard and cookie.discard:
	1550	continue
	1551	if not ignore_expires and cookie.is_expired(now):
	1552	continue
	1553	if cookie.secure:
	1554	secure = 'TRUE'
	1555	else:
	1556	secure = 'FALSE'
	1557	if cookie.domain.startswith('.'):
	1558	initial_dot = 'TRUE'
	1559	else:
	1560	initial_dot = 'FALSE'
	1561	if cookie.expires is not None:
	1562	expires = compat_str(cookie.expires)
	1563	else:
	1564	expires = ''
	1565	if cookie.value is None:
	1566	# cookies.txt regards 'Set-Cookie: foo' as a cookie
	1567	# with no name, whereas http.cookiejar regards it as a
	1568	# cookie with no value.
	1569	name = ''
	1570	value = cookie.name
	1571	else:
	1572	name = cookie.name
	1573	value = cookie.value
	1574	f.write(
	1575	'\t'.join([cookie.domain, initial_dot, cookie.path,
	1576	secure, expires, name, value]) + '\n')
	1577
	1578	def load(self, filename=None, ignore_discard=False, ignore_expires=False):
	1579	"""Load cookies from a file."""
	1580	if filename is None:
	1581	if self.filename is not None:
	1582	filename = self.filename
	1583	else:
	1584	raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
	1585
	1586	def prepare_line(line):
	1587	if line.startswith(self._HTTPONLY_PREFIX):
	1588	line = line[len(self._HTTPONLY_PREFIX):]
	1589	# comments and empty lines are fine
	1590	if line.startswith('#') or not line.strip():
	1591	return line
	1592	cookie_list = line.split('\t')
	1593	if len(cookie_list) != self._ENTRY_LEN:
	1594	raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
	1595	cookie = self._CookieFileEntry(*cookie_list)
	1596	if cookie.expires_at and not cookie.expires_at.isdigit():
	1597	raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
	1598	return line
	1599
	1600	cf = io.StringIO()
	1601	with io.open(filename, encoding='utf-8') as f:
	1602	for line in f:
	1603	try:
	1604	cf.write(prepare_line(line))
	1605	except compat_cookiejar.LoadError as e:
	1606	write_string(
	1607	'WARNING: skipping cookie file entry due to %s: %r\n'
	1608	% (e, line), sys.stderr)
	1609	continue
	1610	cf.seek(0)
	1611	self._really_load(cf, filename, ignore_discard, ignore_expires)
	1612	# Session cookies are denoted by either `expires` field set to
	1613	# an empty string or 0. MozillaCookieJar only recognizes the former
	1614	# (see [1]). So we need force the latter to be recognized as session
	1615	# cookies on our own.
	1616	# Session cookies may be important for cookies-based authentication,
	1617	# e.g. usually, when user does not check 'Remember me' check box while
	1618	# logging in on a site, some important cookies are stored as session
	1619	# cookies so that not recognizing them will result in failed login.
	1620	# 1. https://bugs.python.org/issue17164
	1621	for cookie in self:
	1622	# Treat `expires=0` cookies as session cookies
	1623	if cookie.expires == 0:
	1624	cookie.expires = None
	1625	cookie.discard = True
	1626
	1627
	1628	class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
	1629	def __init__(self, cookiejar=None):
	1630	compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
	1631
	1632	def http_response(self, request, response):
	1633	# Python 2 will choke on next HTTP request in row if there are non-ASCII
	1634	# characters in Set-Cookie HTTP header of last response (see
	1635	# https://github.com/ytdl-org/youtube-dl/issues/6769).
	1636	# In order to at least prevent crashing we will percent encode Set-Cookie
	1637	# header before HTTPCookieProcessor starts processing it.
	1638	# if sys.version_info < (3, 0) and response.headers:
	1639	# for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
	1640	# set_cookie = response.headers.get(set_cookie_header)
	1641	# if set_cookie:
	1642	# set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
	1643	# if set_cookie != set_cookie_escaped:
	1644	# del response.headers[set_cookie_header]
	1645	# response.headers[set_cookie_header] = set_cookie_escaped
	1646	return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
	1647
	1648	https_request = compat_urllib_request.HTTPCookieProcessor.http_request
	1649	https_response = http_response
	1650
	1651
	1652	class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
	1653	"""YoutubeDL redirect handler
	1654
	1655	The code is based on HTTPRedirectHandler implementation from CPython [1].
	1656
	1657	This redirect handler solves two issues:
	1658	- ensures redirect URL is always unicode under python 2
	1659	- introduces support for experimental HTTP response status code
	1660	308 Permanent Redirect [2] used by some sites [3]
	1661
	1662	1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
	1663	2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
	1664	3. https://github.com/ytdl-org/youtube-dl/issues/28768
	1665	"""
	1666
	1667	http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
	1668
	1669	def redirect_request(self, req, fp, code, msg, headers, newurl):
	1670	"""Return a Request or None in response to a redirect.
	1671
	1672	This is called by the http_error_30x methods when a
	1673	redirection response is received. If a redirection should
	1674	take place, return a new Request to allow http_error_30x to
	1675	perform the redirect. Otherwise, raise HTTPError if no-one
	1676	else should try to handle this url. Return None if you can't
	1677	but another Handler might.
	1678	"""
	1679	m = req.get_method()
	1680	if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
	1681	or code in (301, 302, 303) and m == "POST")):
	1682	raise compat_HTTPError(req.full_url, code, msg, headers, fp)
	1683	# Strictly (according to RFC 2616), 301 or 302 in response to
	1684	# a POST MUST NOT cause a redirection without confirmation
	1685	# from the user (of urllib.request, in this case). In practice,
	1686	# essentially all clients do redirect in this case, so we do
	1687	# the same.
	1688
	1689	# On python 2 urlh.geturl() may sometimes return redirect URL
	1690	# as byte string instead of unicode. This workaround allows
	1691	# to force it always return unicode.
	1692	if sys.version_info[0] < 3:
	1693	newurl = compat_str(newurl)
	1694
	1695	# Be conciliant with URIs containing a space. This is mainly
	1696	# redundant with the more complete encoding done in http_error_302(),
	1697	# but it is kept for compatibility with other callers.
	1698	newurl = newurl.replace(' ', '%20')
	1699
	1700	CONTENT_HEADERS = ("content-length", "content-type")
	1701	# NB: don't use dict comprehension for python 2.6 compatibility
	1702	newheaders = dict((k, v) for k, v in req.headers.items()
	1703	if k.lower() not in CONTENT_HEADERS)
	1704	return compat_urllib_request.Request(
	1705	newurl, headers=newheaders, origin_req_host=req.origin_req_host,
	1706	unverifiable=True)
	1707
	1708
	1709	def extract_timezone(date_str):
	1710	m = re.search(
	1711	r'''(?x)
	1712	^.{8,}? # >=8 char non-TZ prefix, if present
	1713	(?P<tz>Z\| # just the UTC Z, or
	1714	(?:(?<=.\b\d{4}\|\b\d{2}:\d\d)\| # preceded by 4 digits or hh:mm or
	1715	(?<!.\b[a-zA-Z]{3}\|[a-zA-Z]{4}\|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
	1716	[ ]? # optional space
	1717	(?P<sign>\+\|-) # +/-
	1718	(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
	1719	$)
	1720	''', date_str)
	1721	if not m:
	1722	timezone = datetime.timedelta()
	1723	else:
	1724	date_str = date_str[:-len(m.group('tz'))]
	1725	if not m.group('sign'):
	1726	timezone = datetime.timedelta()
	1727	else:
	1728	sign = 1 if m.group('sign') == '+' else -1
	1729	timezone = datetime.timedelta(
	1730	hours=sign * int(m.group('hours')),
	1731	minutes=sign * int(m.group('minutes')))
	1732	return timezone, date_str
	1733
	1734
	1735	def parse_iso8601(date_str, delimiter='T', timezone=None):
	1736	""" Return a UNIX timestamp from the given date """
	1737
	1738	if date_str is None:
	1739	return None
	1740
	1741	date_str = re.sub(r'\.[0-9]+', '', date_str)
	1742
	1743	if timezone is None:
	1744	timezone, date_str = extract_timezone(date_str)
	1745
	1746	try:
	1747	date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
	1748	dt = datetime.datetime.strptime(date_str, date_format) - timezone
	1749	return calendar.timegm(dt.timetuple())
	1750	except ValueError:
	1751	pass
	1752
	1753
	1754	def date_formats(day_first=True):
	1755	return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
	1756
	1757
	1758	def unified_strdate(date_str, day_first=True):
	1759	"""Return a string with the date in the format YYYYMMDD"""
	1760
	1761	if date_str is None:
	1762	return None
	1763	upload_date = None
	1764	# Replace commas
	1765	date_str = date_str.replace(',', ' ')
	1766	# Remove AM/PM + timezone
	1767	date_str = re.sub(r'(?i)\s*(?:AM\|PM)(?:\s+[A-Z]+)?', '', date_str)
	1768	_, date_str = extract_timezone(date_str)
	1769
	1770	for expression in date_formats(day_first):
	1771	try:
	1772	upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
	1773	except ValueError:
	1774	pass
	1775	if upload_date is None:
	1776	timetuple = email.utils.parsedate_tz(date_str)
	1777	if timetuple:
	1778	try:
	1779	upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
	1780	except ValueError:
	1781	pass
	1782	if upload_date is not None:
	1783	return compat_str(upload_date)
	1784
	1785
	1786	def unified_timestamp(date_str, day_first=True):
	1787	if date_str is None:
	1788	return None
	1789
	1790	date_str = re.sub(r'[,\|]', '', date_str)
	1791
	1792	pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
	1793	timezone, date_str = extract_timezone(date_str)
	1794
	1795	# Remove AM/PM + timezone
	1796	date_str = re.sub(r'(?i)\s*(?:AM\|PM)(?:\s+[A-Z]+)?', '', date_str)
	1797
	1798	# Remove unrecognized timezones from ISO 8601 alike timestamps
	1799	m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
	1800	if m:
	1801	date_str = date_str[:-len(m.group('tz'))]
	1802
	1803	# Python only supports microseconds, so remove nanoseconds
	1804	m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
	1805	if m:
	1806	date_str = m.group(1)
	1807
	1808	for expression in date_formats(day_first):
	1809	try:
	1810	dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
	1811	return calendar.timegm(dt.timetuple())
	1812	except ValueError:
	1813	pass
	1814	timetuple = email.utils.parsedate_tz(date_str)
	1815	if timetuple:
	1816	return calendar.timegm(timetuple) + pm_delta * 3600
	1817
	1818
	1819	def determine_ext(url, default_ext='unknown_video'):
	1820	if url is None or '.' not in url:
	1821	return default_ext
	1822	guess = url.partition('?')[0].rpartition('.')[2]
	1823	if re.match(r'^[A-Za-z0-9]+$', guess):
	1824	return guess
	1825	# Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
	1826	elif guess.rstrip('/') in KNOWN_EXTENSIONS:
	1827	return guess.rstrip('/')
	1828	else:
	1829	return default_ext
	1830
	1831
	1832	def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
	1833	return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
	1834
	1835
	1836	def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
	1837	"""
	1838	Return a datetime object from a string in the format YYYYMMDD or
	1839	(now\|today\|yesterday\|date)[+-][0-9](microsecond\|second\|minute\|hour\|day\|week\|month\|year)(s)?
	1840
	1841	format: string date format used to return datetime object from
	1842	precision: round the time portion of a datetime object.
	1843	auto\|microsecond\|second\|minute\|hour\|day.
	1844	auto: round to the unit provided in date_str (if applicable).
	1845	"""
	1846	auto_precision = False
	1847	if precision == 'auto':
	1848	auto_precision = True
	1849	precision = 'microsecond'
	1850	today = datetime_round(datetime.datetime.utcnow(), precision)
	1851	if date_str in ('now', 'today'):
	1852	return today
	1853	if date_str == 'yesterday':
	1854	return today - datetime.timedelta(days=1)
	1855	match = re.match(
	1856	r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond\|second\|minute\|hour\|day\|week\|month\|year)(s)?',
	1857	date_str)
	1858	if match is not None:
	1859	start_time = datetime_from_str(match.group('start'), precision, format)
	1860	time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
	1861	unit = match.group('unit')
	1862	if unit == 'month' or unit == 'year':
	1863	new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
	1864	unit = 'day'
	1865	else:
	1866	if unit == 'week':
	1867	unit = 'day'
	1868	time *= 7
	1869	delta = datetime.timedelta(**{unit + 's': time})
	1870	new_date = start_time + delta
	1871	if auto_precision:
	1872	return datetime_round(new_date, unit)
	1873	return new_date
	1874
	1875	return datetime_round(datetime.datetime.strptime(date_str, format), precision)
	1876
	1877
	1878	def date_from_str(date_str, format='%Y%m%d', strict=False):
	1879	"""
	1880	Return a datetime object from a string in the format YYYYMMDD or
	1881	(now\|today\|yesterday\|date)[+-][0-9](microsecond\|second\|minute\|hour\|day\|week\|month\|year)(s)?
	1882
	1883	If "strict", only (now\|today)[+-][0-9](day\|week\|month\|year)(s)? is allowed
	1884
	1885	format: string date format used to return datetime object from
	1886	"""
	1887	if strict and not re.fullmatch(r'\d{8}\|(now\|today)[+-]\d+(day\|week\|month\|year)(s)?', date_str):
	1888	raise ValueError(f'Invalid date format {date_str}')
	1889	return datetime_from_str(date_str, precision='microsecond', format=format).date()
	1890
	1891
	1892	def datetime_add_months(dt, months):
	1893	"""Increment/Decrement a datetime object by months."""
	1894	month = dt.month + months - 1
	1895	year = dt.year + month // 12
	1896	month = month % 12 + 1
	1897	day = min(dt.day, calendar.monthrange(year, month)[1])
	1898	return dt.replace(year, month, day)
	1899
	1900
	1901	def datetime_round(dt, precision='day'):
	1902	"""
	1903	Round a datetime object's time to a specific precision
	1904	"""
	1905	if precision == 'microsecond':
	1906	return dt
	1907
	1908	unit_seconds = {
	1909	'day': 86400,
	1910	'hour': 3600,
	1911	'minute': 60,
	1912	'second': 1,
	1913	}
	1914	roundto = lambda x, n: ((x + n / 2) // n) * n
	1915	timestamp = calendar.timegm(dt.timetuple())
	1916	return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
	1917
	1918
	1919	def hyphenate_date(date_str):
	1920	"""
	1921	Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
	1922	match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
	1923	if match is not None:
	1924	return '-'.join(match.groups())
	1925	else:
	1926	return date_str
	1927
	1928
	1929	class DateRange(object):
	1930	"""Represents a time interval between two dates"""
	1931
	1932	def __init__(self, start=None, end=None):
	1933	"""start and end must be strings in the format accepted by date"""
	1934	if start is not None:
	1935	self.start = date_from_str(start, strict=True)
	1936	else:
	1937	self.start = datetime.datetime.min.date()
	1938	if end is not None:
	1939	self.end = date_from_str(end, strict=True)
	1940	else:
	1941	self.end = datetime.datetime.max.date()
	1942	if self.start > self.end:
	1943	raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
	1944
	1945	@classmethod
	1946	def day(cls, day):
	1947	"""Returns a range that only contains the given day"""
	1948	return cls(day, day)
	1949
	1950	def __contains__(self, date):
	1951	"""Check if the date is in the range"""
	1952	if not isinstance(date, datetime.date):
	1953	date = date_from_str(date)
	1954	return self.start <= date <= self.end
	1955
	1956	def __str__(self):
	1957	return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
	1958
	1959
	1960	def platform_name():
	1961	""" Returns the platform name as a compat_str """
	1962	res = platform.platform()
	1963	if isinstance(res, bytes):
	1964	res = res.decode(preferredencoding())
	1965
	1966	assert isinstance(res, compat_str)
	1967	return res
	1968
	1969
	1970	def get_windows_version():
	1971	''' Get Windows version. None if it's not running on Windows '''
	1972	if compat_os_name == 'nt':
	1973	return version_tuple(platform.win32_ver()[1])
	1974	else:
	1975	return None
	1976
	1977
	1978	def _windows_write_string(s, out):
	1979	""" Returns True if the string was written using special methods,
	1980	False if it has yet to be written out."""
	1981	# Adapted from http://stackoverflow.com/a/3259271/35070
	1982
	1983	import ctypes.wintypes
	1984
	1985	WIN_OUTPUT_IDS = {
	1986	1: -11,
	1987	2: -12,
	1988	}
	1989
	1990	try:
	1991	fileno = out.fileno()
	1992	except AttributeError:
	1993	# If the output stream doesn't have a fileno, it's virtual
	1994	return False
	1995	except io.UnsupportedOperation:
	1996	# Some strange Windows pseudo files?
	1997	return False
	1998	if fileno not in WIN_OUTPUT_IDS:
	1999	return False
	2000
	2001	GetStdHandle = compat_ctypes_WINFUNCTYPE(
	2002	ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
	2003	('GetStdHandle', ctypes.windll.kernel32))
	2004	h = GetStdHandle(WIN_OUTPUT_IDS[fileno])
	2005
	2006	WriteConsoleW = compat_ctypes_WINFUNCTYPE(
	2007	ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
	2008	ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
	2009	ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
	2010	written = ctypes.wintypes.DWORD(0)
	2011
	2012	GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
	2013	FILE_TYPE_CHAR = 0x0002
	2014	FILE_TYPE_REMOTE = 0x8000
	2015	GetConsoleMode = compat_ctypes_WINFUNCTYPE(
	2016	ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
	2017	ctypes.POINTER(ctypes.wintypes.DWORD))(
	2018	('GetConsoleMode', ctypes.windll.kernel32))
	2019	INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value
	2020
	2021	def not_a_console(handle):
	2022	if handle == INVALID_HANDLE_VALUE or handle is None:
	2023	return True
	2024	return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
	2025	or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)
	2026
	2027	if not_a_console(h):
	2028	return False
	2029
	2030	def next_nonbmp_pos(s):
	2031	try:
	2032	return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
	2033	except StopIteration:
	2034	return len(s)
	2035
	2036	while s:
	2037	count = min(next_nonbmp_pos(s), 1024)
	2038
	2039	ret = WriteConsoleW(
	2040	h, s, count if count else 2, ctypes.byref(written), None)
	2041	if ret == 0:
	2042	raise OSError('Failed to write string')
	2043	if not count: # We just wrote a non-BMP character
	2044	assert written.value == 2
	2045	s = s[1:]
	2046	else:
	2047	assert written.value > 0
	2048	s = s[written.value:]
	2049	return True
	2050
	2051
	2052	def write_string(s, out=None, encoding=None):
	2053	if out is None:
	2054	out = sys.stderr
	2055	assert type(s) == compat_str
	2056
	2057	if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
	2058	if _windows_write_string(s, out):
	2059	return
	2060
	2061	if ('b' in getattr(out, 'mode', '')
	2062	or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
	2063	byt = s.encode(encoding or preferredencoding(), 'ignore')
	2064	out.write(byt)
	2065	elif hasattr(out, 'buffer'):
	2066	enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
	2067	byt = s.encode(enc, 'ignore')
	2068	out.buffer.write(byt)
	2069	else:
	2070	out.write(s)
	2071	out.flush()
	2072
	2073
	2074	def bytes_to_intlist(bs):
	2075	if not bs:
	2076	return []
	2077	if isinstance(bs[0], int): # Python 3
	2078	return list(bs)
	2079	else:
	2080	return [ord(c) for c in bs]
	2081
	2082
	2083	def intlist_to_bytes(xs):
	2084	if not xs:
	2085	return b''
	2086	return compat_struct_pack('%dB' % len(xs), *xs)
	2087
	2088
	2089	# Cross-platform file locking
	2090	if sys.platform == 'win32':
	2091	import ctypes.wintypes
	2092	import msvcrt
	2093
	2094	class OVERLAPPED(ctypes.Structure):
	2095	_fields_ = [
	2096	('Internal', ctypes.wintypes.LPVOID),
	2097	('InternalHigh', ctypes.wintypes.LPVOID),
	2098	('Offset', ctypes.wintypes.DWORD),
	2099	('OffsetHigh', ctypes.wintypes.DWORD),
	2100	('hEvent', ctypes.wintypes.HANDLE),
	2101	]
	2102
	2103	kernel32 = ctypes.windll.kernel32
	2104	LockFileEx = kernel32.LockFileEx
	2105	LockFileEx.argtypes = [
	2106	ctypes.wintypes.HANDLE, # hFile
	2107	ctypes.wintypes.DWORD, # dwFlags
	2108	ctypes.wintypes.DWORD, # dwReserved
	2109	ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
	2110	ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
	2111	ctypes.POINTER(OVERLAPPED) # Overlapped
	2112	]
	2113	LockFileEx.restype = ctypes.wintypes.BOOL
	2114	UnlockFileEx = kernel32.UnlockFileEx
	2115	UnlockFileEx.argtypes = [
	2116	ctypes.wintypes.HANDLE, # hFile
	2117	ctypes.wintypes.DWORD, # dwReserved
	2118	ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
	2119	ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
	2120	ctypes.POINTER(OVERLAPPED) # Overlapped
	2121	]
	2122	UnlockFileEx.restype = ctypes.wintypes.BOOL
	2123	whole_low = 0xffffffff
	2124	whole_high = 0x7fffffff
	2125
	2126	def _lock_file(f, exclusive, block):
	2127	overlapped = OVERLAPPED()
	2128	overlapped.Offset = 0
	2129	overlapped.OffsetHigh = 0
	2130	overlapped.hEvent = 0
	2131	f._lock_file_overlapped_p = ctypes.pointer(overlapped)
	2132
	2133	if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
	2134	(0x2 if exclusive else 0x0) \| (0x0 if block else 0x1),
	2135	0, whole_low, whole_high, f._lock_file_overlapped_p):
	2136	raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())
	2137
	2138	def _unlock_file(f):
	2139	assert f._lock_file_overlapped_p
	2140	handle = msvcrt.get_osfhandle(f.fileno())
	2141	if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
	2142	raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
	2143
	2144	else:
	2145	try:
	2146	import fcntl
	2147
	2148	def _lock_file(f, exclusive, block):
	2149	try:
	2150	fcntl.flock(f,
	2151	fcntl.LOCK_SH if not exclusive
	2152	else fcntl.LOCK_EX if block
	2153	else fcntl.LOCK_EX \| fcntl.LOCK_NB)
	2154	except BlockingIOError:
	2155	raise
	2156	except OSError: # AOSP does not have flock()
	2157	fcntl.lockf(f,
	2158	fcntl.LOCK_SH if not exclusive
	2159	else fcntl.LOCK_EX if block
	2160	else fcntl.LOCK_EX \| fcntl.LOCK_NB)
	2161
	2162	def _unlock_file(f):
	2163	try:
	2164	fcntl.flock(f, fcntl.LOCK_UN)
	2165	except OSError:
	2166	fcntl.lockf(f, fcntl.LOCK_UN)
	2167
	2168	except ImportError:
	2169	UNSUPPORTED_MSG = 'file locking is not supported on this platform'
	2170
	2171	def _lock_file(f, exclusive, block):
	2172	raise IOError(UNSUPPORTED_MSG)
	2173
	2174	def _unlock_file(f):
	2175	raise IOError(UNSUPPORTED_MSG)
	2176
	2177
	2178	class locked_file(object):
	2179	_closed = False
	2180
	2181	def __init__(self, filename, mode, block=True, encoding=None):
	2182	assert mode in ['r', 'rb', 'a', 'ab', 'w', 'wb']
	2183	self.f = io.open(filename, mode, encoding=encoding)
	2184	self.mode = mode
	2185	self.block = block
	2186
	2187	def __enter__(self):
	2188	exclusive = 'r' not in self.mode
	2189	try:
	2190	_lock_file(self.f, exclusive, self.block)
	2191	except IOError:
	2192	self.f.close()
	2193	raise
	2194	return self
	2195
	2196	def __exit__(self, etype, value, traceback):
	2197	try:
	2198	if not self._closed:
	2199	_unlock_file(self.f)
	2200	finally:
	2201	self.f.close()
	2202	self._closed = True
	2203
	2204	def __iter__(self):
	2205	return iter(self.f)
	2206
	2207	def write(self, *args):
	2208	return self.f.write(*args)
	2209
	2210	def read(self, *args):
	2211	return self.f.read(*args)
	2212
	2213	def flush(self):
	2214	self.f.flush()
	2215
	2216	def open(self):
	2217	return self.__enter__()
	2218
	2219	def close(self, *args):
	2220	self.__exit__(self, *args, value=False, traceback=False)
	2221
	2222
	2223	def get_filesystem_encoding():
	2224	encoding = sys.getfilesystemencoding()
	2225	return encoding if encoding is not None else 'utf-8'
	2226
	2227
	2228	def shell_quote(args):
	2229	quoted_args = []
	2230	encoding = get_filesystem_encoding()
	2231	for a in args:
	2232	if isinstance(a, bytes):
	2233	# We may get a filename encoded with 'encodeFilename'
	2234	a = a.decode(encoding)
	2235	quoted_args.append(compat_shlex_quote(a))
	2236	return ' '.join(quoted_args)
	2237
	2238
	2239	def smuggle_url(url, data):
	2240	""" Pass additional data in a URL for internal use. """
	2241
	2242	url, idata = unsmuggle_url(url, {})
	2243	data.update(idata)
	2244	sdata = compat_urllib_parse_urlencode(
	2245	{'__youtubedl_smuggle': json.dumps(data)})
	2246	return url + '#' + sdata
	2247
	2248
	2249	def unsmuggle_url(smug_url, default=None):
	2250	if '#__youtubedl_smuggle' not in smug_url:
	2251	return smug_url, default
	2252	url, _, sdata = smug_url.rpartition('#')
	2253	jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
	2254	data = json.loads(jsond)
	2255	return url, data
	2256
	2257
	2258	def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
	2259	""" Formats numbers with decimal sufixes like K, M, etc """
	2260	num, factor = float_or_none(num), float(factor)
	2261	if num is None or num < 0:
	2262	return None
	2263	exponent = 0 if num == 0 else int(math.log(num, factor))
	2264	suffix = ['', *'kMGTPEZY'][exponent]
	2265	if factor == 1024:
	2266	suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
	2267	converted = num / (factor ** exponent)
	2268	return fmt % (converted, suffix)
	2269
	2270
	2271	def format_bytes(bytes):
	2272	return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
	2273
	2274
	2275	def lookup_unit_table(unit_table, s):
	2276	units_re = '\|'.join(re.escape(u) for u in unit_table)
	2277	m = re.match(
	2278	r'(?P<num>[0-9]+(?:[,.][0-9])?)\s(?P<unit>%s)\b' % units_re, s)
	2279	if not m:
	2280	return None
	2281	num_str = m.group('num').replace(',', '.')
	2282	mult = unit_table[m.group('unit')]
	2283	return int(float(num_str) * mult)
	2284
	2285
	2286	def parse_filesize(s):
	2287	if s is None:
	2288	return None
	2289
	2290	# The lower-case forms are of course incorrect and unofficial,
	2291	# but we support those too
	2292	_UNIT_TABLE = {
	2293	'B': 1,
	2294	'b': 1,
	2295	'bytes': 1,
	2296	'KiB': 1024,
	2297	'KB': 1000,
	2298	'kB': 1024,
	2299	'Kb': 1000,
	2300	'kb': 1000,
	2301	'kilobytes': 1000,
	2302	'kibibytes': 1024,
	2303	'MiB': 1024 ** 2,
	2304	'MB': 1000 ** 2,
	2305	'mB': 1024 ** 2,
	2306	'Mb': 1000 ** 2,
	2307	'mb': 1000 ** 2,
	2308	'megabytes': 1000 ** 2,
	2309	'mebibytes': 1024 ** 2,
	2310	'GiB': 1024 ** 3,
	2311	'GB': 1000 ** 3,
	2312	'gB': 1024 ** 3,
	2313	'Gb': 1000 ** 3,
	2314	'gb': 1000 ** 3,
	2315	'gigabytes': 1000 ** 3,
	2316	'gibibytes': 1024 ** 3,
	2317	'TiB': 1024 ** 4,
	2318	'TB': 1000 ** 4,
	2319	'tB': 1024 ** 4,
	2320	'Tb': 1000 ** 4,
	2321	'tb': 1000 ** 4,
	2322	'terabytes': 1000 ** 4,
	2323	'tebibytes': 1024 ** 4,
	2324	'PiB': 1024 ** 5,
	2325	'PB': 1000 ** 5,
	2326	'pB': 1024 ** 5,
	2327	'Pb': 1000 ** 5,
	2328	'pb': 1000 ** 5,
	2329	'petabytes': 1000 ** 5,
	2330	'pebibytes': 1024 ** 5,
	2331	'EiB': 1024 ** 6,
	2332	'EB': 1000 ** 6,
	2333	'eB': 1024 ** 6,
	2334	'Eb': 1000 ** 6,
	2335	'eb': 1000 ** 6,
	2336	'exabytes': 1000 ** 6,
	2337	'exbibytes': 1024 ** 6,
	2338	'ZiB': 1024 ** 7,
	2339	'ZB': 1000 ** 7,
	2340	'zB': 1024 ** 7,
	2341	'Zb': 1000 ** 7,
	2342	'zb': 1000 ** 7,
	2343	'zettabytes': 1000 ** 7,
	2344	'zebibytes': 1024 ** 7,
	2345	'YiB': 1024 ** 8,
	2346	'YB': 1000 ** 8,
	2347	'yB': 1024 ** 8,
	2348	'Yb': 1000 ** 8,
	2349	'yb': 1000 ** 8,
	2350	'yottabytes': 1000 ** 8,
	2351	'yobibytes': 1024 ** 8,
	2352	}
	2353
	2354	return lookup_unit_table(_UNIT_TABLE, s)
	2355
	2356
	2357	def parse_count(s):
	2358	if s is None:
	2359	return None
	2360
	2361	s = re.sub(r'^[^\d]+\s', '', s).strip()
	2362
	2363	if re.match(r'^[\d,.]+$', s):
	2364	return str_to_int(s)
	2365
	2366	_UNIT_TABLE = {
	2367	'k': 1000,
	2368	'K': 1000,
	2369	'm': 1000 ** 2,
	2370	'M': 1000 ** 2,
	2371	'kk': 1000 ** 2,
	2372	'KK': 1000 ** 2,
	2373	'b': 1000 ** 3,
	2374	'B': 1000 ** 3,
	2375	}
	2376
	2377	ret = lookup_unit_table(_UNIT_TABLE, s)
	2378	if ret is not None:
	2379	return ret
	2380
	2381	mobj = re.match(r'([\d,.]+)(?:$\|\s)', s)
	2382	if mobj:
	2383	return str_to_int(mobj.group(1))
	2384
	2385
	2386	def parse_resolution(s):
	2387	if s is None:
	2388	return {}
	2389
	2390	mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s[xX×,]\s(?P<h>\d+)(?![a-zA-Z0-9])', s)
	2391	if mobj:
	2392	return {
	2393	'width': int(mobj.group('w')),
	2394	'height': int(mobj.group('h')),
	2395	}
	2396
	2397	mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
	2398	if mobj:
	2399	return {'height': int(mobj.group(1))}
	2400
	2401	mobj = re.search(r'\b([48])[kK]\b', s)
	2402	if mobj:
	2403	return {'height': int(mobj.group(1)) * 540}
	2404
	2405	return {}
	2406
	2407
	2408	def parse_bitrate(s):
	2409	if not isinstance(s, compat_str):
	2410	return
	2411	mobj = re.search(r'\b(\d+)\s*kbps', s)
	2412	if mobj:
	2413	return int(mobj.group(1))
	2414
	2415
	2416	def month_by_name(name, lang='en'):
	2417	""" Return the number of a month by (locale-independently) English name """
	2418
	2419	month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
	2420
	2421	try:
	2422	return month_names.index(name) + 1
	2423	except ValueError:
	2424	return None
	2425
	2426
	2427	def month_by_abbreviation(abbrev):
	2428	""" Return the number of a month by (locale-independently) English
	2429	abbreviations """
	2430
	2431	try:
	2432	return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
	2433	except ValueError:
	2434	return None
	2435
	2436
	2437	def fix_xml_ampersands(xml_str):
	2438	"""Replace all the '&' by '&' in XML"""
	2439	return re.sub(
	2440	r'&(?!amp;\|lt;\|gt;\|apos;\|quot;\|#x[0-9a-fA-F]{,4};\|#[0-9]{,4};)',
	2441	'&',
	2442	xml_str)
	2443
	2444
	2445	def setproctitle(title):
	2446	assert isinstance(title, compat_str)
	2447
	2448	# ctypes in Jython is not complete
	2449	# http://bugs.jython.org/issue2148
	2450	if sys.platform.startswith('java'):
	2451	return
	2452
	2453	try:
	2454	libc = ctypes.cdll.LoadLibrary('libc.so.6')
	2455	except OSError:
	2456	return
	2457	except TypeError:
	2458	# LoadLibrary in Windows Python 2.7.13 only expects
	2459	# a bytestring, but since unicode_literals turns
	2460	# every string into a unicode string, it fails.
	2461	return
	2462	title_bytes = title.encode('utf-8')
	2463	buf = ctypes.create_string_buffer(len(title_bytes))
	2464	buf.value = title_bytes
	2465	try:
	2466	libc.prctl(15, buf, 0, 0, 0)
	2467	except AttributeError:
	2468	return # Strange libc, just skip this
	2469
	2470
	2471	def remove_start(s, start):
	2472	return s[len(start):] if s is not None and s.startswith(start) else s
	2473
	2474
	2475	def remove_end(s, end):
	2476	return s[:-len(end)] if s is not None and s.endswith(end) else s
	2477
	2478
	2479	def remove_quotes(s):
	2480	if s is None or len(s) < 2:
	2481	return s
	2482	for quote in ('"', "'", ):
	2483	if s[0] == quote and s[-1] == quote:
	2484	return s[1:-1]
	2485	return s
	2486
	2487
	2488	def get_domain(url):
	2489	domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
	2490	return domain.group('domain') if domain else None
	2491
	2492
	2493	def url_basename(url):
	2494	path = compat_urlparse.urlparse(url).path
	2495	return path.strip('/').split('/')[-1]
	2496
	2497
	2498	def base_url(url):
	2499	return re.match(r'https?://[^?#&]+/', url).group()
	2500
	2501
	2502	def urljoin(base, path):
	2503	if isinstance(path, bytes):
	2504	path = path.decode('utf-8')
	2505	if not isinstance(path, compat_str) or not path:
	2506	return None
	2507	if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
	2508	return path
	2509	if isinstance(base, bytes):
	2510	base = base.decode('utf-8')
	2511	if not isinstance(base, compat_str) or not re.match(
	2512	r'^(?:https?:)?//', base):
	2513	return None
	2514	return compat_urlparse.urljoin(base, path)
	2515
	2516
	2517	class HEADRequest(compat_urllib_request.Request):
	2518	def get_method(self):
	2519	return 'HEAD'
	2520
	2521
	2522	class PUTRequest(compat_urllib_request.Request):
	2523	def get_method(self):
	2524	return 'PUT'
	2525
	2526
	2527	def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
	2528	if get_attr and v is not None:
	2529	v = getattr(v, get_attr, None)
	2530	try:
	2531	return int(v) * invscale // scale
	2532	except (ValueError, TypeError, OverflowError):
	2533	return default
	2534
	2535
	2536	def str_or_none(v, default=None):
	2537	return default if v is None else compat_str(v)
	2538
	2539
	2540	def str_to_int(int_str):
	2541	""" A more relaxed version of int_or_none """
	2542	if isinstance(int_str, compat_integer_types):
	2543	return int_str
	2544	elif isinstance(int_str, compat_str):
	2545	int_str = re.sub(r'[,\.\+]', '', int_str)
	2546	return int_or_none(int_str)
	2547
	2548
	2549	def float_or_none(v, scale=1, invscale=1, default=None):
	2550	if v is None:
	2551	return default
	2552	try:
	2553	return float(v) * invscale / scale
	2554	except (ValueError, TypeError):
	2555	return default
	2556
	2557
	2558	def bool_or_none(v, default=None):
	2559	return v if isinstance(v, bool) else default
	2560
	2561
	2562	def strip_or_none(v, default=None):
	2563	return v.strip() if isinstance(v, compat_str) else default
	2564
	2565
	2566	def url_or_none(url):
	2567	if not url or not isinstance(url, compat_str):
	2568	return None
	2569	url = url.strip()
	2570	return url if re.match(r'^(?:(?:https?\|rt(?:m(?:pt?[es]?\|fp)\|sp[su]?)\|mms\|ftps?):)?//', url) else None
	2571
	2572
	2573	def request_to_url(req):
	2574	if isinstance(req, compat_urllib_request.Request):
	2575	return req.get_full_url()
	2576	else:
	2577	return req
	2578
	2579
	2580	def strftime_or_none(timestamp, date_format, default=None):
	2581	datetime_object = None
	2582	try:
	2583	if isinstance(timestamp, compat_numeric_types): # unix timestamp
	2584	datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
	2585	elif isinstance(timestamp, compat_str): # assume YYYYMMDD
	2586	datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
	2587	return datetime_object.strftime(date_format)
	2588	except (ValueError, TypeError, AttributeError):
	2589	return default
	2590
	2591
	2592	def parse_duration(s):
	2593	if not isinstance(s, compat_basestring):
	2594	return None
	2595	s = s.strip()
	2596	if not s:
	2597	return None
	2598
	2599	days, hours, mins, secs, ms = [None] * 5
	2600	m = re.match(r'''(?x)
	2601	(?P<before_secs>
	2602	(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
	2603	(?P<secs>(?(before_secs)[0-9]{1,2}\|[0-9]+))
	2604	(?P<ms>[.:][0-9]+)?Z?$
	2605	''', s)
	2606	if m:
	2607	days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
	2608	else:
	2609	m = re.match(
	2610	r'''(?ix)(?:P?
	2611	(?:
	2612	[0-9]+\sy(?:ears?)?\s
	2613	)?
	2614	(?:
	2615	[0-9]+\sm(?:onths?)?\s
	2616	)?
	2617	(?:
	2618	[0-9]+\sw(?:eeks?)?\s
	2619	)?
	2620	(?:
	2621	(?P<days>[0-9]+)\sd(?:ays?)?\s
	2622	)?
	2623	T)?
	2624	(?:
	2625	(?P<hours>[0-9]+)\sh(?:ours?)?\s
	2626	)?
	2627	(?:
	2628	(?P<mins>[0-9]+)\sm(?:in(?:ute)?s?)?\s
	2629	)?
	2630	(?:
	2631	(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\ss(?:ec(?:ond)?s?)?\s
	2632	)?Z?$''', s)
	2633	if m:
	2634	days, hours, mins, secs, ms = m.groups()
	2635	else:
	2636	m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s(?:hours?)\|(?P<mins>[0-9.]+)\s(?:mins?\.?\|minutes?)\s*)Z?$', s)
	2637	if m:
	2638	hours, mins = m.groups()
	2639	else:
	2640	return None
	2641
	2642	duration = 0
	2643	if secs:
	2644	duration += float(secs)
	2645	if mins:
	2646	duration += float(mins) * 60
	2647	if hours:
	2648	duration += float(hours) * 60 * 60
	2649	if days:
	2650	duration += float(days) * 24 * 60 * 60
	2651	if ms:
	2652	duration += float(ms.replace(':', '.'))
	2653	return duration
	2654
	2655
	2656	def prepend_extension(filename, ext, expected_real_ext=None):
	2657	name, real_ext = os.path.splitext(filename)
	2658	return (
	2659	'{0}.{1}{2}'.format(name, ext, real_ext)
	2660	if not expected_real_ext or real_ext[1:] == expected_real_ext
	2661	else '{0}.{1}'.format(filename, ext))
	2662
	2663
	2664	def replace_extension(filename, ext, expected_real_ext=None):
	2665	name, real_ext = os.path.splitext(filename)
	2666	return '{0}.{1}'.format(
	2667	name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
	2668	ext)
	2669
	2670
	2671	def check_executable(exe, args=[]):
	2672	""" Checks if the given binary is installed somewhere in PATH, and returns its name.
	2673	args can be a list of arguments for a short output (like -version) """
	2674	try:
	2675	Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
	2676	except OSError:
	2677	return False
	2678	return exe
	2679
	2680
	2681	def _get_exe_version_output(exe, args):
	2682	try:
	2683	# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
	2684	# SIGTTOU if yt-dlp is run in the background.
	2685	# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
	2686	out, _ = Popen(
	2687	[encodeArgument(exe)] + args, stdin=subprocess.PIPE,
	2688	stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
	2689	except OSError:
	2690	return False
	2691	if isinstance(out, bytes): # Python 2.x
	2692	out = out.decode('ascii', 'ignore')
	2693	return out
	2694
	2695
	2696	def detect_exe_version(output, version_re=None, unrecognized='present'):
	2697	assert isinstance(output, compat_str)
	2698	if version_re is None:
	2699	version_re = r'version\s+([-0-9._a-zA-Z]+)'
	2700	m = re.search(version_re, output)
	2701	if m:
	2702	return m.group(1)
	2703	else:
	2704	return unrecognized
	2705
	2706
	2707	def get_exe_version(exe, args=['--version'],
	2708	version_re=None, unrecognized='present'):
	2709	""" Returns the version of the specified executable,
	2710	or False if the executable is not present """
	2711	out = _get_exe_version_output(exe, args)
	2712	return detect_exe_version(out, version_re, unrecognized) if out else False
	2713
	2714
	2715	class LazyList(collections.abc.Sequence):
	2716	''' Lazy immutable list from an iterable
	2717	Note that slices of a LazyList are lists and not LazyList'''
	2718
	2719	class IndexError(IndexError):
	2720	pass
	2721
	2722	def __init__(self, iterable, *, reverse=False, _cache=None):
	2723	self.__iterable = iter(iterable)
	2724	self.__cache = [] if _cache is None else _cache
	2725	self.__reversed = reverse
	2726
	2727	def __iter__(self):
	2728	if self.__reversed:
	2729	# We need to consume the entire iterable to iterate in reverse
	2730	yield from self.exhaust()
	2731	return
	2732	yield from self.__cache
	2733	for item in self.__iterable:
	2734	self.__cache.append(item)
	2735	yield item
	2736
	2737	def __exhaust(self):
	2738	self.__cache.extend(self.__iterable)
	2739	# Discard the emptied iterable to make it pickle-able
	2740	self.__iterable = []
	2741	return self.__cache
	2742
	2743	def exhaust(self):
	2744	''' Evaluate the entire iterable '''
	2745	return self.__exhaust()[::-1 if self.__reversed else 1]
	2746
	2747	@staticmethod
	2748	def __reverse_index(x):
	2749	return None if x is None else -(x + 1)
	2750
	2751	def __getitem__(self, idx):
	2752	if isinstance(idx, slice):
	2753	if self.__reversed:
	2754	idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
	2755	start, stop, step = idx.start, idx.stop, idx.step or 1
	2756	elif isinstance(idx, int):
	2757	if self.__reversed:
	2758	idx = self.__reverse_index(idx)
	2759	start, stop, step = idx, idx, 0
	2760	else:
	2761	raise TypeError('indices must be integers or slices')
	2762	if ((start or 0) < 0 or (stop or 0) < 0
	2763	or (start is None and step < 0)
	2764	or (stop is None and step > 0)):
	2765	# We need to consume the entire iterable to be able to slice from the end
	2766	# Obviously, never use this with infinite iterables
	2767	self.__exhaust()
	2768	try:
	2769	return self.__cache[idx]
	2770	except IndexError as e:
	2771	raise self.IndexError(e) from e
	2772	n = max(start or 0, stop or 0) - len(self.__cache) + 1
	2773	if n > 0:
	2774	self.__cache.extend(itertools.islice(self.__iterable, n))
	2775	try:
	2776	return self.__cache[idx]
	2777	except IndexError as e:
	2778	raise self.IndexError(e) from e
	2779
	2780	def __bool__(self):
	2781	try:
	2782	self[-1] if self.__reversed else self[0]
	2783	except self.IndexError:
	2784	return False
	2785	return True
	2786
	2787	def __len__(self):
	2788	self.__exhaust()
	2789	return len(self.__cache)
	2790
	2791	def __reversed__(self):
	2792	return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
	2793
	2794	def __copy__(self):
	2795	return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
	2796
	2797	def __repr__(self):
	2798	# repr and str should mimic a list. So we exhaust the iterable
	2799	return repr(self.exhaust())
	2800
	2801	def __str__(self):
	2802	return repr(self.exhaust())
	2803
	2804
	2805	class PagedList:
	2806
	2807	class IndexError(IndexError):
	2808	pass
	2809
	2810	def __len__(self):
	2811	# This is only useful for tests
	2812	return len(self.getslice())
	2813
	2814	def __init__(self, pagefunc, pagesize, use_cache=True):
	2815	self._pagefunc = pagefunc
	2816	self._pagesize = pagesize
	2817	self._pagecount = float('inf')
	2818	self._use_cache = use_cache
	2819	self._cache = {}
	2820
	2821	def getpage(self, pagenum):
	2822	page_results = self._cache.get(pagenum)
	2823	if page_results is None:
	2824	page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
	2825	if self._use_cache:
	2826	self._cache[pagenum] = page_results
	2827	return page_results
	2828
	2829	def getslice(self, start=0, end=None):
	2830	return list(self._getslice(start, end))
	2831
	2832	def _getslice(self, start, end):
	2833	raise NotImplementedError('This method must be implemented by subclasses')
	2834
	2835	def __getitem__(self, idx):
	2836	assert self._use_cache, 'Indexing PagedList requires cache'
	2837	if not isinstance(idx, int) or idx < 0:
	2838	raise TypeError('indices must be non-negative integers')
	2839	entries = self.getslice(idx, idx + 1)
	2840	if not entries:
	2841	raise self.IndexError()
	2842	return entries[0]
	2843
	2844
	2845	class OnDemandPagedList(PagedList):
	2846	def _getslice(self, start, end):
	2847	for pagenum in itertools.count(start // self._pagesize):
	2848	firstid = pagenum * self._pagesize
	2849	nextfirstid = pagenum * self._pagesize + self._pagesize
	2850	if start >= nextfirstid:
	2851	continue
	2852
	2853	startv = (
	2854	start % self._pagesize
	2855	if firstid <= start < nextfirstid
	2856	else 0)
	2857	endv = (
	2858	((end - 1) % self._pagesize) + 1
	2859	if (end is not None and firstid <= end <= nextfirstid)
	2860	else None)
	2861
	2862	try:
	2863	page_results = self.getpage(pagenum)
	2864	except Exception:
	2865	self._pagecount = pagenum - 1
	2866	raise
	2867	if startv != 0 or endv is not None:
	2868	page_results = page_results[startv:endv]
	2869	yield from page_results
	2870
	2871	# A little optimization - if current page is not "full", ie. does
	2872	# not contain page_size videos then we can assume that this page
	2873	# is the last one - there are no more ids on further pages -
	2874	# i.e. no need to query again.
	2875	if len(page_results) + startv < self._pagesize:
	2876	break
	2877
	2878	# If we got the whole page, but the next page is not interesting,
	2879	# break out early as well
	2880	if end == nextfirstid:
	2881	break
	2882
	2883
	2884	class InAdvancePagedList(PagedList):
	2885	def __init__(self, pagefunc, pagecount, pagesize):
	2886	PagedList.__init__(self, pagefunc, pagesize, True)
	2887	self._pagecount = pagecount
	2888
	2889	def _getslice(self, start, end):
	2890	start_page = start // self._pagesize
	2891	end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
	2892	skip_elems = start - start_page * self._pagesize
	2893	only_more = None if end is None else end - start
	2894	for pagenum in range(start_page, end_page):
	2895	page_results = self.getpage(pagenum)
	2896	if skip_elems:
	2897	page_results = page_results[skip_elems:]
	2898	skip_elems = None
	2899	if only_more is not None:
	2900	if len(page_results) < only_more:
	2901	only_more -= len(page_results)
	2902	else:
	2903	yield from page_results[:only_more]
	2904	break
	2905	yield from page_results
	2906
	2907
	2908	def uppercase_escape(s):
	2909	unicode_escape = codecs.getdecoder('unicode_escape')
	2910	return re.sub(
	2911	r'\\U[0-9a-fA-F]{8}',
	2912	lambda m: unicode_escape(m.group(0))[0],
	2913	s)
	2914
	2915
	2916	def lowercase_escape(s):
	2917	unicode_escape = codecs.getdecoder('unicode_escape')
	2918	return re.sub(
	2919	r'\\u[0-9a-fA-F]{4}',
	2920	lambda m: unicode_escape(m.group(0))[0],
	2921	s)
	2922
	2923
	2924	def escape_rfc3986(s):
	2925	"""Escape non-ASCII characters as suggested by RFC 3986"""
	2926	if sys.version_info < (3, 0) and isinstance(s, compat_str):
	2927	s = s.encode('utf-8')
	2928	return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
	2929
	2930
	2931	def escape_url(url):
	2932	"""Escape URL as suggested by RFC 3986"""
	2933	url_parsed = compat_urllib_parse_urlparse(url)
	2934	return url_parsed._replace(
	2935	netloc=url_parsed.netloc.encode('idna').decode('ascii'),
	2936	path=escape_rfc3986(url_parsed.path),
	2937	params=escape_rfc3986(url_parsed.params),
	2938	query=escape_rfc3986(url_parsed.query),
	2939	fragment=escape_rfc3986(url_parsed.fragment)
	2940	).geturl()
	2941
	2942
	2943	def parse_qs(url):
	2944	return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
	2945
	2946
	2947	def read_batch_urls(batch_fd):
	2948	def fixup(url):
	2949	if not isinstance(url, compat_str):
	2950	url = url.decode('utf-8', 'replace')
	2951	BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
	2952	for bom in BOM_UTF8:
	2953	if url.startswith(bom):
	2954	url = url[len(bom):]
	2955	url = url.lstrip()
	2956	if not url or url.startswith(('#', ';', ']')):
	2957	return False
	2958	# "#" cannot be stripped out since it is part of the URI
	2959	# However, it can be safely stipped out if follwing a whitespace
	2960	return re.split(r'\s#', url, 1)[0].rstrip()
	2961
	2962	with contextlib.closing(batch_fd) as fd:
	2963	return [url for url in map(fixup, fd) if url]
	2964
	2965
	2966	def urlencode_postdata(args, *kargs):
	2967	return compat_urllib_parse_urlencode(args, *kargs).encode('ascii')
	2968
	2969
	2970	def update_url_query(url, query):
	2971	if not query:
	2972	return url
	2973	parsed_url = compat_urlparse.urlparse(url)
	2974	qs = compat_parse_qs(parsed_url.query)
	2975	qs.update(query)
	2976	return compat_urlparse.urlunparse(parsed_url._replace(
	2977	query=compat_urllib_parse_urlencode(qs, True)))
	2978
	2979
	2980	def update_Request(req, url=None, data=None, headers={}, query={}):
	2981	req_headers = req.headers.copy()
	2982	req_headers.update(headers)
	2983	req_data = data or req.data
	2984	req_url = update_url_query(url or req.get_full_url(), query)
	2985	req_get_method = req.get_method()
	2986	if req_get_method == 'HEAD':
	2987	req_type = HEADRequest
	2988	elif req_get_method == 'PUT':
	2989	req_type = PUTRequest
	2990	else:
	2991	req_type = compat_urllib_request.Request
	2992	new_req = req_type(
	2993	req_url, data=req_data, headers=req_headers,
	2994	origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
	2995	if hasattr(req, 'timeout'):
	2996	new_req.timeout = req.timeout
	2997	return new_req
	2998
	2999
	3000	def _multipart_encode_impl(data, boundary):
	3001	content_type = 'multipart/form-data; boundary=%s' % boundary
	3002
	3003	out = b''
	3004	for k, v in data.items():
	3005	out += b'--' + boundary.encode('ascii') + b'\r\n'
	3006	if isinstance(k, compat_str):
	3007	k = k.encode('utf-8')
	3008	if isinstance(v, compat_str):
	3009	v = v.encode('utf-8')
	3010	# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
	3011	# suggests sending UTF-8 directly. Firefox sends UTF-8, too
	3012	content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
	3013	if boundary.encode('ascii') in content:
	3014	raise ValueError('Boundary overlaps with data')
	3015	out += content
	3016
	3017	out += b'--' + boundary.encode('ascii') + b'--\r\n'
	3018
	3019	return out, content_type
	3020
	3021
	3022	def multipart_encode(data, boundary=None):
	3023	'''
	3024	Encode a dict to RFC 7578-compliant form-data
	3025
	3026	data:
	3027	A dict where keys and values can be either Unicode or bytes-like
	3028	objects.
	3029	boundary:
	3030	If specified a Unicode object, it's used as the boundary. Otherwise
	3031	a random boundary is generated.
	3032
	3033	Reference: https://tools.ietf.org/html/rfc7578
	3034	'''
	3035	has_specified_boundary = boundary is not None
	3036
	3037	while True:
	3038	if boundary is None:
	3039	boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
	3040
	3041	try:
	3042	out, content_type = _multipart_encode_impl(data, boundary)
	3043	break
	3044	except ValueError:
	3045	if has_specified_boundary:
	3046	raise
	3047	boundary = None
	3048
	3049	return out, content_type
	3050
	3051
	3052	def dict_get(d, key_or_keys, default=None, skip_false_values=True):
	3053	if isinstance(key_or_keys, (list, tuple)):
	3054	for key in key_or_keys:
	3055	if key not in d or d[key] is None or skip_false_values and not d[key]:
	3056	continue
	3057	return d[key]
	3058	return default
	3059	return d.get(key_or_keys, default)
	3060
	3061
	3062	def try_get(src, getter, expected_type=None):
	3063	for get in variadic(getter):
	3064	try:
	3065	v = get(src)
	3066	except (AttributeError, KeyError, TypeError, IndexError):
	3067	pass
	3068	else:
	3069	if expected_type is None or isinstance(v, expected_type):
	3070	return v
	3071
	3072
	3073	def merge_dicts(*dicts):
	3074	merged = {}
	3075	for a_dict in dicts:
	3076	for k, v in a_dict.items():
	3077	if v is None:
	3078	continue
	3079	if (k not in merged
	3080	or (isinstance(v, compat_str) and v
	3081	and isinstance(merged[k], compat_str)
	3082	and not merged[k])):
	3083	merged[k] = v
	3084	return merged
	3085
	3086
	3087	def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
	3088	return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
	3089
	3090
	3091	US_RATINGS = {
	3092	'G': 0,
	3093	'PG': 10,
	3094	'PG-13': 13,
	3095	'R': 16,
	3096	'NC': 18,
	3097	}
	3098
	3099
	3100	TV_PARENTAL_GUIDELINES = {
	3101	'TV-Y': 0,
	3102	'TV-Y7': 7,
	3103	'TV-G': 0,
	3104	'TV-PG': 0,
	3105	'TV-14': 14,
	3106	'TV-MA': 17,
	3107	}
	3108
	3109
	3110	def parse_age_limit(s):
	3111	if type(s) == int:
	3112	return s if 0 <= s <= 21 else None
	3113	if not isinstance(s, compat_basestring):
	3114	return None
	3115	m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
	3116	if m:
	3117	return int(m.group('age'))
	3118	s = s.upper()
	3119	if s in US_RATINGS:
	3120	return US_RATINGS[s]
	3121	m = re.match(r'^TV[_-]?(%s)$' % '\|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
	3122	if m:
	3123	return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
	3124	return None
	3125
	3126
	3127	def strip_jsonp(code):
	3128	return re.sub(
	3129	r'''(?sx)^
	3130	(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
	3131	(?:\s&&\s(?P=func_name))?
	3132	\s$\s(?P<callback_data>.*)$;?
	3133	\s?(?://[^\n])*$''',
	3134	r'\g<callback_data>', code)
	3135
	3136
	3137	def js_to_json(code, vars={}):
	3138	# vars is a dict of var, val pairs to substitute
	3139	COMMENT_RE = r'/\(?:(?!\/).)?\/\|//[^\n]*\n'
	3140	SKIP_RE = r'\s(?:{comment})?\s'.format(comment=COMMENT_RE)
	3141	INTEGER_TABLE = (
	3142	(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
	3143	(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
	3144	)
	3145
	3146	def fix_kv(m):
	3147	v = m.group(0)
	3148	if v in ('true', 'false', 'null'):
	3149	return v
	3150	elif v in ('undefined', 'void 0'):
	3151	return 'null'
	3152	elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
	3153	return ""
	3154
	3155	if v[0] in ("'", '"'):
	3156	v = re.sub(r'(?s)\\.\|"', lambda m: {
	3157	'"': '\\"',
	3158	"\\'": "'",
	3159	'\\\n': '',
	3160	'\\x': '\\u00',
	3161	}.get(m.group(0), m.group(0)), v[1:-1])
	3162	else:
	3163	for regex, base in INTEGER_TABLE:
	3164	im = re.match(regex, v)
	3165	if im:
	3166	i = int(im.group(1), base)
	3167	return '"%d":' % i if v.endswith(':') else '%d' % i
	3168
	3169	if v in vars:
	3170	return vars[v]
	3171
	3172	return '"%s"' % v
	3173
	3174	code = re.sub(r'new Date$(".+")$', r'\g<1>', code)
	3175
	3176	return re.sub(r'''(?sx)
	3177	"(?:[^"\\](?:\\\\\|\\['"nurtbfx/\n]))[^"\\]*"\|
	3178	'(?:[^'\\](?:\\\\\|\\['"nurtbfx/\n]))[^'\\]*'\|
	3179	{comment}\|,(?={skip}[\]}}])\|
	3180	void\s0\|(?:(?<![0-9])[eE]\|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*\|
	3181	\b(?:0[xX][0-9a-fA-F]+\|0+[0-7]+)(?:{skip}:)?\|
	3182	[0-9]+(?={skip}:)\|
	3183	!+
	3184	'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
	3185
	3186
	3187	def qualities(quality_ids):
	3188	""" Get a numeric quality value out of a list of possible values """
	3189	def q(qid):
	3190	try:
	3191	return quality_ids.index(qid)
	3192	except ValueError:
	3193	return -1
	3194	return q
	3195
	3196
	3197	POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'}
	3198
	3199
	3200	DEFAULT_OUTTMPL = {
	3201	'default': '%(title)s [%(id)s].%(ext)s',
	3202	'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
	3203	}
	3204	OUTTMPL_TYPES = {
	3205	'chapter': None,
	3206	'subtitle': None,
	3207	'thumbnail': None,
	3208	'description': 'description',
	3209	'annotation': 'annotations.xml',
	3210	'infojson': 'info.json',
	3211	'link': None,
	3212	'pl_video': None,
	3213	'pl_thumbnail': None,
	3214	'pl_description': 'description',
	3215	'pl_infojson': 'info.json',
	3216	}
	3217
	3218	# As of [1] format syntax is:
	3219	# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
	3220	# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
	3221	STR_FORMAT_RE_TMPL = r'''(?x)
	3222	(?<!%)(?P<prefix>(?:%%)*)
	3223	%
	3224	(?P<has_key>$(?P<key>{0})$)?
	3225	(?P<format>
	3226	(?P<conversion>[#0\-+ ]+)?
	3227	(?P<min_width>\d+)?
	3228	(?P<precision>\.\d+)?
	3229	(?P<len_mod>[hlL])? # unused in python
	3230	{1} # conversion type
	3231	)
	3232	'''
	3233
	3234
	3235	STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
	3236
	3237
	3238	def limit_length(s, length):
	3239	""" Add ellipses to overly long strings """
	3240	if s is None:
	3241	return None
	3242	ELLIPSES = '...'
	3243	if len(s) > length:
	3244	return s[:length - len(ELLIPSES)] + ELLIPSES
	3245	return s
	3246
	3247
	3248	def version_tuple(v):
	3249	return tuple(int(e) for e in re.split(r'[-.]', v))
	3250
	3251
	3252	def is_outdated_version(version, limit, assume_new=True):
	3253	if not version:
	3254	return not assume_new
	3255	try:
	3256	return version_tuple(version) < version_tuple(limit)
	3257	except ValueError:
	3258	return not assume_new
	3259
	3260
	3261	def ytdl_is_updateable():
	3262	""" Returns if yt-dlp can be updated with -U """
	3263
	3264	from .update import is_non_updateable
	3265
	3266	return not is_non_updateable()
	3267
	3268
	3269	def args_to_str(args):
	3270	# Get a short string representation for a subprocess command
	3271	return ' '.join(compat_shlex_quote(a) for a in args)
	3272
	3273
	3274	def error_to_compat_str(err):
	3275	err_str = str(err)
	3276	# On python 2 error byte string must be decoded with proper
	3277	# encoding rather than ascii
	3278	if sys.version_info[0] < 3:
	3279	err_str = err_str.decode(preferredencoding())
	3280	return err_str
	3281
	3282
	3283	def mimetype2ext(mt):
	3284	if mt is None:
	3285	return None
	3286
	3287	mt, _, params = mt.partition(';')
	3288	mt = mt.strip()
	3289
	3290	FULL_MAP = {
	3291	'audio/mp4': 'm4a',
	3292	# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
	3293	# it's the most popular one
	3294	'audio/mpeg': 'mp3',
	3295	'audio/x-wav': 'wav',
	3296	'audio/wav': 'wav',
	3297	'audio/wave': 'wav',
	3298	}
	3299
	3300	ext = FULL_MAP.get(mt)
	3301	if ext is not None:
	3302	return ext
	3303
	3304	SUBTYPE_MAP = {
	3305	'3gpp': '3gp',
	3306	'smptett+xml': 'tt',
	3307	'ttaf+xml': 'dfxp',
	3308	'ttml+xml': 'ttml',
	3309	'x-flv': 'flv',
	3310	'x-mp4-fragmented': 'mp4',
	3311	'x-ms-sami': 'sami',
	3312	'x-ms-wmv': 'wmv',
	3313	'mpegurl': 'm3u8',
	3314	'x-mpegurl': 'm3u8',
	3315	'vnd.apple.mpegurl': 'm3u8',
	3316	'dash+xml': 'mpd',
	3317	'f4m+xml': 'f4m',
	3318	'hds+xml': 'f4m',
	3319	'vnd.ms-sstr+xml': 'ism',
	3320	'quicktime': 'mov',
	3321	'mp2t': 'ts',
	3322	'x-wav': 'wav',
	3323	'filmstrip+json': 'fs',
	3324	'svg+xml': 'svg',
	3325	}
	3326
	3327	_, _, subtype = mt.rpartition('/')
	3328	ext = SUBTYPE_MAP.get(subtype.lower())
	3329	if ext is not None:
	3330	return ext
	3331
	3332	SUFFIX_MAP = {
	3333	'json': 'json',
	3334	'xml': 'xml',
	3335	'zip': 'zip',
	3336	'gzip': 'gz',
	3337	}
	3338
	3339	_, _, suffix = subtype.partition('+')
	3340	ext = SUFFIX_MAP.get(suffix)
	3341	if ext is not None:
	3342	return ext
	3343
	3344	return subtype.replace('+', '.')
	3345
	3346
	3347	def ext2mimetype(ext_or_url):
	3348	if not ext_or_url:
	3349	return None
	3350	if '.' not in ext_or_url:
	3351	ext_or_url = f'file.{ext_or_url}'
	3352	return mimetypes.guess_type(ext_or_url)[0]
	3353
	3354
	3355	def parse_codecs(codecs_str):
	3356	# http://tools.ietf.org/html/rfc6381
	3357	if not codecs_str:
	3358	return {}
	3359	split_codecs = list(filter(None, map(
	3360	str.strip, codecs_str.strip().strip(',').split(','))))
	3361	vcodec, acodec, tcodec, hdr = None, None, None, None
	3362	for full_codec in split_codecs:
	3363	parts = full_codec.split('.')
	3364	codec = parts[0].replace('0', '')
	3365	if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
	3366	'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
	3367	if not vcodec:
	3368	vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
	3369	if codec in ('dvh1', 'dvhe'):
	3370	hdr = 'DV'
	3371	elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
	3372	hdr = 'HDR10'
	3373	elif full_codec.replace('0', '').startswith('vp9.2'):
	3374	hdr = 'HDR10'
	3375	elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
	3376	if not acodec:
	3377	acodec = full_codec
	3378	elif codec in ('stpp', 'wvtt',):
	3379	if not tcodec:
	3380	tcodec = full_codec
	3381	else:
	3382	write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
	3383	if vcodec or acodec or tcodec:
	3384	return {
	3385	'vcodec': vcodec or 'none',
	3386	'acodec': acodec or 'none',
	3387	'dynamic_range': hdr,
	3388	**({'tcodec': tcodec} if tcodec is not None else {}),
	3389	}
	3390	elif len(split_codecs) == 2:
	3391	return {
	3392	'vcodec': split_codecs[0],
	3393	'acodec': split_codecs[1],
	3394	}
	3395	return {}
	3396
	3397
	3398	def urlhandle_detect_ext(url_handle):
	3399	getheader = url_handle.headers.get
	3400
	3401	cd = getheader('Content-Disposition')
	3402	if cd:
	3403	m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
	3404	if m:
	3405	e = determine_ext(m.group('filename'), default_ext=None)
	3406	if e:
	3407	return e
	3408
	3409	return mimetype2ext(getheader('Content-Type'))
	3410
	3411
	3412	def encode_data_uri(data, mime_type):
	3413	return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
	3414
	3415
	3416	def age_restricted(content_limit, age_limit):
	3417	""" Returns True iff the content should be blocked """
	3418
	3419	if age_limit is None: # No limit set
	3420	return False
	3421	if content_limit is None:
	3422	return False # Content available for everyone
	3423	return age_limit < content_limit
	3424
	3425
	3426	def is_html(first_bytes):
	3427	""" Detect whether a file contains HTML by examining its first bytes. """
	3428
	3429	BOMS = [
	3430	(b'\xef\xbb\xbf', 'utf-8'),
	3431	(b'\x00\x00\xfe\xff', 'utf-32-be'),
	3432	(b'\xff\xfe\x00\x00', 'utf-32-le'),
	3433	(b'\xff\xfe', 'utf-16-le'),
	3434	(b'\xfe\xff', 'utf-16-be'),
	3435	]
	3436	for bom, enc in BOMS:
	3437	if first_bytes.startswith(bom):
	3438	s = first_bytes[len(bom):].decode(enc, 'replace')
	3439	break
	3440	else:
	3441	s = first_bytes.decode('utf-8', 'replace')
	3442
	3443	return re.match(r'^\s*<', s)
	3444
	3445
	3446	def determine_protocol(info_dict):
	3447	protocol = info_dict.get('protocol')
	3448	if protocol is not None:
	3449	return protocol
	3450
	3451	url = sanitize_url(info_dict['url'])
	3452	if url.startswith('rtmp'):
	3453	return 'rtmp'
	3454	elif url.startswith('mms'):
	3455	return 'mms'
	3456	elif url.startswith('rtsp'):
	3457	return 'rtsp'
	3458
	3459	ext = determine_ext(url)
	3460	if ext == 'm3u8':
	3461	return 'm3u8'
	3462	elif ext == 'f4m':
	3463	return 'f4m'
	3464
	3465	return compat_urllib_parse_urlparse(url).scheme
	3466
	3467
	3468	def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
	3469	""" Render a list of rows, each as a list of values.
	3470	Text after a \t will be right aligned """
	3471	def width(string):
	3472	return len(remove_terminal_sequences(string).replace('\t', ''))
	3473
	3474	def get_max_lens(table):
	3475	return [max(width(str(v)) for v in col) for col in zip(*table)]
	3476
	3477	def filter_using_list(row, filterArray):
	3478	return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
	3479
	3480	max_lens = get_max_lens(data) if hide_empty else []
	3481	header_row = filter_using_list(header_row, max_lens)
	3482	data = [filter_using_list(row, max_lens) for row in data]
	3483
	3484	table = [header_row] + data
	3485	max_lens = get_max_lens(table)
	3486	extra_gap += 1
	3487	if delim:
	3488	table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
	3489	table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
	3490	for row in table:
	3491	for pos, text in enumerate(map(str, row)):
	3492	if '\t' in text:
	3493	row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
	3494	else:
	3495	row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
	3496	ret = '\n'.join(''.join(row).rstrip() for row in table)
	3497	return ret
	3498
	3499
	3500	def _match_one(filter_part, dct, incomplete):
	3501	# TODO: Generalize code with YoutubeDL._build_format_filter
	3502	STRING_OPERATORS = {
	3503	'*=': operator.contains,
	3504	'^=': lambda attr, value: attr.startswith(value),
	3505	'$=': lambda attr, value: attr.endswith(value),
	3506	'~=': lambda attr, value: re.search(value, attr),
	3507	}
	3508	COMPARISON_OPERATORS = {
	3509	**STRING_OPERATORS,
	3510	'<=': operator.le, # "<=" must be defined above "<"
	3511	'<': operator.lt,
	3512	'>=': operator.ge,
	3513	'>': operator.gt,
	3514	'=': operator.eq,
	3515	}
	3516
	3517	operator_rex = re.compile(r'''(?x)\s*
	3518	(?P<key>[a-z_]+)
	3519	\s(?P<negation>!\s)?(?P<op>%s)(?P<none_inclusive>\s\?)?\s
	3520	(?:
	3521	(?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)\|
	3522	(?P<strval>.+?)
	3523	)
	3524	\s*$
	3525	''' % '\|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
	3526	m = operator_rex.search(filter_part)
	3527	if m:
	3528	m = m.groupdict()
	3529	unnegated_op = COMPARISON_OPERATORS[m['op']]
	3530	if m['negation']:
	3531	op = lambda attr, value: not unnegated_op(attr, value)
	3532	else:
	3533	op = unnegated_op
	3534	comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
	3535	if m['quote']:
	3536	comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
	3537	actual_value = dct.get(m['key'])
	3538	numeric_comparison = None
	3539	if isinstance(actual_value, compat_numeric_types):
	3540	# If the original field is a string and matching comparisonvalue is
	3541	# a number we should respect the origin of the original field
	3542	# and process comparison value as a string (see
	3543	# https://github.com/ytdl-org/youtube-dl/issues/11082)
	3544	try:
	3545	numeric_comparison = int(comparison_value)
	3546	except ValueError:
	3547	numeric_comparison = parse_filesize(comparison_value)
	3548	if numeric_comparison is None:
	3549	numeric_comparison = parse_filesize(f'{comparison_value}B')
	3550	if numeric_comparison is None:
	3551	numeric_comparison = parse_duration(comparison_value)
	3552	if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
	3553	raise ValueError('Operator %s only supports string values!' % m['op'])
	3554	if actual_value is None:
	3555	return incomplete or m['none_inclusive']
	3556	return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
	3557
	3558	UNARY_OPERATORS = {
	3559	'': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
	3560	'!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
	3561	}
	3562	operator_rex = re.compile(r'''(?x)\s*
	3563	(?P<op>%s)\s*(?P<key>[a-z_]+)
	3564	\s*$
	3565	''' % '\|'.join(map(re.escape, UNARY_OPERATORS.keys())))
	3566	m = operator_rex.search(filter_part)
	3567	if m:
	3568	op = UNARY_OPERATORS[m.group('op')]
	3569	actual_value = dct.get(m.group('key'))
	3570	if incomplete and actual_value is None:
	3571	return True
	3572	return op(actual_value)
	3573
	3574	raise ValueError('Invalid filter part %r' % filter_part)
	3575
	3576
	3577	def match_str(filter_str, dct, incomplete=False):
	3578	""" Filter a dictionary with a simple string syntax. Returns True (=passes filter) or false
	3579	When incomplete, all conditions passes on missing fields
	3580	"""
	3581	return all(
	3582	_match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
	3583	for filter_part in re.split(r'(?<!\\)&', filter_str))
	3584
	3585
	3586	def match_filter_func(filter_str):
	3587	def _match_func(info_dict, args, *kwargs):
	3588	if match_str(filter_str, info_dict, args, *kwargs):
	3589	return None
	3590	else:
	3591	video_title = info_dict.get('title', info_dict.get('id', 'video'))
	3592	return '%s does not pass filter %s, skipping ..' % (video_title, filter_str)
	3593	return _match_func
	3594
	3595
	3596	def parse_dfxp_time_expr(time_expr):
	3597	if not time_expr:
	3598	return
	3599
	3600	mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
	3601	if mobj:
	3602	return float(mobj.group('time_offset'))
	3603
	3604	mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.\|:)\d+)?)$', time_expr)
	3605	if mobj:
	3606	return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
	3607
	3608
	3609	def srt_subtitles_timecode(seconds):
	3610	return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
	3611
	3612
	3613	def ass_subtitles_timecode(seconds):
	3614	time = timetuple_from_msec(seconds * 1000)
	3615	return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
	3616
	3617
	3618	def dfxp2srt(dfxp_data):
	3619	'''
	3620	@param dfxp_data A bytes-like object containing DFXP data
	3621	@returns A unicode object containing converted SRT data
	3622	'''
	3623	LEGACY_NAMESPACES = (
	3624	(b'http://www.w3.org/ns/ttml', [
	3625	b'http://www.w3.org/2004/11/ttaf1',
	3626	b'http://www.w3.org/2006/04/ttaf1',
	3627	b'http://www.w3.org/2006/10/ttaf1',
	3628	]),
	3629	(b'http://www.w3.org/ns/ttml#styling', [
	3630	b'http://www.w3.org/ns/ttml#style',
	3631	]),
	3632	)
	3633
	3634	SUPPORTED_STYLING = [
	3635	'color',
	3636	'fontFamily',
	3637	'fontSize',
	3638	'fontStyle',
	3639	'fontWeight',
	3640	'textDecoration'
	3641	]
	3642
	3643	_x = functools.partial(xpath_with_ns, ns_map={
	3644	'xml': 'http://www.w3.org/XML/1998/namespace',
	3645	'ttml': 'http://www.w3.org/ns/ttml',
	3646	'tts': 'http://www.w3.org/ns/ttml#styling',
	3647	})
	3648
	3649	styles = {}
	3650	default_style = {}
	3651
	3652	class TTMLPElementParser(object):
	3653	_out = ''
	3654	_unclosed_elements = []
	3655	_applied_styles = []
	3656
	3657	def start(self, tag, attrib):
	3658	if tag in (_x('ttml:br'), 'br'):
	3659	self._out += '\n'
	3660	else:
	3661	unclosed_elements = []
	3662	style = {}
	3663	element_style_id = attrib.get('style')
	3664	if default_style:
	3665	style.update(default_style)
	3666	if element_style_id:
	3667	style.update(styles.get(element_style_id, {}))
	3668	for prop in SUPPORTED_STYLING:
	3669	prop_val = attrib.get(_x('tts:' + prop))
	3670	if prop_val:
	3671	style[prop] = prop_val
	3672	if style:
	3673	font = ''
	3674	for k, v in sorted(style.items()):
	3675	if self._applied_styles and self._applied_styles[-1].get(k) == v:
	3676	continue
	3677	if k == 'color':
	3678	font += ' color="%s"' % v
	3679	elif k == 'fontSize':
	3680	font += ' size="%s"' % v
	3681	elif k == 'fontFamily':
	3682	font += ' face="%s"' % v
	3683	elif k == 'fontWeight' and v == 'bold':
	3684	self._out += '<b>'
	3685	unclosed_elements.append('b')
	3686	elif k == 'fontStyle' and v == 'italic':
	3687	self._out += '<i>'
	3688	unclosed_elements.append('i')
	3689	elif k == 'textDecoration' and v == 'underline':
	3690	self._out += '<u>'
	3691	unclosed_elements.append('u')
	3692	if font:
	3693	self._out += '<font' + font + '>'
	3694	unclosed_elements.append('font')
	3695	applied_style = {}
	3696	if self._applied_styles:
	3697	applied_style.update(self._applied_styles[-1])
	3698	applied_style.update(style)
	3699	self._applied_styles.append(applied_style)
	3700	self._unclosed_elements.append(unclosed_elements)
	3701
	3702	def end(self, tag):
	3703	if tag not in (_x('ttml:br'), 'br'):
	3704	unclosed_elements = self._unclosed_elements.pop()
	3705	for element in reversed(unclosed_elements):
	3706	self._out += '</%s>' % element
	3707	if unclosed_elements and self._applied_styles:
	3708	self._applied_styles.pop()
	3709
	3710	def data(self, data):
	3711	self._out += data
	3712
	3713	def close(self):
	3714	return self._out.strip()
	3715
	3716	def parse_node(node):
	3717	target = TTMLPElementParser()
	3718	parser = xml.etree.ElementTree.XMLParser(target=target)
	3719	parser.feed(xml.etree.ElementTree.tostring(node))
	3720	return parser.close()
	3721
	3722	for k, v in LEGACY_NAMESPACES:
	3723	for ns in v:
	3724	dfxp_data = dfxp_data.replace(ns, k)
	3725
	3726	dfxp = compat_etree_fromstring(dfxp_data)
	3727	out = []
	3728	paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
	3729
	3730	if not paras:
	3731	raise ValueError('Invalid dfxp/TTML subtitle')
	3732
	3733	repeat = False
	3734	while True:
	3735	for style in dfxp.findall(_x('.//ttml:style')):
	3736	style_id = style.get('id') or style.get(_x('xml:id'))
	3737	if not style_id:
	3738	continue
	3739	parent_style_id = style.get('style')
	3740	if parent_style_id:
	3741	if parent_style_id not in styles:
	3742	repeat = True
	3743	continue
	3744	styles[style_id] = styles[parent_style_id].copy()
	3745	for prop in SUPPORTED_STYLING:
	3746	prop_val = style.get(_x('tts:' + prop))
	3747	if prop_val:
	3748	styles.setdefault(style_id, {})[prop] = prop_val
	3749	if repeat:
	3750	repeat = False
	3751	else:
	3752	break
	3753
	3754	for p in ('body', 'div'):
	3755	ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
	3756	if ele is None:
	3757	continue
	3758	style = styles.get(ele.get('style'))
	3759	if not style:
	3760	continue
	3761	default_style.update(style)
	3762
	3763	for para, index in zip(paras, itertools.count(1)):
	3764	begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
	3765	end_time = parse_dfxp_time_expr(para.attrib.get('end'))
	3766	dur = parse_dfxp_time_expr(para.attrib.get('dur'))
	3767	if begin_time is None:
	3768	continue
	3769	if not end_time:
	3770	if not dur:
	3771	continue
	3772	end_time = begin_time + dur
	3773	out.append('%d\n%s --> %s\n%s\n\n' % (
	3774	index,
	3775	srt_subtitles_timecode(begin_time),
	3776	srt_subtitles_timecode(end_time),
	3777	parse_node(para)))
	3778
	3779	return ''.join(out)
	3780
	3781
	3782	def cli_option(params, command_option, param):
	3783	param = params.get(param)
	3784	if param:
	3785	param = compat_str(param)
	3786	return [command_option, param] if param is not None else []
	3787
	3788
	3789	def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
	3790	param = params.get(param)
	3791	if param is None:
	3792	return []
	3793	assert isinstance(param, bool)
	3794	if separator:
	3795	return [command_option + separator + (true_value if param else false_value)]
	3796	return [command_option, true_value if param else false_value]
	3797
	3798
	3799	def cli_valueless_option(params, command_option, param, expected_value=True):
	3800	param = params.get(param)
	3801	return [command_option] if param == expected_value else []
	3802
	3803
	3804	def cli_configuration_args(argdict, keys, default=[], use_compat=True):
	3805	if isinstance(argdict, (list, tuple)): # for backward compatibility
	3806	if use_compat:
	3807	return argdict
	3808	else:
	3809	argdict = None
	3810	if argdict is None:
	3811	return default
	3812	assert isinstance(argdict, dict)
	3813
	3814	assert isinstance(keys, (list, tuple))
	3815	for key_list in keys:
	3816	arg_list = list(filter(
	3817	lambda x: x is not None,
	3818	[argdict.get(key.lower()) for key in variadic(key_list)]))
	3819	if arg_list:
	3820	return [arg for args in arg_list for arg in args]
	3821	return default
	3822
	3823
	3824	def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
	3825	main_key, exe = main_key.lower(), exe.lower()
	3826	root_key = exe if main_key == exe else f'{main_key}+{exe}'
	3827	keys = [f'{root_key}{k}' for k in (keys or [''])]
	3828	if root_key in keys:
	3829	if main_key != exe:
	3830	keys.append((main_key, exe))
	3831	keys.append('default')
	3832	else:
	3833	use_compat = False
	3834	return cli_configuration_args(argdict, keys, default, use_compat)
	3835
	3836
	3837	class ISO639Utils(object):
	3838	# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
	3839	_lang_map = {
	3840	'aa': 'aar',
	3841	'ab': 'abk',
	3842	'ae': 'ave',
	3843	'af': 'afr',
	3844	'ak': 'aka',
	3845	'am': 'amh',
	3846	'an': 'arg',
	3847	'ar': 'ara',
	3848	'as': 'asm',
	3849	'av': 'ava',
	3850	'ay': 'aym',
	3851	'az': 'aze',
	3852	'ba': 'bak',
	3853	'be': 'bel',
	3854	'bg': 'bul',
	3855	'bh': 'bih',
	3856	'bi': 'bis',
	3857	'bm': 'bam',
	3858	'bn': 'ben',
	3859	'bo': 'bod',
	3860	'br': 'bre',
	3861	'bs': 'bos',
	3862	'ca': 'cat',
	3863	'ce': 'che',
	3864	'ch': 'cha',
	3865	'co': 'cos',
	3866	'cr': 'cre',
	3867	'cs': 'ces',
	3868	'cu': 'chu',
	3869	'cv': 'chv',
	3870	'cy': 'cym',
	3871	'da': 'dan',
	3872	'de': 'deu',
	3873	'dv': 'div',
	3874	'dz': 'dzo',
	3875	'ee': 'ewe',
	3876	'el': 'ell',
	3877	'en': 'eng',
	3878	'eo': 'epo',
	3879	'es': 'spa',
	3880	'et': 'est',
	3881	'eu': 'eus',
	3882	'fa': 'fas',
	3883	'ff': 'ful',
	3884	'fi': 'fin',
	3885	'fj': 'fij',
	3886	'fo': 'fao',
	3887	'fr': 'fra',
	3888	'fy': 'fry',
	3889	'ga': 'gle',
	3890	'gd': 'gla',
	3891	'gl': 'glg',
	3892	'gn': 'grn',
	3893	'gu': 'guj',
	3894	'gv': 'glv',
	3895	'ha': 'hau',
	3896	'he': 'heb',
	3897	'iw': 'heb', # Replaced by he in 1989 revision
	3898	'hi': 'hin',
	3899	'ho': 'hmo',
	3900	'hr': 'hrv',
	3901	'ht': 'hat',
	3902	'hu': 'hun',
	3903	'hy': 'hye',
	3904	'hz': 'her',
	3905	'ia': 'ina',
	3906	'id': 'ind',
	3907	'in': 'ind', # Replaced by id in 1989 revision
	3908	'ie': 'ile',
	3909	'ig': 'ibo',
	3910	'ii': 'iii',
	3911	'ik': 'ipk',
	3912	'io': 'ido',
	3913	'is': 'isl',
	3914	'it': 'ita',
	3915	'iu': 'iku',
	3916	'ja': 'jpn',
	3917	'jv': 'jav',
	3918	'ka': 'kat',
	3919	'kg': 'kon',
	3920	'ki': 'kik',
	3921	'kj': 'kua',
	3922	'kk': 'kaz',
	3923	'kl': 'kal',
	3924	'km': 'khm',
	3925	'kn': 'kan',
	3926	'ko': 'kor',
	3927	'kr': 'kau',
	3928	'ks': 'kas',
	3929	'ku': 'kur',
	3930	'kv': 'kom',
	3931	'kw': 'cor',
	3932	'ky': 'kir',
	3933	'la': 'lat',
	3934	'lb': 'ltz',
	3935	'lg': 'lug',
	3936	'li': 'lim',
	3937	'ln': 'lin',
	3938	'lo': 'lao',
	3939	'lt': 'lit',
	3940	'lu': 'lub',
	3941	'lv': 'lav',
	3942	'mg': 'mlg',
	3943	'mh': 'mah',
	3944	'mi': 'mri',
	3945	'mk': 'mkd',
	3946	'ml': 'mal',
	3947	'mn': 'mon',
	3948	'mr': 'mar',
	3949	'ms': 'msa',
	3950	'mt': 'mlt',
	3951	'my': 'mya',
	3952	'na': 'nau',
	3953	'nb': 'nob',
	3954	'nd': 'nde',
	3955	'ne': 'nep',
	3956	'ng': 'ndo',
	3957	'nl': 'nld',
	3958	'nn': 'nno',
	3959	'no': 'nor',
	3960	'nr': 'nbl',
	3961	'nv': 'nav',
	3962	'ny': 'nya',
	3963	'oc': 'oci',
	3964	'oj': 'oji',
	3965	'om': 'orm',
	3966	'or': 'ori',
	3967	'os': 'oss',
	3968	'pa': 'pan',
	3969	'pi': 'pli',
	3970	'pl': 'pol',
	3971	'ps': 'pus',
	3972	'pt': 'por',
	3973	'qu': 'que',
	3974	'rm': 'roh',
	3975	'rn': 'run',
	3976	'ro': 'ron',
	3977	'ru': 'rus',
	3978	'rw': 'kin',
	3979	'sa': 'san',
	3980	'sc': 'srd',
	3981	'sd': 'snd',
	3982	'se': 'sme',
	3983	'sg': 'sag',
	3984	'si': 'sin',
	3985	'sk': 'slk',
	3986	'sl': 'slv',
	3987	'sm': 'smo',
	3988	'sn': 'sna',
	3989	'so': 'som',
	3990	'sq': 'sqi',
	3991	'sr': 'srp',
	3992	'ss': 'ssw',
	3993	'st': 'sot',
	3994	'su': 'sun',
	3995	'sv': 'swe',
	3996	'sw': 'swa',
	3997	'ta': 'tam',
	3998	'te': 'tel',
	3999	'tg': 'tgk',
	4000	'th': 'tha',
	4001	'ti': 'tir',
	4002	'tk': 'tuk',
	4003	'tl': 'tgl',
	4004	'tn': 'tsn',
	4005	'to': 'ton',
	4006	'tr': 'tur',
	4007	'ts': 'tso',
	4008	'tt': 'tat',
	4009	'tw': 'twi',
	4010	'ty': 'tah',
	4011	'ug': 'uig',
	4012	'uk': 'ukr',
	4013	'ur': 'urd',
	4014	'uz': 'uzb',
	4015	've': 'ven',
	4016	'vi': 'vie',
	4017	'vo': 'vol',
	4018	'wa': 'wln',
	4019	'wo': 'wol',
	4020	'xh': 'xho',
	4021	'yi': 'yid',
	4022	'ji': 'yid', # Replaced by yi in 1989 revision
	4023	'yo': 'yor',
	4024	'za': 'zha',
	4025	'zh': 'zho',
	4026	'zu': 'zul',
	4027	}
	4028
	4029	@classmethod
	4030	def short2long(cls, code):
	4031	"""Convert language code from ISO 639-1 to ISO 639-2/T"""
	4032	return cls._lang_map.get(code[:2])
	4033
	4034	@classmethod
	4035	def long2short(cls, code):
	4036	"""Convert language code from ISO 639-2/T to ISO 639-1"""
	4037	for short_name, long_name in cls._lang_map.items():
	4038	if long_name == code:
	4039	return short_name
	4040
	4041
	4042	class ISO3166Utils(object):
	4043	# From http://data.okfn.org/data/core/country-list
	4044	_country_map = {
	4045	'AF': 'Afghanistan',
	4046	'AX': 'Åland Islands',
	4047	'AL': 'Albania',
	4048	'DZ': 'Algeria',
	4049	'AS': 'American Samoa',
	4050	'AD': 'Andorra',
	4051	'AO': 'Angola',
	4052	'AI': 'Anguilla',
	4053	'AQ': 'Antarctica',
	4054	'AG': 'Antigua and Barbuda',
	4055	'AR': 'Argentina',
	4056	'AM': 'Armenia',
	4057	'AW': 'Aruba',
	4058	'AU': 'Australia',
	4059	'AT': 'Austria',
	4060	'AZ': 'Azerbaijan',
	4061	'BS': 'Bahamas',
	4062	'BH': 'Bahrain',
	4063	'BD': 'Bangladesh',
	4064	'BB': 'Barbados',
	4065	'BY': 'Belarus',
	4066	'BE': 'Belgium',
	4067	'BZ': 'Belize',
	4068	'BJ': 'Benin',
	4069	'BM': 'Bermuda',
	4070	'BT': 'Bhutan',
	4071	'BO': 'Bolivia, Plurinational State of',
	4072	'BQ': 'Bonaire, Sint Eustatius and Saba',
	4073	'BA': 'Bosnia and Herzegovina',
	4074	'BW': 'Botswana',
	4075	'BV': 'Bouvet Island',
	4076	'BR': 'Brazil',
	4077	'IO': 'British Indian Ocean Territory',
	4078	'BN': 'Brunei Darussalam',
	4079	'BG': 'Bulgaria',
	4080	'BF': 'Burkina Faso',
	4081	'BI': 'Burundi',
	4082	'KH': 'Cambodia',
	4083	'CM': 'Cameroon',
	4084	'CA': 'Canada',
	4085	'CV': 'Cape Verde',
	4086	'KY': 'Cayman Islands',
	4087	'CF': 'Central African Republic',
	4088	'TD': 'Chad',
	4089	'CL': 'Chile',
	4090	'CN': 'China',
	4091	'CX': 'Christmas Island',
	4092	'CC': 'Cocos (Keeling) Islands',
	4093	'CO': 'Colombia',
	4094	'KM': 'Comoros',
	4095	'CG': 'Congo',
	4096	'CD': 'Congo, the Democratic Republic of the',
	4097	'CK': 'Cook Islands',
	4098	'CR': 'Costa Rica',
	4099	'CI': 'Côte d\'Ivoire',
	4100	'HR': 'Croatia',
	4101	'CU': 'Cuba',
	4102	'CW': 'Curaçao',
	4103	'CY': 'Cyprus',
	4104	'CZ': 'Czech Republic',
	4105	'DK': 'Denmark',
	4106	'DJ': 'Djibouti',
	4107	'DM': 'Dominica',
	4108	'DO': 'Dominican Republic',
	4109	'EC': 'Ecuador',
	4110	'EG': 'Egypt',
	4111	'SV': 'El Salvador',
	4112	'GQ': 'Equatorial Guinea',
	4113	'ER': 'Eritrea',
	4114	'EE': 'Estonia',
	4115	'ET': 'Ethiopia',
	4116	'FK': 'Falkland Islands (Malvinas)',
	4117	'FO': 'Faroe Islands',
	4118	'FJ': 'Fiji',
	4119	'FI': 'Finland',
	4120	'FR': 'France',
	4121	'GF': 'French Guiana',
	4122	'PF': 'French Polynesia',
	4123	'TF': 'French Southern Territories',
	4124	'GA': 'Gabon',
	4125	'GM': 'Gambia',
	4126	'GE': 'Georgia',
	4127	'DE': 'Germany',
	4128	'GH': 'Ghana',
	4129	'GI': 'Gibraltar',
	4130	'GR': 'Greece',
	4131	'GL': 'Greenland',
	4132	'GD': 'Grenada',
	4133	'GP': 'Guadeloupe',
	4134	'GU': 'Guam',
	4135	'GT': 'Guatemala',
	4136	'GG': 'Guernsey',
	4137	'GN': 'Guinea',
	4138	'GW': 'Guinea-Bissau',
	4139	'GY': 'Guyana',
	4140	'HT': 'Haiti',
	4141	'HM': 'Heard Island and McDonald Islands',
	4142	'VA': 'Holy See (Vatican City State)',
	4143	'HN': 'Honduras',
	4144	'HK': 'Hong Kong',
	4145	'HU': 'Hungary',
	4146	'IS': 'Iceland',
	4147	'IN': 'India',
	4148	'ID': 'Indonesia',
	4149	'IR': 'Iran, Islamic Republic of',
	4150	'IQ': 'Iraq',
	4151	'IE': 'Ireland',
	4152	'IM': 'Isle of Man',
	4153	'IL': 'Israel',
	4154	'IT': 'Italy',
	4155	'JM': 'Jamaica',
	4156	'JP': 'Japan',
	4157	'JE': 'Jersey',
	4158	'JO': 'Jordan',
	4159	'KZ': 'Kazakhstan',
	4160	'KE': 'Kenya',
	4161	'KI': 'Kiribati',
	4162	'KP': 'Korea, Democratic People\'s Republic of',
	4163	'KR': 'Korea, Republic of',
	4164	'KW': 'Kuwait',
	4165	'KG': 'Kyrgyzstan',
	4166	'LA': 'Lao People\'s Democratic Republic',
	4167	'LV': 'Latvia',
	4168	'LB': 'Lebanon',
	4169	'LS': 'Lesotho',
	4170	'LR': 'Liberia',
	4171	'LY': 'Libya',
	4172	'LI': 'Liechtenstein',
	4173	'LT': 'Lithuania',
	4174	'LU': 'Luxembourg',
	4175	'MO': 'Macao',
	4176	'MK': 'Macedonia, the Former Yugoslav Republic of',
	4177	'MG': 'Madagascar',
	4178	'MW': 'Malawi',
	4179	'MY': 'Malaysia',
	4180	'MV': 'Maldives',
	4181	'ML': 'Mali',
	4182	'MT': 'Malta',
	4183	'MH': 'Marshall Islands',
	4184	'MQ': 'Martinique',
	4185	'MR': 'Mauritania',
	4186	'MU': 'Mauritius',
	4187	'YT': 'Mayotte',
	4188	'MX': 'Mexico',
	4189	'FM': 'Micronesia, Federated States of',
	4190	'MD': 'Moldova, Republic of',
	4191	'MC': 'Monaco',
	4192	'MN': 'Mongolia',
	4193	'ME': 'Montenegro',
	4194	'MS': 'Montserrat',
	4195	'MA': 'Morocco',
	4196	'MZ': 'Mozambique',
	4197	'MM': 'Myanmar',
	4198	'NA': 'Namibia',
	4199	'NR': 'Nauru',
	4200	'NP': 'Nepal',
	4201	'NL': 'Netherlands',
	4202	'NC': 'New Caledonia',
	4203	'NZ': 'New Zealand',
	4204	'NI': 'Nicaragua',
	4205	'NE': 'Niger',
	4206	'NG': 'Nigeria',
	4207	'NU': 'Niue',
	4208	'NF': 'Norfolk Island',
	4209	'MP': 'Northern Mariana Islands',
	4210	'NO': 'Norway',
	4211	'OM': 'Oman',
	4212	'PK': 'Pakistan',
	4213	'PW': 'Palau',
	4214	'PS': 'Palestine, State of',
	4215	'PA': 'Panama',
	4216	'PG': 'Papua New Guinea',
	4217	'PY': 'Paraguay',
	4218	'PE': 'Peru',
	4219	'PH': 'Philippines',
	4220	'PN': 'Pitcairn',
	4221	'PL': 'Poland',
	4222	'PT': 'Portugal',
	4223	'PR': 'Puerto Rico',
	4224	'QA': 'Qatar',
	4225	'RE': 'Réunion',
	4226	'RO': 'Romania',
	4227	'RU': 'Russian Federation',
	4228	'RW': 'Rwanda',
	4229	'BL': 'Saint Barthélemy',
	4230	'SH': 'Saint Helena, Ascension and Tristan da Cunha',
	4231	'KN': 'Saint Kitts and Nevis',
	4232	'LC': 'Saint Lucia',
	4233	'MF': 'Saint Martin (French part)',
	4234	'PM': 'Saint Pierre and Miquelon',
	4235	'VC': 'Saint Vincent and the Grenadines',
	4236	'WS': 'Samoa',
	4237	'SM': 'San Marino',
	4238	'ST': 'Sao Tome and Principe',
	4239	'SA': 'Saudi Arabia',
	4240	'SN': 'Senegal',
	4241	'RS': 'Serbia',
	4242	'SC': 'Seychelles',
	4243	'SL': 'Sierra Leone',
	4244	'SG': 'Singapore',
	4245	'SX': 'Sint Maarten (Dutch part)',
	4246	'SK': 'Slovakia',
	4247	'SI': 'Slovenia',
	4248	'SB': 'Solomon Islands',
	4249	'SO': 'Somalia',
	4250	'ZA': 'South Africa',
	4251	'GS': 'South Georgia and the South Sandwich Islands',
	4252	'SS': 'South Sudan',
	4253	'ES': 'Spain',
	4254	'LK': 'Sri Lanka',
	4255	'SD': 'Sudan',
	4256	'SR': 'Suriname',
	4257	'SJ': 'Svalbard and Jan Mayen',
	4258	'SZ': 'Swaziland',
	4259	'SE': 'Sweden',
	4260	'CH': 'Switzerland',
	4261	'SY': 'Syrian Arab Republic',
	4262	'TW': 'Taiwan, Province of China',
	4263	'TJ': 'Tajikistan',
	4264	'TZ': 'Tanzania, United Republic of',
	4265	'TH': 'Thailand',
	4266	'TL': 'Timor-Leste',
	4267	'TG': 'Togo',
	4268	'TK': 'Tokelau',
	4269	'TO': 'Tonga',
	4270	'TT': 'Trinidad and Tobago',
	4271	'TN': 'Tunisia',
	4272	'TR': 'Turkey',
	4273	'TM': 'Turkmenistan',
	4274	'TC': 'Turks and Caicos Islands',
	4275	'TV': 'Tuvalu',
	4276	'UG': 'Uganda',
	4277	'UA': 'Ukraine',
	4278	'AE': 'United Arab Emirates',
	4279	'GB': 'United Kingdom',
	4280	'US': 'United States',
	4281	'UM': 'United States Minor Outlying Islands',
	4282	'UY': 'Uruguay',
	4283	'UZ': 'Uzbekistan',
	4284	'VU': 'Vanuatu',
	4285	'VE': 'Venezuela, Bolivarian Republic of',
	4286	'VN': 'Viet Nam',
	4287	'VG': 'Virgin Islands, British',
	4288	'VI': 'Virgin Islands, U.S.',
	4289	'WF': 'Wallis and Futuna',
	4290	'EH': 'Western Sahara',
	4291	'YE': 'Yemen',
	4292	'ZM': 'Zambia',
	4293	'ZW': 'Zimbabwe',
	4294	}
	4295
	4296	@classmethod
	4297	def short2full(cls, code):
	4298	"""Convert an ISO 3166-2 country code to the corresponding full name"""
	4299	return cls._country_map.get(code.upper())
	4300
	4301
	4302	class GeoUtils(object):
	4303	# Major IPv4 address blocks per country
	4304	_country_ip_map = {
	4305	'AD': '46.172.224.0/19',
	4306	'AE': '94.200.0.0/13',
	4307	'AF': '149.54.0.0/17',
	4308	'AG': '209.59.64.0/18',
	4309	'AI': '204.14.248.0/21',
	4310	'AL': '46.99.0.0/16',
	4311	'AM': '46.70.0.0/15',
	4312	'AO': '105.168.0.0/13',
	4313	'AP': '182.50.184.0/21',
	4314	'AQ': '23.154.160.0/24',
	4315	'AR': '181.0.0.0/12',
	4316	'AS': '202.70.112.0/20',
	4317	'AT': '77.116.0.0/14',
	4318	'AU': '1.128.0.0/11',
	4319	'AW': '181.41.0.0/18',
	4320	'AX': '185.217.4.0/22',
	4321	'AZ': '5.197.0.0/16',
	4322	'BA': '31.176.128.0/17',
	4323	'BB': '65.48.128.0/17',
	4324	'BD': '114.130.0.0/16',
	4325	'BE': '57.0.0.0/8',
	4326	'BF': '102.178.0.0/15',
	4327	'BG': '95.42.0.0/15',
	4328	'BH': '37.131.0.0/17',
	4329	'BI': '154.117.192.0/18',
	4330	'BJ': '137.255.0.0/16',
	4331	'BL': '185.212.72.0/23',
	4332	'BM': '196.12.64.0/18',
	4333	'BN': '156.31.0.0/16',
	4334	'BO': '161.56.0.0/16',
	4335	'BQ': '161.0.80.0/20',
	4336	'BR': '191.128.0.0/12',
	4337	'BS': '24.51.64.0/18',
	4338	'BT': '119.2.96.0/19',
	4339	'BW': '168.167.0.0/16',
	4340	'BY': '178.120.0.0/13',
	4341	'BZ': '179.42.192.0/18',
	4342	'CA': '99.224.0.0/11',
	4343	'CD': '41.243.0.0/16',
	4344	'CF': '197.242.176.0/21',
	4345	'CG': '160.113.0.0/16',
	4346	'CH': '85.0.0.0/13',
	4347	'CI': '102.136.0.0/14',
	4348	'CK': '202.65.32.0/19',
	4349	'CL': '152.172.0.0/14',
	4350	'CM': '102.244.0.0/14',
	4351	'CN': '36.128.0.0/10',
	4352	'CO': '181.240.0.0/12',
	4353	'CR': '201.192.0.0/12',
	4354	'CU': '152.206.0.0/15',
	4355	'CV': '165.90.96.0/19',
	4356	'CW': '190.88.128.0/17',
	4357	'CY': '31.153.0.0/16',
	4358	'CZ': '88.100.0.0/14',
	4359	'DE': '53.0.0.0/8',
	4360	'DJ': '197.241.0.0/17',
	4361	'DK': '87.48.0.0/12',
	4362	'DM': '192.243.48.0/20',
	4363	'DO': '152.166.0.0/15',
	4364	'DZ': '41.96.0.0/12',
	4365	'EC': '186.68.0.0/15',
	4366	'EE': '90.190.0.0/15',
	4367	'EG': '156.160.0.0/11',
	4368	'ER': '196.200.96.0/20',
	4369	'ES': '88.0.0.0/11',
	4370	'ET': '196.188.0.0/14',
	4371	'EU': '2.16.0.0/13',
	4372	'FI': '91.152.0.0/13',
	4373	'FJ': '144.120.0.0/16',
	4374	'FK': '80.73.208.0/21',
	4375	'FM': '119.252.112.0/20',
	4376	'FO': '88.85.32.0/19',
	4377	'FR': '90.0.0.0/9',
	4378	'GA': '41.158.0.0/15',
	4379	'GB': '25.0.0.0/8',
	4380	'GD': '74.122.88.0/21',
	4381	'GE': '31.146.0.0/16',
	4382	'GF': '161.22.64.0/18',
	4383	'GG': '62.68.160.0/19',
	4384	'GH': '154.160.0.0/12',
	4385	'GI': '95.164.0.0/16',
	4386	'GL': '88.83.0.0/19',
	4387	'GM': '160.182.0.0/15',
	4388	'GN': '197.149.192.0/18',
	4389	'GP': '104.250.0.0/19',
	4390	'GQ': '105.235.224.0/20',
	4391	'GR': '94.64.0.0/13',
	4392	'GT': '168.234.0.0/16',
	4393	'GU': '168.123.0.0/16',
	4394	'GW': '197.214.80.0/20',
	4395	'GY': '181.41.64.0/18',
	4396	'HK': '113.252.0.0/14',
	4397	'HN': '181.210.0.0/16',
	4398	'HR': '93.136.0.0/13',
	4399	'HT': '148.102.128.0/17',
	4400	'HU': '84.0.0.0/14',
	4401	'ID': '39.192.0.0/10',
	4402	'IE': '87.32.0.0/12',
	4403	'IL': '79.176.0.0/13',
	4404	'IM': '5.62.80.0/20',
	4405	'IN': '117.192.0.0/10',
	4406	'IO': '203.83.48.0/21',
	4407	'IQ': '37.236.0.0/14',
	4408	'IR': '2.176.0.0/12',
	4409	'IS': '82.221.0.0/16',
	4410	'IT': '79.0.0.0/10',
	4411	'JE': '87.244.64.0/18',
	4412	'JM': '72.27.0.0/17',
	4413	'JO': '176.29.0.0/16',
	4414	'JP': '133.0.0.0/8',
	4415	'KE': '105.48.0.0/12',
	4416	'KG': '158.181.128.0/17',
	4417	'KH': '36.37.128.0/17',
	4418	'KI': '103.25.140.0/22',
	4419	'KM': '197.255.224.0/20',
	4420	'KN': '198.167.192.0/19',
	4421	'KP': '175.45.176.0/22',
	4422	'KR': '175.192.0.0/10',
	4423	'KW': '37.36.0.0/14',
	4424	'KY': '64.96.0.0/15',
	4425	'KZ': '2.72.0.0/13',
	4426	'LA': '115.84.64.0/18',
	4427	'LB': '178.135.0.0/16',
	4428	'LC': '24.92.144.0/20',
	4429	'LI': '82.117.0.0/19',
	4430	'LK': '112.134.0.0/15',
	4431	'LR': '102.183.0.0/16',
	4432	'LS': '129.232.0.0/17',
	4433	'LT': '78.56.0.0/13',
	4434	'LU': '188.42.0.0/16',
	4435	'LV': '46.109.0.0/16',
	4436	'LY': '41.252.0.0/14',
	4437	'MA': '105.128.0.0/11',
	4438	'MC': '88.209.64.0/18',
	4439	'MD': '37.246.0.0/16',
	4440	'ME': '178.175.0.0/17',
	4441	'MF': '74.112.232.0/21',
	4442	'MG': '154.126.0.0/17',
	4443	'MH': '117.103.88.0/21',
	4444	'MK': '77.28.0.0/15',
	4445	'ML': '154.118.128.0/18',
	4446	'MM': '37.111.0.0/17',
	4447	'MN': '49.0.128.0/17',
	4448	'MO': '60.246.0.0/16',
	4449	'MP': '202.88.64.0/20',
	4450	'MQ': '109.203.224.0/19',
	4451	'MR': '41.188.64.0/18',
	4452	'MS': '208.90.112.0/22',
	4453	'MT': '46.11.0.0/16',
	4454	'MU': '105.16.0.0/12',
	4455	'MV': '27.114.128.0/18',
	4456	'MW': '102.70.0.0/15',
	4457	'MX': '187.192.0.0/11',
	4458	'MY': '175.136.0.0/13',
	4459	'MZ': '197.218.0.0/15',
	4460	'NA': '41.182.0.0/16',
	4461	'NC': '101.101.0.0/18',
	4462	'NE': '197.214.0.0/18',
	4463	'NF': '203.17.240.0/22',
	4464	'NG': '105.112.0.0/12',
	4465	'NI': '186.76.0.0/15',
	4466	'NL': '145.96.0.0/11',
	4467	'NO': '84.208.0.0/13',
	4468	'NP': '36.252.0.0/15',
	4469	'NR': '203.98.224.0/19',
	4470	'NU': '49.156.48.0/22',
	4471	'NZ': '49.224.0.0/14',
	4472	'OM': '5.36.0.0/15',
	4473	'PA': '186.72.0.0/15',
	4474	'PE': '186.160.0.0/14',
	4475	'PF': '123.50.64.0/18',
	4476	'PG': '124.240.192.0/19',
	4477	'PH': '49.144.0.0/13',
	4478	'PK': '39.32.0.0/11',
	4479	'PL': '83.0.0.0/11',
	4480	'PM': '70.36.0.0/20',
	4481	'PR': '66.50.0.0/16',
	4482	'PS': '188.161.0.0/16',
	4483	'PT': '85.240.0.0/13',
	4484	'PW': '202.124.224.0/20',
	4485	'PY': '181.120.0.0/14',
	4486	'QA': '37.210.0.0/15',
	4487	'RE': '102.35.0.0/16',
	4488	'RO': '79.112.0.0/13',
	4489	'RS': '93.86.0.0/15',
	4490	'RU': '5.136.0.0/13',
	4491	'RW': '41.186.0.0/16',
	4492	'SA': '188.48.0.0/13',
	4493	'SB': '202.1.160.0/19',
	4494	'SC': '154.192.0.0/11',
	4495	'SD': '102.120.0.0/13',
	4496	'SE': '78.64.0.0/12',
	4497	'SG': '8.128.0.0/10',
	4498	'SI': '188.196.0.0/14',
	4499	'SK': '78.98.0.0/15',
	4500	'SL': '102.143.0.0/17',
	4501	'SM': '89.186.32.0/19',
	4502	'SN': '41.82.0.0/15',
	4503	'SO': '154.115.192.0/18',
	4504	'SR': '186.179.128.0/17',
	4505	'SS': '105.235.208.0/21',
	4506	'ST': '197.159.160.0/19',
	4507	'SV': '168.243.0.0/16',
	4508	'SX': '190.102.0.0/20',
	4509	'SY': '5.0.0.0/16',
	4510	'SZ': '41.84.224.0/19',
	4511	'TC': '65.255.48.0/20',
	4512	'TD': '154.68.128.0/19',
	4513	'TG': '196.168.0.0/14',
	4514	'TH': '171.96.0.0/13',
	4515	'TJ': '85.9.128.0/18',
	4516	'TK': '27.96.24.0/21',
	4517	'TL': '180.189.160.0/20',
	4518	'TM': '95.85.96.0/19',
	4519	'TN': '197.0.0.0/11',
	4520	'TO': '175.176.144.0/21',
	4521	'TR': '78.160.0.0/11',
	4522	'TT': '186.44.0.0/15',
	4523	'TV': '202.2.96.0/19',
	4524	'TW': '120.96.0.0/11',
	4525	'TZ': '156.156.0.0/14',
	4526	'UA': '37.52.0.0/14',
	4527	'UG': '102.80.0.0/13',
	4528	'US': '6.0.0.0/8',
	4529	'UY': '167.56.0.0/13',
	4530	'UZ': '84.54.64.0/18',
	4531	'VA': '212.77.0.0/19',
	4532	'VC': '207.191.240.0/21',
	4533	'VE': '186.88.0.0/13',
	4534	'VG': '66.81.192.0/20',
	4535	'VI': '146.226.0.0/16',
	4536	'VN': '14.160.0.0/11',
	4537	'VU': '202.80.32.0/20',
	4538	'WF': '117.20.32.0/21',
	4539	'WS': '202.4.32.0/19',
	4540	'YE': '134.35.0.0/16',
	4541	'YT': '41.242.116.0/22',
	4542	'ZA': '41.0.0.0/11',
	4543	'ZM': '102.144.0.0/13',
	4544	'ZW': '102.177.192.0/18',
	4545	}
	4546
	4547	@classmethod
	4548	def random_ipv4(cls, code_or_block):
	4549	if len(code_or_block) == 2:
	4550	block = cls._country_ip_map.get(code_or_block.upper())
	4551	if not block:
	4552	return None
	4553	else:
	4554	block = code_or_block
	4555	addr, preflen = block.split('/')
	4556	addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
	4557	addr_max = addr_min \| (0xffffffff >> int(preflen))
	4558	return compat_str(socket.inet_ntoa(
	4559	compat_struct_pack('!L', random.randint(addr_min, addr_max))))
	4560
	4561
	4562	class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
	4563	def __init__(self, proxies=None):
	4564	# Set default handlers
	4565	for type in ('http', 'https'):
	4566	setattr(self, '%s_open' % type,
	4567	lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
	4568	meth(r, proxy, type))
	4569	compat_urllib_request.ProxyHandler.__init__(self, proxies)
	4570
	4571	def proxy_open(self, req, proxy, type):
	4572	req_proxy = req.headers.get('Ytdl-request-proxy')
	4573	if req_proxy is not None:
	4574	proxy = req_proxy
	4575	del req.headers['Ytdl-request-proxy']
	4576
	4577	if proxy == '__noproxy__':
	4578	return None # No Proxy
	4579	if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
	4580	req.add_header('Ytdl-socks-proxy', proxy)
	4581	# yt-dlp's http/https handlers do wrapping the socket with socks
	4582	return None
	4583	return compat_urllib_request.ProxyHandler.proxy_open(
	4584	self, req, proxy, type)
	4585
	4586
	4587	# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
	4588	# released into Public Domain
	4589	# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
	4590
	4591	def long_to_bytes(n, blocksize=0):
	4592	"""long_to_bytes(n:long, blocksize:int) : string
	4593	Convert a long integer to a byte string.
	4594
	4595	If optional blocksize is given and greater than zero, pad the front of the
	4596	byte string with binary zeros so that the length is a multiple of
	4597	blocksize.
	4598	"""
	4599	# after much testing, this algorithm was deemed to be the fastest
	4600	s = b''
	4601	n = int(n)
	4602	while n > 0:
	4603	s = compat_struct_pack('>I', n & 0xffffffff) + s
	4604	n = n >> 32
	4605	# strip off leading zeros
	4606	for i in range(len(s)):
	4607	if s[i] != b'\000'[0]:
	4608	break
	4609	else:
	4610	# only happens when n == 0
	4611	s = b'\000'
	4612	i = 0
	4613	s = s[i:]
	4614	# add back some pad bytes. this could be done more efficiently w.r.t. the
	4615	# de-padding being done above, but sigh...
	4616	if blocksize > 0 and len(s) % blocksize:
	4617	s = (blocksize - len(s) % blocksize) * b'\000' + s
	4618	return s
	4619
	4620
	4621	def bytes_to_long(s):
	4622	"""bytes_to_long(string) : long
	4623	Convert a byte string to a long integer.
	4624
	4625	This is (essentially) the inverse of long_to_bytes().
	4626	"""
	4627	acc = 0
	4628	length = len(s)
	4629	if length % 4:
	4630	extra = (4 - length % 4)
	4631	s = b'\000' * extra + s
	4632	length = length + extra
	4633	for i in range(0, length, 4):
	4634	acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
	4635	return acc
	4636
	4637
	4638	def ohdave_rsa_encrypt(data, exponent, modulus):
	4639	'''
	4640	Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
	4641
	4642	Input:
	4643	data: data to encrypt, bytes-like object
	4644	exponent, modulus: parameter e and N of RSA algorithm, both integer
	4645	Output: hex string of encrypted data
	4646
	4647	Limitation: supports one block encryption only
	4648	'''
	4649
	4650	payload = int(binascii.hexlify(data[::-1]), 16)
	4651	encrypted = pow(payload, exponent, modulus)
	4652	return '%x' % encrypted
	4653
	4654
	4655	def pkcs1pad(data, length):
	4656	"""
	4657	Padding input data with PKCS#1 scheme
	4658
	4659	@param {int[]} data input data
	4660	@param {int} length target length
	4661	@returns {int[]} padded data
	4662	"""
	4663	if len(data) > length - 11:
	4664	raise ValueError('Input data too long for PKCS#1 padding')
	4665
	4666	pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
	4667	return [0, 2] + pseudo_random + [0] + data
	4668
	4669
	4670	def encode_base_n(num, n, table=None):
	4671	FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
	4672	if not table:
	4673	table = FULL_TABLE[:n]
	4674
	4675	if n > len(table):
	4676	raise ValueError('base %d exceeds table length %d' % (n, len(table)))
	4677
	4678	if num == 0:
	4679	return table[0]
	4680
	4681	ret = ''
	4682	while num:
	4683	ret = table[num % n] + ret
	4684	num = num // n
	4685	return ret
	4686
	4687
	4688	def decode_packed_codes(code):
	4689	mobj = re.search(PACKED_CODES_RE, code)
	4690	obfuscated_code, base, count, symbols = mobj.groups()
	4691	base = int(base)
	4692	count = int(count)
	4693	symbols = symbols.split('\|')
	4694	symbol_table = {}
	4695
	4696	while count:
	4697	count -= 1
	4698	base_n_count = encode_base_n(count, base)
	4699	symbol_table[base_n_count] = symbols[count] or base_n_count
	4700
	4701	return re.sub(
	4702	r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
	4703	obfuscated_code)
	4704
	4705
	4706	def caesar(s, alphabet, shift):
	4707	if shift == 0:
	4708	return s
	4709	l = len(alphabet)
	4710	return ''.join(
	4711	alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
	4712	for c in s)
	4713
	4714
	4715	def rot47(s):
	4716	return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{\|}~''', 47)
	4717
	4718
	4719	def parse_m3u8_attributes(attrib):
	4720	info = {}
	4721	for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"\|[^",]+)(?:,\|$)', attrib):
	4722	if val.startswith('"'):
	4723	val = val[1:-1]
	4724	info[key] = val
	4725	return info
	4726
	4727
	4728	def urshift(val, n):
	4729	return val >> n if val >= 0 else (val + 0x100000000) >> n
	4730
	4731
	4732	# Based on png2str() written by @gdkchan and improved by @yokrysty
	4733	# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
	4734	def decode_png(png_data):
	4735	# Reference: https://www.w3.org/TR/PNG/
	4736	header = png_data[8:]
	4737
	4738	if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
	4739	raise IOError('Not a valid PNG file.')
	4740
	4741	int_map = {1: '>B', 2: '>H', 4: '>I'}
	4742	unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]
	4743
	4744	chunks = []
	4745
	4746	while header:
	4747	length = unpack_integer(header[:4])
	4748	header = header[4:]
	4749
	4750	chunk_type = header[:4]
	4751	header = header[4:]
	4752
	4753	chunk_data = header[:length]
	4754	header = header[length:]
	4755
	4756	header = header[4:] # Skip CRC
	4757
	4758	chunks.append({
	4759	'type': chunk_type,
	4760	'length': length,
	4761	'data': chunk_data
	4762	})
	4763
	4764	ihdr = chunks[0]['data']
	4765
	4766	width = unpack_integer(ihdr[:4])
	4767	height = unpack_integer(ihdr[4:8])
	4768
	4769	idat = b''
	4770
	4771	for chunk in chunks:
	4772	if chunk['type'] == b'IDAT':
	4773	idat += chunk['data']
	4774
	4775	if not idat:
	4776	raise IOError('Unable to read PNG data.')
	4777
	4778	decompressed_data = bytearray(zlib.decompress(idat))
	4779
	4780	stride = width * 3
	4781	pixels = []
	4782
	4783	def _get_pixel(idx):
	4784	x = idx % stride
	4785	y = idx // stride
	4786	return pixels[y][x]
	4787
	4788	for y in range(height):
	4789	basePos = y * (1 + stride)
	4790	filter_type = decompressed_data[basePos]
	4791
	4792	current_row = []
	4793
	4794	pixels.append(current_row)
	4795
	4796	for x in range(stride):
	4797	color = decompressed_data[1 + basePos + x]
	4798	basex = y * stride + x
	4799	left = 0
	4800	up = 0
	4801
	4802	if x > 2:
	4803	left = _get_pixel(basex - 3)
	4804	if y > 0:
	4805	up = _get_pixel(basex - stride)
	4806
	4807	if filter_type == 1: # Sub
	4808	color = (color + left) & 0xff
	4809	elif filter_type == 2: # Up
	4810	color = (color + up) & 0xff
	4811	elif filter_type == 3: # Average
	4812	color = (color + ((left + up) >> 1)) & 0xff
	4813	elif filter_type == 4: # Paeth
	4814	a = left
	4815	b = up
	4816	c = 0
	4817
	4818	if x > 2 and y > 0:
	4819	c = _get_pixel(basex - stride - 3)
	4820
	4821	p = a + b - c
	4822
	4823	pa = abs(p - a)
	4824	pb = abs(p - b)
	4825	pc = abs(p - c)
	4826
	4827	if pa <= pb and pa <= pc:
	4828	color = (color + a) & 0xff
	4829	elif pb <= pc:
	4830	color = (color + b) & 0xff
	4831	else:
	4832	color = (color + c) & 0xff
	4833
	4834	current_row.append(color)
	4835
	4836	return width, height, pixels
	4837
	4838
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file `path` to `value`

    The Python `xattr` module is used when it can be imported (both the
    pyxattr and the xattr flavour are handled). Otherwise the value is
    written to an NTFS Alternate Data Stream when compat_os_name is 'nt',
    and through the `setfattr` or `xattr` command line tools elsewhere.

    `value` is expected to be bytes: it is written to a file opened in
    binary mode, or decoded as UTF-8 for the command line tools.

    Raises XAttrMetadataError when the chosen method fails to write the
    attribute, and XAttrUnavailableError when no usable method is found or
    the installed pyxattr is too old.
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                # NOTE(review): despite the message, nothing visible here
                # falls back; the enclosing handler only catches
                # ImportError - confirm XAttrUnavailableError is unrelated
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No Python xattr module available
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # A stream is addressed as '<file name>:<stream name>'
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The value is passed as a command line argument
                value = value.decode('utf-8')
                # setfattr takes precedence when both tools are present
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                # Report the tool's own error output on failure
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
	4921
	4922
	4923	def random_birthday(year_field, month_field, day_field):
	4924	start_date = datetime.date(1950, 1, 1)
	4925	end_date = datetime.date(1995, 12, 31)
	4926	offset = random.randint(0, (end_date - start_date).days)
	4927	random_date = start_date + datetime.timedelta(offset)
	4928	return {
	4929	year_field: str(random_date.year),
	4930	month_field: str(random_date.month),
	4931	day_field: str(random_date.day),
	4932	}
	4933
	4934
	4935	# Templates for internet shortcut files, which are plain text files.
	4936	DOT_URL_LINK_TEMPLATE = '''
	4937	[InternetShortcut]
	4938	URL=%(url)s
	4939	'''.lstrip()
	4940
	4941	DOT_WEBLOC_LINK_TEMPLATE = '''
	4942	<?xml version="1.0" encoding="UTF-8"?>
	4943	<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
	4944	<plist version="1.0">
	4945	<dict>
	4946	\t<key>URL</key>
	4947	\t<string>%(url)s</string>
	4948	</dict>
	4949	</plist>
	4950	'''.lstrip()
	4951
	4952	DOT_DESKTOP_LINK_TEMPLATE = '''
	4953	[Desktop Entry]
	4954	Encoding=UTF-8
	4955	Name=%(filename)s
	4956	Type=Link
	4957	URL=%(url)s
	4958	Icon=text-html
	4959	'''.lstrip()
	4960
	4961	LINK_TEMPLATES = {
	4962	'url': DOT_URL_LINK_TEMPLATE,
	4963	'desktop': DOT_DESKTOP_LINK_TEMPLATE,
	4964	'webloc': DOT_WEBLOC_LINK_TEMPLATE,
	4965	}
	4966
	4967
	4968	def iri_to_uri(iri):
	4969	"""
	4970	Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
	4971
	4972	The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding besides those already escaped, leaving the URI intact.
	4973	"""
	4974
	4975	iri_parts = compat_urllib_parse_urlparse(iri)
	4976
	4977	if '[' in iri_parts.netloc:
	4978	raise ValueError('IPv6 URIs are not, yet, supported.')
	4979	# Querying `.netloc`, when there's only one bracket, also raises a ValueError.
	4980
	4981	# The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
	4982
	4983	net_location = ''
	4984	if iri_parts.username:
	4985	net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
	4986	if iri_parts.password is not None:
	4987	net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
	4988	net_location += '@'
	4989
	4990	net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
	4991	# The 'idna' encoding produces ASCII text.
	4992	if iri_parts.port is not None and iri_parts.port != 80:
	4993	net_location += ':' + str(iri_parts.port)
	4994
	4995	return compat_urllib_parse_urlunparse(
	4996	(iri_parts.scheme,
	4997	net_location,
	4998
	4999	compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@\|~"),
	5000
	5001	# Unsure about the `safe` argument, since this is a legacy way of handling parameters.
	5002	compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@\|~"),
	5003
	5004	# Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
	5005	compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{\|}~"),
	5006
	5007	compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{\|}~")))
	5008
	5009	# Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
	5010
	5011
	5012	def to_high_limit_path(path):
	5013	if sys.platform in ['win32', 'cygwin']:
	5014	# Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
	5015	return r'\\?\ '.rstrip() + os.path.abspath(path)
	5016
	5017	return path
	5018
	5019
	5020	def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
	5021	val = traverse_obj(obj, *variadic(field))
	5022	if val in ignore:
	5023	return default
	5024	return template % (func(val) if func else val)
	5025
	5026
	5027	def clean_podcast_url(url):
	5028	return re.sub(r'''(?x)
	5029	(?:
	5030	(?:
	5031	chtbl\.com/track\|
	5032	media\.blubrry\.com\| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
	5033	play\.podtrac\.com
	5034	)/[^/]+\|
	5035	(?:dts\|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}\| # http://analytics.podtrac.com/how-to-measure
	5036	flex\.acast\.com\|
	5037	pd(?:
	5038	cn\.co\| # https://podcorn.com/analytics-prefix/
	5039	st\.fm # https://podsights.com/docs/
	5040	)/e
	5041	)/''', '', url)
	5042
	5043
	5044	_HEX_TABLE = '0123456789abcdef'
	5045
	5046
	5047	def random_uuidv4():
	5048	return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
	5049
	5050
	5051	def make_dir(path, to_screen=None):
	5052	try:
	5053	dn = os.path.dirname(path)
	5054	if dn and not os.path.exists(dn):
	5055	os.makedirs(dn)
	5056	return True
	5057	except (OSError, IOError) as err:
	5058	if callable(to_screen) is not None:
	5059	to_screen('unable to create directory ' + error_to_compat_str(err))
	5060	return False
	5061
	5062
	5063	def get_executable_path():
	5064	from zipimport import zipimporter
	5065	if hasattr(sys, 'frozen'): # Running from PyInstaller
	5066	path = os.path.dirname(sys.executable)
	5067	elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
	5068	path = os.path.join(os.path.dirname(__file__), '../..')
	5069	else:
	5070	path = os.path.join(os.path.dirname(__file__), '..')
	5071	return os.path.abspath(path)
	5072
	5073
	5074	def load_plugins(name, suffix, namespace):
	5075	classes = {}
	5076	try:
	5077	plugins_spec = importlib.util.spec_from_file_location(
	5078	name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
	5079	plugins = importlib.util.module_from_spec(plugins_spec)
	5080	sys.modules[plugins_spec.name] = plugins
	5081	plugins_spec.loader.exec_module(plugins)
	5082	for name in dir(plugins):
	5083	if name in namespace:
	5084	continue
	5085	if not name.endswith(suffix):
	5086	continue
	5087	klass = getattr(plugins, name)
	5088	classes[name] = namespace[name] = klass
	5089	except FileNotFoundError:
	5090	pass
	5091	return classes
	5092
	5093
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a function, a tuple of strings/None or "...".
                            When a function is given, it takes the key as argument and
                            returns whether the key matches or not. When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
                            "None" returns the object without traversal
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    @returns                The value found by the first path that yields a non-None
                            result after the expected_type filter, else `default`
    # TODO: Write tests
    '''
    if not casesense:
        # Lowercase every string key in the paths so that they can be compared
        # against lowercased dictionary keys further below
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` is reset by the loop at the bottom for every path; it records the
        # deepest level of branching ("...", tuple or function keys) that was reached
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            # Stop at a None key, or once there is nothing left to traverse
            if None in (key, obj):
                return obj
            if isinstance(key, (list, tuple)):
                # Traverse each alternative key, then fall through to the "..." branch
                # so that the rest of the path is applied to every result
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # The function is called with the index (sequences) or key (dicts)
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    # NOTE(review): the comprehension below unpacks (k, v) pairs, but
                    # iterating a str yields single characters - confirm this path works
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # `_lower` only exists when casesense is False; the short-circuit
                # on `casesense` keeps it from being evaluated otherwise
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # Convert "N" to an int and "A:B[:C]" to a slice
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A bare ":" behaves like "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Normalize expected_type into a function returning the value or None
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Branching produced nested lists; flatten all but the innermost level
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
	5191
	5192
	5193	def traverse_dict(dictn, keys, casesense=True):
	5194	write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
	5195	'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
	5196	return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
	5197
	5198
	5199	def variadic(x, allowed_types=(str, bytes, dict)):
	5200	return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
	5201
	5202
	5203	def decode_base(value, digits):
	5204	# This will convert given base-x string to scalar (long or int)
	5205	table = {char: index for index, char in enumerate(digits)}
	5206	result = 0
	5207	base = len(digits)
	5208	for chr in value:
	5209	result *= base
	5210	result += table[chr]
	5211	return result
	5212
	5213
	5214	def time_seconds(**kwargs):
	5215	t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
	5216	return t.timestamp()
	5217
	5218
	5219	# create a JSON Web Signature (jws) with HS256 algorithm
	5220	# the resulting format is in JWS Compact Serialization
	5221	# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
	5222	# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
	5223	def jwt_encode_hs256(payload_data, key, headers={}):
	5224	header_data = {
	5225	'alg': 'HS256',
	5226	'typ': 'JWT',
	5227	}
	5228	if headers:
	5229	header_data.update(headers)
	5230	header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
	5231	payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
	5232	h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
	5233	signature_b64 = base64.b64encode(h.digest())
	5234	token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
	5235	return token
	5236
	5237
	5238	# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
	5239	def jwt_decode_hs256(jwt):
	5240	header_b64, payload_b64, signature_b64 = jwt.split('.')
	5241	payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
	5242	return payload_data
	5243
	5244
	5245	def supports_terminal_sequences(stream):
	5246	if compat_os_name == 'nt':
	5247	from .compat import WINDOWS_VT_MODE # Must be imported locally
	5248	if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
	5249	return False
	5250	elif not os.getenv('TERM'):
	5251	return False
	5252	try:
	5253	return stream.isatty()
	5254	except BaseException:
	5255	return False
	5256
	5257
	5258	_terminal_sequences_re = re.compile('\033\\[[^m]+m')
	5259
	5260
	5261	def remove_terminal_sequences(string):
	5262	return _terminal_sequences_re.sub('', string)
	5263
	5264
	5265	def number_of_digits(number):
	5266	return len('%d' % number)
	5267
	5268
	5269	def join_nonempty(*values, delim='-', from_dict=None):
	5270	if from_dict is not None:
	5271	values = map(from_dict.get, values)
	5272	return delim.join(map(str, filter(None, values)))
	5273
	5274
	5275	def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
	5276	"""
	5277	Find the largest format dimensions in terms of video width and, for each thumbnail:
	5278	* Modify the URL: Match the width with the provided regex and replace with the former width
	5279	* Update dimensions
	5280
	5281	This function is useful with video services that scale the provided thumbnails on demand
	5282	"""
	5283	_keys = ('width', 'height')
	5284	max_dimensions = max(
	5285	[tuple(format.get(k) or 0 for k in _keys) for format in formats],
	5286	default=(0, 0))
	5287	if not max_dimensions[0]:
	5288	return thumbnails
	5289	return [
	5290	merge_dicts(
	5291	{'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
	5292	dict(zip(_keys, max_dimensions)), thumbnail)
	5293	for thumbnail in thumbnails
	5294	]
	5295
	5296
	5297	def parse_http_range(range):
	5298	""" Parse value of "Range" or "Content-Range" HTTP header into tuple. """
	5299	if not range:
	5300	return None, None, None
	5301	crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
	5302	if not crg:
	5303	return None, None, None
	5304	return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
	5305
	5306
	5307	class Config:
	5308	own_args = None
	5309	filename = None
	5310	__initialized = False
	5311
	5312	def __init__(self, parser, label=None):
	5313	self._parser, self.label = parser, label
	5314	self._loaded_paths, self.configs = set(), []
	5315
	5316	def init(self, args=None, filename=None):
	5317	assert not self.__initialized
	5318	directory = ''
	5319	if filename:
	5320	location = os.path.realpath(filename)
	5321	directory = os.path.dirname(location)
	5322	if location in self._loaded_paths:
	5323	return False
	5324	self._loaded_paths.add(location)
	5325
	5326	self.__initialized = True
	5327	self.own_args, self.filename = args, filename
	5328	for location in self._parser.parse_args(args)[0].config_locations or []:
	5329	location = os.path.join(directory, expand_path(location))
	5330	if os.path.isdir(location):
	5331	location = os.path.join(location, 'yt-dlp.conf')
	5332	if not os.path.exists(location):
	5333	self._parser.error(f'config location {location} does not exist')
	5334	self.append_config(self.read_file(location), location)
	5335	return True
	5336
	5337	def __str__(self):
	5338	label = join_nonempty(
	5339	self.label, 'config', f'"{self.filename}"' if self.filename else '',
	5340	delim=' ')
	5341	return join_nonempty(
	5342	self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
	5343	*(f'\n{c}'.replace('\n', '\n\| ')[1:] for c in self.configs),
	5344	delim='\n')
	5345
	5346	@staticmethod
	5347	def read_file(filename, default=[]):
	5348	try:
	5349	optionf = open(filename)
	5350	except IOError:
	5351	return default # silently skip if file is not present
	5352	try:
	5353	# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
	5354	contents = optionf.read()
	5355	if sys.version_info < (3,):
	5356	contents = contents.decode(preferredencoding())
	5357	res = compat_shlex_split(contents, comments=True)
	5358	finally:
	5359	optionf.close()
	5360	return res
	5361
	5362	@staticmethod
	5363	def hide_login_info(opts):
	5364	PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
	5365	eqre = re.compile('^(?P<key>' + ('\|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
	5366
	5367	def _scrub_eq(o):
	5368	m = eqre.match(o)
	5369	if m:
	5370	return m.group('key') + '=PRIVATE'
	5371	else:
	5372	return o
	5373
	5374	opts = list(map(_scrub_eq, opts))
	5375	for idx, opt in enumerate(opts):
	5376	if opt in PRIVATE_OPTS and idx + 1 < len(opts):
	5377	opts[idx + 1] = 'PRIVATE'
	5378	return opts
	5379
	5380	def append_config(self, *args, label=None):
	5381	config = type(self)(self._parser, label)
	5382	config._loaded_paths = self._loaded_paths
	5383	if config.init(*args):
	5384	self.configs.append(config)
	5385
	5386	@property
	5387	def all_args(self):
	5388	for config in reversed(self.configs):
	5389	yield from config.all_args
	5390	yield from self.own_args or []
	5391
	5392	def parse_args(self):
	5393	return self._parser.parse_args(list(self.all_args))
	5394
	5395
	5396	class WebSocketsWrapper():
	5397	"""Wraps websockets module to use in non-async scopes"""
	5398
	5399	def __init__(self, url, headers=None):
	5400	self.loop = asyncio.events.new_event_loop()
	5401	self.conn = compat_websockets.connect(
	5402	url, extra_headers=headers, ping_interval=None,
	5403	close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
	5404	atexit.register(self.__exit__, None, None, None)
	5405
	5406	def __enter__(self):
	5407	self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
	5408	return self
	5409
	5410	def send(self, *args):
	5411	self.run_with_loop(self.pool.send(*args), self.loop)
	5412
	5413	def recv(self, *args):
	5414	return self.run_with_loop(self.pool.recv(*args), self.loop)
	5415
	5416	def __exit__(self, type, value, traceback):
	5417	try:
	5418	return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
	5419	finally:
	5420	self.loop.close()
	5421	self._cancel_all_tasks(self.loop)
	5422
	5423	# taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
	5424	# for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
	5425	@staticmethod
	5426	def run_with_loop(main, loop):
	5427	if not asyncio.coroutines.iscoroutine(main):
	5428	raise ValueError(f'a coroutine was expected, got {main!r}')
	5429
	5430	try:
	5431	return loop.run_until_complete(main)
	5432	finally:
	5433	loop.run_until_complete(loop.shutdown_asyncgens())
	5434	if hasattr(loop, 'shutdown_default_executor'):
	5435	loop.run_until_complete(loop.shutdown_default_executor())
	5436
	5437	@staticmethod
	5438	def _cancel_all_tasks(loop):
	5439	to_cancel = asyncio.tasks.all_tasks(loop)
	5440
	5441	if not to_cancel:
	5442	return
	5443
	5444	for task in to_cancel:
	5445	task.cancel()
	5446
	5447	loop.run_until_complete(
	5448	asyncio.tasks.gather(*to_cancel, loop=loop, return_exceptions=True))
	5449
	5450	for task in to_cancel:
	5451	if task.cancelled():
	5452	continue
	5453	if task.exception() is not None:
	5454	loop.call_exception_handler({
	5455	'message': 'unhandled exception during asyncio.run() shutdown',
	5456	'exception': task.exception(),
	5457	'task': task,
	5458	})
	5459
	5460
# Truthiness of compat_websockets; presumably falsy when the optional
# websockets package could not be imported - confirm in .compat
has_websockets = bool(compat_websockets)
	5462
	5463
	5464	def merge_headers(*dicts):
	5465	"""Merge dicts of network headers case insensitively, prioritizing the latter ones"""
	5466	return {k.capitalize(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}