jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	#!/usr/bin/env python3
	2	# coding: utf-8
	3
	4	from __future__ import unicode_literals
	5
	6	import asyncio
	7	import atexit
	8	import base64
	9	import binascii
	10	import calendar
	11	import codecs
	12	import collections
	13	import contextlib
	14	import ctypes
	15	import datetime
	16	import email.utils
	17	import email.header
	18	import errno
	19	import functools
	20	import gzip
	21	import hashlib
	22	import hmac
	23	import importlib.util
	24	import io
	25	import itertools
	26	import json
	27	import locale
	28	import math
	29	import operator
	30	import os
	31	import platform
	32	import random
	33	import re
	34	import socket
	35	import ssl
	36	import subprocess
	37	import sys
	38	import tempfile
	39	import time
	40	import traceback
	41	import xml.etree.ElementTree
	42	import zlib
	43	import mimetypes
	44
	45	from .compat import (
	46	compat_HTMLParseError,
	47	compat_HTMLParser,
	48	compat_HTTPError,
	49	compat_basestring,
	50	compat_brotli,
	51	compat_chr,
	52	compat_cookiejar,
	53	compat_ctypes_WINFUNCTYPE,
	54	compat_etree_fromstring,
	55	compat_expanduser,
	56	compat_html_entities,
	57	compat_html_entities_html5,
	58	compat_http_client,
	59	compat_integer_types,
	60	compat_numeric_types,
	61	compat_kwargs,
	62	compat_os_name,
	63	compat_parse_qs,
	64	compat_shlex_split,
	65	compat_shlex_quote,
	66	compat_str,
	67	compat_struct_pack,
	68	compat_struct_unpack,
	69	compat_urllib_error,
	70	compat_urllib_parse,
	71	compat_urllib_parse_urlencode,
	72	compat_urllib_parse_urlparse,
	73	compat_urllib_parse_urlunparse,
	74	compat_urllib_parse_quote,
	75	compat_urllib_parse_quote_plus,
	76	compat_urllib_parse_unquote_plus,
	77	compat_urllib_request,
	78	compat_urlparse,
	79	compat_websockets,
	80	compat_xpath,
	81	)
	82
	83	from .socks import (
	84	ProxyType,
	85	sockssocket,
	86	)
	87
	88	try:
	89	import certifi
	90	has_certifi = True
	91	except ImportError:
	92	has_certifi = False
	93
	94
	95	def register_socks_protocols():
	96	# "Register" SOCKS protocols
	97	# In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
	98	# URLs with protocols not in urlparse.uses_netloc are not handled correctly
	99	for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
	100	if scheme not in compat_urlparse.uses_netloc:
	101	compat_urlparse.uses_netloc.append(scheme)
	102
	103
# This is not clearly defined otherwise
# (the class of compiled pattern objects, obtained by compiling an empty pattern)
compiled_regex_type = type(re.compile(''))
	106
	107
	108	def random_user_agent():
	109	_USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
	110	_CHROME_VERSIONS = (
	111	'90.0.4430.212',
	112	'90.0.4430.24',
	113	'90.0.4430.70',
	114	'90.0.4430.72',
	115	'90.0.4430.85',
	116	'90.0.4430.93',
	117	'91.0.4472.101',
	118	'91.0.4472.106',
	119	'91.0.4472.114',
	120	'91.0.4472.124',
	121	'91.0.4472.164',
	122	'91.0.4472.19',
	123	'91.0.4472.77',
	124	'92.0.4515.107',
	125	'92.0.4515.115',
	126	'92.0.4515.131',
	127	'92.0.4515.159',
	128	'92.0.4515.43',
	129	'93.0.4556.0',
	130	'93.0.4577.15',
	131	'93.0.4577.63',
	132	'93.0.4577.82',
	133	'94.0.4606.41',
	134	'94.0.4606.54',
	135	'94.0.4606.61',
	136	'94.0.4606.71',
	137	'94.0.4606.81',
	138	'94.0.4606.85',
	139	'95.0.4638.17',
	140	'95.0.4638.50',
	141	'95.0.4638.54',
	142	'95.0.4638.69',
	143	'95.0.4638.74',
	144	'96.0.4664.18',
	145	'96.0.4664.45',
	146	'96.0.4664.55',
	147	'96.0.4664.93',
	148	'97.0.4692.20',
	149	)
	150	return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)
	151
	152
# Content encodings listed as supported; 'br' (brotli) is only added when
# compat_brotli is truthy
SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if compat_brotli:
    SUPPORTED_ENCODINGS.append('br')
	158
	159	std_headers = {
	160	'User-Agent': random_user_agent(),
	161	'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,/;q=0.8',
	162	'Accept-Language': 'en-us,en;q=0.5',
	163	'Sec-Fetch-Mode': 'navigate',
	164	}
	165
	166
# Named alternative User-Agent strings, keyed by browser name
USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}
	170
	171
# Unique sentinel meaning "no default supplied"; compared by identity
# (e.g. `default is not NO_DEFAULT`) so that None stays usable as a real default
NO_DEFAULT = object()
	173
# English month names, January first
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names per language code, each list ordered January..December
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
}
	184
# Media and manifest file extensions (without the leading dot), grouped by
# container family. NOTE(review): the consumers are outside this excerpt
KNOWN_EXTENSIONS = (
    'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'aac',
    'flv', 'f4v', 'f4a', 'f4b',
    'webm', 'ogg', 'ogv', 'oga', 'ogx', 'spx', 'opus',
    'mkv', 'mka', 'mk3d',
    'avi', 'divx',
    'mov',
    'asf', 'wmv', 'wma',
    '3gp', '3g2',
    'mp3',
    'flac',
    'ape',
    'wav',
    'f4f', 'f4m', 'm3u8', 'smil')
	199
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement; multi-letter
# replacements ('AE', 'OE', 'TH', 'ss', ...) are passed as one-element lists so
# that chain() yields them whole instead of character by character
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
	204
# strptime-style date/time formats that are unambiguous about day/month order.
# NOTE(review): presumably tried in order by date-parsing helpers elsewhere in
# the file - confirm before reordering
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)
	249
# DATE_FORMATS plus the numeric formats that put the day before the month
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
])

# DATE_FORMATS plus the numeric formats that put the month before the day
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])
	268
	269	PACKED_CODES_RE = r"}$'(.+)',(\d+),(\d+),'([^']+)'\.split\('\\|'$"
	270	JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>(?P<json_ld>.+?)</script>'
	271
	272
	273	def preferredencoding():
	274	"""Get preferred encoding.
	275
	276	Returns the best encoding scheme for the system, based on
	277	locale.getpreferredencoding() and some further tweaks.
	278	"""
	279	try:
	280	pref = locale.getpreferredencoding()
	281	'TEST'.encode(pref)
	282	except Exception:
	283	pref = 'UTF-8'
	284
	285	return pref
	286
	287
	288	def write_json_file(obj, fn):
	289	""" Encode obj as JSON and write it to fn, atomically if possible """
	290
	291	fn = encodeFilename(fn)
	292	if sys.version_info < (3, 0) and sys.platform != 'win32':
	293	encoding = get_filesystem_encoding()
	294	# os.path.basename returns a bytes object, but NamedTemporaryFile
	295	# will fail if the filename contains non ascii characters unless we
	296	# use a unicode object
	297	path_basename = lambda f: os.path.basename(fn).decode(encoding)
	298	# the same for os.path.dirname
	299	path_dirname = lambda f: os.path.dirname(fn).decode(encoding)
	300	else:
	301	path_basename = os.path.basename
	302	path_dirname = os.path.dirname
	303
	304	args = {
	305	'suffix': '.tmp',
	306	'prefix': path_basename(fn) + '.',
	307	'dir': path_dirname(fn),
	308	'delete': False,
	309	}
	310
	311	# In Python 2.x, json.dump expects a bytestream.
	312	# In Python 3.x, it writes to a character stream
	313	if sys.version_info < (3, 0):
	314	args['mode'] = 'wb'
	315	else:
	316	args.update({
	317	'mode': 'w',
	318	'encoding': 'utf-8',
	319	})
	320
	321	tf = tempfile.NamedTemporaryFile(**compat_kwargs(args))
	322
	323	try:
	324	with tf:
	325	json.dump(obj, tf, ensure_ascii=False)
	326	if sys.platform == 'win32':
	327	# Need to remove existing file on Windows, else os.rename raises
	328	# WindowsError or FileExistsError.
	329	try:
	330	os.unlink(fn)
	331	except OSError:
	332	pass
	333	try:
	334	mask = os.umask(0)
	335	os.umask(mask)
	336	os.chmod(tf.name, 0o666 & ~mask)
	337	except OSError:
	338	pass
	339	os.rename(tf.name, fn)
	340	except Exception:
	341	try:
	342	os.remove(tf.name)
	343	except OSError:
	344	pass
	345	raise
	346
	347
	348	if sys.version_info >= (2, 7):
	349	def find_xpath_attr(node, xpath, key, val=None):
	350	""" Find the xpath xpath[@key=val] """
	351	assert re.match(r'^[a-zA-Z_-]+$', key)
	352	expr = xpath + ('[@%s]' % key if val is None else "[@%s='%s']" % (key, val))
	353	return node.find(expr)
	354	else:
	355	def find_xpath_attr(node, xpath, key, val=None):
	356	for f in node.findall(compat_xpath(xpath)):
	357	if key not in f.attrib:
	358	continue
	359	if val is None or f.attrib.get(key) == val:
	360	return f
	361	return None
	362
	363	# On python2.6 the xml.etree.ElementTree.Element methods don't support
	364	# the namespace parameter
	365
	366
	367	def xpath_with_ns(path, ns_map):
	368	components = [c.split(':') for c in path.split('/')]
	369	replaced = []
	370	for c in components:
	371	if len(c) == 1:
	372	replaced.append(c[0])
	373	else:
	374	ns, tag = c
	375	replaced.append('{%s}%s' % (ns_map[ns], tag))
	376	return '/'.join(replaced)
	377
	378
	379	def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
	380	def _find_xpath(xpath):
	381	return node.find(compat_xpath(xpath))
	382
	383	if isinstance(xpath, (str, compat_str)):
	384	n = _find_xpath(xpath)
	385	else:
	386	for xp in xpath:
	387	n = _find_xpath(xp)
	388	if n is not None:
	389	break
	390
	391	if n is None:
	392	if default is not NO_DEFAULT:
	393	return default
	394	elif fatal:
	395	name = xpath if name is None else name
	396	raise ExtractorError('Could not find XML element %s' % name)
	397	else:
	398	return None
	399	return n
	400
	401
	402	def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
	403	n = xpath_element(node, xpath, name, fatal=fatal, default=default)
	404	if n is None or n == default:
	405	return n
	406	if n.text is None:
	407	if default is not NO_DEFAULT:
	408	return default
	409	elif fatal:
	410	name = xpath if name is None else name
	411	raise ExtractorError('Could not find XML element\'s text %s' % name)
	412	else:
	413	return None
	414	return n.text
	415
	416
	417	def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
	418	n = find_xpath_attr(node, xpath, key)
	419	if n is None:
	420	if default is not NO_DEFAULT:
	421	return default
	422	elif fatal:
	423	name = '%s[@%s]' % (xpath, key) if name is None else name
	424	raise ExtractorError('Could not find XML attribute %s' % name)
	425	else:
	426	return None
	427	return n.attrib[key]
	428
	429
	430	def get_element_by_id(id, html):
	431	"""Return the content of the tag with the specified ID in the passed HTML document"""
	432	return get_element_by_attribute('id', id, html)
	433
	434
	435	def get_element_html_by_id(id, html):
	436	"""Return the html of the tag with the specified ID in the passed HTML document"""
	437	return get_element_html_by_attribute('id', id, html)
	438
	439
	440	def get_element_by_class(class_name, html):
	441	"""Return the content of the first tag with the specified class in the passed HTML document"""
	442	retval = get_elements_by_class(class_name, html)
	443	return retval[0] if retval else None
	444
	445
	446	def get_element_html_by_class(class_name, html):
	447	"""Return the html of the first tag with the specified class in the passed HTML document"""
	448	retval = get_elements_html_by_class(class_name, html)
	449	return retval[0] if retval else None
	450
	451
	452	def get_element_by_attribute(attribute, value, html, escape_value=True):
	453	retval = get_elements_by_attribute(attribute, value, html, escape_value)
	454	return retval[0] if retval else None
	455
	456
	457	def get_element_html_by_attribute(attribute, value, html, escape_value=True):
	458	retval = get_elements_html_by_attribute(attribute, value, html, escape_value)
	459	return retval[0] if retval else None
	460
	461
	462	def get_elements_by_class(class_name, html):
	463	"""Return the content of all tags with the specified class in the passed HTML document as a list"""
	464	return get_elements_by_attribute(
	465	'class', r'[^\'"]\b%s\b[^\'"]' % re.escape(class_name),
	466	html, escape_value=False)
	467
	468
	469	def get_elements_html_by_class(class_name, html):
	470	"""Return the html of all tags with the specified class in the passed HTML document as a list"""
	471	return get_elements_html_by_attribute(
	472	'class', r'[^\'"]\b%s\b[^\'"]' % re.escape(class_name),
	473	html, escape_value=False)
	474
	475
	476	def get_elements_by_attribute(args, *kwargs):
	477	"""Return the content of the tag with the specified attribute in the passed HTML document"""
	478	return [content for content, _ in get_elements_text_and_html_by_attribute(args, *kwargs)]
	479
	480
	481	def get_elements_html_by_attribute(args, *kwargs):
	482	"""Return the html of the tag with the specified attribute in the passed HTML document"""
	483	return [whole for _, whole in get_elements_text_and_html_by_attribute(args, *kwargs)]
	484
	485
	486	def get_elements_text_and_html_by_attribute(attribute, value, html, escape_value=True):
	487	"""
	488	Return the text (content) and the html (whole) of the tag with the specified
	489	attribute in the passed HTML document
	490	"""
	491
	492	value_quote_optional = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
	493
	494	value = re.escape(value) if escape_value else value
	495
	496	partial_element_re = r'''(?x)
	497	<(?P<tag>[a-zA-Z0-9:._-]+)
	498	(?:\s(?:[^>"']\|"[^"]"\|'[^']')*)?
	499	\s%(attribute)s\s=\s(?P<_q>['"]%(vqo)s)(?-x:%(value)s)(?P=_q)
	500	''' % {'attribute': re.escape(attribute), 'value': value, 'vqo': value_quote_optional}
	501
	502	for m in re.finditer(partial_element_re, html):
	503	content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
	504
	505	yield (
	506	unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
	507	whole
	508	)
	509
	510
	511	class HTMLBreakOnClosingTagParser(compat_HTMLParser):
	512	"""
	513	HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
	514	closing tag for the first opening tag it has encountered, and can be used
	515	as a context manager
	516	"""
	517
	518	class HTMLBreakOnClosingTagException(Exception):
	519	pass
	520
	521	def __init__(self):
	522	self.tagstack = collections.deque()
	523	compat_HTMLParser.__init__(self)
	524
	525	def __enter__(self):
	526	return self
	527
	528	def __exit__(self, *_):
	529	self.close()
	530
	531	def close(self):
	532	# handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
	533	# so data remains buffered; we no longer have any interest in it, thus
	534	# override this method to discard it
	535	pass
	536
	537	def handle_starttag(self, tag, _):
	538	self.tagstack.append(tag)
	539
	540	def handle_endtag(self, tag):
	541	if not self.tagstack:
	542	raise compat_HTMLParseError('no tags in the stack')
	543	while self.tagstack:
	544	inner_tag = self.tagstack.pop()
	545	if inner_tag == tag:
	546	break
	547	else:
	548	raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
	549	if not self.tagstack:
	550	raise self.HTMLBreakOnClosingTagException()
	551
	552
	553	def get_element_text_and_html_by_tag(tag, html):
	554	"""
	555	For the first element with the specified tag in the passed HTML document
	556	return its' content (text) and the whole element (html)
	557	"""
	558	def find_or_raise(haystack, needle, exc):
	559	try:
	560	return haystack.index(needle)
	561	except ValueError:
	562	raise exc
	563	closing_tag = f'</{tag}>'
	564	whole_start = find_or_raise(
	565	html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
	566	content_start = find_or_raise(
	567	html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
	568	content_start += whole_start + 1
	569	with HTMLBreakOnClosingTagParser() as parser:
	570	parser.feed(html[whole_start:content_start])
	571	if not parser.tagstack or parser.tagstack[0] != tag:
	572	raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
	573	offset = content_start
	574	while offset < len(html):
	575	next_closing_tag_start = find_or_raise(
	576	html[offset:], closing_tag,
	577	compat_HTMLParseError(f'closing {tag} tag not found'))
	578	next_closing_tag_end = next_closing_tag_start + len(closing_tag)
	579	try:
	580	parser.feed(html[offset:offset + next_closing_tag_end])
	581	offset += next_closing_tag_end
	582	except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
	583	return html[content_start:offset + next_closing_tag_start], \
	584	html[whole_start:offset + next_closing_tag_end]
	585	raise compat_HTMLParseError('unexpected end of html')
	586
	587
	588	class HTMLAttributeParser(compat_HTMLParser):
	589	"""Trivial HTML parser to gather the attributes for a single element"""
	590
	591	def __init__(self):
	592	self.attrs = {}
	593	compat_HTMLParser.__init__(self)
	594
	595	def handle_starttag(self, tag, attrs):
	596	self.attrs = dict(attrs)
	597
	598
	599	class HTMLListAttrsParser(compat_HTMLParser):
	600	"""HTML parser to gather the attributes for the elements of a list"""
	601
	602	def __init__(self):
	603	compat_HTMLParser.__init__(self)
	604	self.items = []
	605	self._level = 0
	606
	607	def handle_starttag(self, tag, attrs):
	608	if tag == 'li' and self._level == 0:
	609	self.items.append(dict(attrs))
	610	self._level += 1
	611
	612	def handle_endtag(self, tag):
	613	self._level -= 1
	614
	615
	616	def extract_attributes(html_element):
	617	"""Given a string for an HTML element such as
	618	<el
	619	a="foo" B="bar" c="&98;az" d=boz
	620	empty= noval entity="&"
	621	sq='"' dq="'"
	622	>
	623	Decode and return a dictionary of attributes.
	624	{
	625	'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
	626	'empty': '', 'noval': None, 'entity': '&',
	627	'sq': '"', 'dq': '\''
	628	}.
	629	NB HTMLParser is stricter in Python 2.6 & 3.2 than in later versions,
	630	but the cases in the unit test will work for all of 2.6, 2.7, 3.2-3.5.
	631	"""
	632	parser = HTMLAttributeParser()
	633	try:
	634	parser.feed(html_element)
	635	parser.close()
	636	# Older Python may throw HTMLParseError in case of malformed HTML
	637	except compat_HTMLParseError:
	638	pass
	639	return parser.attrs
	640
	641
	642	def parse_list(webpage):
	643	"""Given a string for an series of HTML <li> elements,
	644	return a dictionary of their attributes"""
	645	parser = HTMLListAttrsParser()
	646	parser.feed(webpage)
	647	parser.close()
	648	return parser.items
	649
	650
	651	def clean_html(html):
	652	"""Clean an HTML snippet into a readable string"""
	653
	654	if html is None: # Convenience for sanitizing descriptions etc.
	655	return html
	656
	657	html = re.sub(r'\s+', ' ', html)
	658	html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
	659	html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
	660	# Strip html tags
	661	html = re.sub('<.*?>', '', html)
	662	# Replace html entities
	663	html = unescapeHTML(html)
	664	return html.strip()
	665
	666
	667	def sanitize_open(filename, open_mode):
	668	"""Try to open the given filename, and slightly tweak it if this fails.
	669
	670	Attempts to open the given filename. If this fails, it tries to change
	671	the filename slightly, step by step, until it's either able to open it
	672	or it fails and raises a final exception, like the standard open()
	673	function.
	674
	675	It returns the tuple (stream, definitive_file_name).
	676	"""
	677	try:
	678	if filename == '-':
	679	if sys.platform == 'win32':
	680	import msvcrt
	681	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	682	return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
	683	stream = locked_file(filename, open_mode, block=False).open()
	684	return (stream, filename)
	685	except (IOError, OSError) as err:
	686	if err.errno in (errno.EACCES,):
	687	raise
	688
	689	# In case of error, try to remove win32 forbidden chars
	690	alt_filename = sanitize_path(filename)
	691	if alt_filename == filename:
	692	raise
	693	else:
	694	# An exception here should be caught in the caller
	695	stream = locked_file(filename, open_mode, block=False).open()
	696	return (stream, alt_filename)
	697
	698
	699	def timeconvert(timestr):
	700	"""Convert RFC 2822 defined time string into system timestamp"""
	701	timestamp = None
	702	timetuple = email.utils.parsedate_tz(timestr)
	703	if timetuple is not None:
	704	timestamp = email.utils.mktime_tz(timetuple)
	705	return timestamp
	706
	707
	708	def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
	709	"""Sanitizes a string so it could be used as part of a filename.
	710	@param restricted Use a stricter subset of allowed characters
	711	@param is_id Whether this is an ID that should be kept unchanged if possible.
	712	If unset, yt-dlp's new sanitization rules are in effect
	713	"""
	714	if s == '':
	715	return ''
	716
	717	def replace_insane(char):
	718	if restricted and char in ACCENT_CHARS:
	719	return ACCENT_CHARS[char]
	720	elif not restricted and char == '\n':
	721	return '\0 '
	722	elif char == '?' or ord(char) < 32 or ord(char) == 127:
	723	return ''
	724	elif char == '"':
	725	return '' if restricted else '\''
	726	elif char == ':':
	727	return '\0_\0-' if restricted else '\0 \0-'
	728	elif char in '\\/\|*<>':
	729	return '\0_'
	730	if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
	731	return '\0_'
	732	return char
	733
	734	s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
	735	result = ''.join(map(replace_insane, s))
	736	if is_id is NO_DEFAULT:
	737	result = re.sub('(\0.)(?:(?=\\1)..)+', r'\1', result) # Remove repeated substitute chars
	738	STRIP_RE = '(?:\0.\|[ _-])*'
	739	result = re.sub(f'^\0.{STRIP_RE}\|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
	740	result = result.replace('\0', '') or '_'
	741
	742	if not is_id:
	743	while '__' in result:
	744	result = result.replace('__', '_')
	745	result = result.strip('_')
	746	# Common case of "Foreign band name - English song title"
	747	if restricted and result.startswith('-_'):
	748	result = result[2:]
	749	if result.startswith('-'):
	750	result = '_' + result[len('-'):]
	751	result = result.lstrip('.')
	752	if not result:
	753	result = '_'
	754	return result
	755
	756
	757	def sanitize_path(s, force=False):
	758	"""Sanitizes and normalizes path on Windows"""
	759	if sys.platform == 'win32':
	760	force = False
	761	drive_or_unc, _ = os.path.splitdrive(s)
	762	if sys.version_info < (2, 7) and not drive_or_unc:
	763	drive_or_unc, _ = os.path.splitunc(s)
	764	elif force:
	765	drive_or_unc = ''
	766	else:
	767	return s
	768
	769	norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
	770	if drive_or_unc:
	771	norm_path.pop(0)
	772	sanitized_path = [
	773	path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\\|\\?\*]\|[\s.]$)', '#', path_part)
	774	for path_part in norm_path]
	775	if drive_or_unc:
	776	sanitized_path.insert(0, drive_or_unc + os.path.sep)
	777	elif force and s[0] == os.path.sep:
	778	sanitized_path.insert(0, os.path.sep)
	779	return os.path.join(*sanitized_path)
	780
	781
	782	def sanitize_url(url):
	783	# Prepend protocol-less URLs with `http:` scheme in order to mitigate
	784	# the number of unwanted failures due to missing protocol
	785	if url.startswith('//'):
	786	return 'http:%s' % url
	787	# Fix some common typos seen so far
	788	COMMON_TYPOS = (
	789	# https://github.com/ytdl-org/youtube-dl/issues/15649
	790	(r'^httpss://', r'https://'),
	791	# https://bx1.be/lives/direct-tv/
	792	(r'^rmtp([es]?)://', r'rtmp\1://'),
	793	)
	794	for mistake, fixup in COMMON_TYPOS:
	795	if re.match(mistake, url):
	796	return re.sub(mistake, fixup, url)
	797	return url
	798
	799
	800	def extract_basic_auth(url):
	801	parts = compat_urlparse.urlsplit(url)
	802	if parts.username is None:
	803	return url, None
	804	url = compat_urlparse.urlunsplit(parts._replace(netloc=(
	805	parts.hostname if parts.port is None
	806	else '%s:%d' % (parts.hostname, parts.port))))
	807	auth_payload = base64.b64encode(
	808	('%s:%s' % (parts.username, parts.password or '')).encode('utf-8'))
	809	return url, 'Basic ' + auth_payload.decode('utf-8')
	810
	811
	812	def sanitized_Request(url, args, *kwargs):
	813	url, auth_header = extract_basic_auth(escape_url(sanitize_url(url)))
	814	if auth_header is not None:
	815	headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {})
	816	headers['Authorization'] = auth_header
	817	return compat_urllib_request.Request(url, args, *kwargs)
	818
	819
	820	def expand_path(s):
	821	"""Expand shell variables and ~"""
	822	return os.path.expandvars(compat_expanduser(s))
	823
	824
	825	def orderedSet(iterable):
	826	""" Remove all duplicates from the input iterable """
	827	res = []
	828	for el in iterable:
	829	if el not in res:
	830	res.append(el)
	831	return res
	832
	833
	834	def _htmlentity_transform(entity_with_semicolon):
	835	"""Transforms an HTML entity to a character."""
	836	entity = entity_with_semicolon[:-1]
	837
	838	# Known non-numeric HTML entity
	839	if entity in compat_html_entities.name2codepoint:
	840	return compat_chr(compat_html_entities.name2codepoint[entity])
	841
	842	# TODO: HTML5 allows entities without a semicolon. For example,
	843	# '&Eacuteric' should be decoded as 'Éric'.
	844	if entity_with_semicolon in compat_html_entities_html5:
	845	return compat_html_entities_html5[entity_with_semicolon]
	846
	847	mobj = re.match(r'#(x[0-9a-fA-F]+\|[0-9]+)', entity)
	848	if mobj is not None:
	849	numstr = mobj.group(1)
	850	if numstr.startswith('x'):
	851	base = 16
	852	numstr = '0%s' % numstr
	853	else:
	854	base = 10
	855	# See https://github.com/ytdl-org/youtube-dl/issues/7518
	856	try:
	857	return compat_chr(int(numstr, base))
	858	except ValueError:
	859	pass
	860
	861	# Unknown entity in name, return its literal representation
	862	return '&%s;' % entity
	863
	864
	865	def unescapeHTML(s):
	866	if s is None:
	867	return None
	868	assert type(s) == compat_str
	869
	870	return re.sub(
	871	r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
	872
	873
	874	def escapeHTML(text):
	875	return (
	876	text
	877	.replace('&', '&')
	878	.replace('<', '<')
	879	.replace('>', '>')
	880	.replace('"', '"')
	881	.replace("'", ''')
	882	)
	883
	884
	885	def process_communicate_or_kill(p, args, *kwargs):
	886	try:
	887	return p.communicate(args, *kwargs)
	888	except BaseException: # Including KeyboardInterrupt
	889	p.kill()
	890	p.wait()
	891	raise
	892
	893
	894	class Popen(subprocess.Popen):
	895	if sys.platform == 'win32':
	896	_startupinfo = subprocess.STARTUPINFO()
	897	_startupinfo.dwFlags \|= subprocess.STARTF_USESHOWWINDOW
	898	else:
	899	_startupinfo = None
	900
	901	def __init__(self, args, *kwargs):
	902	super(Popen, self).__init__(args, *kwargs, startupinfo=self._startupinfo)
	903
	904	def communicate_or_kill(self, args, *kwargs):
	905	return process_communicate_or_kill(self, args, *kwargs)
	906
	907
	908	def get_subprocess_encoding():
	909	if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
	910	# For subprocess calls, encode with locale encoding
	911	# Refer to http://stackoverflow.com/a/9951851/35070
	912	encoding = preferredencoding()
	913	else:
	914	encoding = sys.getfilesystemencoding()
	915	if encoding is None:
	916	encoding = 'utf-8'
	917	return encoding
	918
	919
	920	def encodeFilename(s, for_subprocess=False):
	921	"""
	922	@param s The name of the file
	923	"""
	924
	925	assert type(s) == compat_str
	926
	927	# Python 3 has a Unicode API
	928	if sys.version_info >= (3, 0):
	929	return s
	930
	931	# Pass '' directly to use Unicode APIs on Windows 2000 and up
	932	# (Detecting Windows NT 4 is tricky because 'major >= 4' would
	933	# match Windows 9x series as well. Besides, NT 4 is obsolete.)
	934	if not for_subprocess and sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
	935	return s
	936
	937	# Jython assumes filenames are Unicode strings though reported as Python 2.x compatible
	938	if sys.platform.startswith('java'):
	939	return s
	940
	941	return s.encode(get_subprocess_encoding(), 'ignore')
	942
	943
	944	def decodeFilename(b, for_subprocess=False):
	945
	946	if sys.version_info >= (3, 0):
	947	return b
	948
	949	if not isinstance(b, bytes):
	950	return b
	951
	952	return b.decode(get_subprocess_encoding(), 'ignore')
	953
	954
	955	def encodeArgument(s):
	956	if not isinstance(s, compat_str):
	957	# Legacy code that uses byte strings
	958	# Uncomment the following line after fixing all post processors
	959	# assert False, 'Internal error: %r should be of type %r, is %r' % (s, compat_str, type(s))
	960	s = s.decode('ascii')
	961	return encodeFilename(s, True)
	962
	963
	964	def decodeArgument(b):
	965	return decodeFilename(b, True)
	966
	967
	968	def decodeOption(optval):
	969	if optval is None:
	970	return optval
	971	if isinstance(optval, bytes):
	972	optval = optval.decode(preferredencoding())
	973
	974	assert isinstance(optval, compat_str)
	975	return optval
	976
	977
	978	_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
	979
	980
	981	def timetuple_from_msec(msec):
	982	secs, msec = divmod(msec, 1000)
	983	mins, secs = divmod(secs, 60)
	984	hrs, mins = divmod(mins, 60)
	985	return _timetuple(hrs, mins, secs, msec)
	986
	987
	988	def formatSeconds(secs, delim=':', msec=False):
	989	time = timetuple_from_msec(secs * 1000)
	990	if time.hours:
	991	ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
	992	elif time.minutes:
	993	ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
	994	else:
	995	ret = '%d' % time.seconds
	996	return '%s.%03d' % (ret, time.milliseconds) if msec else ret
	997
	998
	999	def _ssl_load_windows_store_certs(ssl_context, storename):
	1000	# Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py
	1001	try:
	1002	certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename)
	1003	if encoding == 'x509_asn' and (
	1004	trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)]
	1005	except PermissionError:
	1006	return
	1007	for cert in certs:
	1008	try:
	1009	ssl_context.load_verify_locations(cadata=cert)
	1010	except ssl.SSLError:
	1011	pass
	1012
	1013
	1014	def make_HTTPS_handler(params, **kwargs):
	1015	opts_check_certificate = not params.get('nocheckcertificate')
	1016	context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
	1017	context.check_hostname = opts_check_certificate
	1018	if params.get('legacyserverconnect'):
	1019	context.options \|= 4 # SSL_OP_LEGACY_SERVER_CONNECT
	1020	context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE
	1021	if opts_check_certificate:
	1022	if has_certifi and 'no-certifi' not in params.get('compat_opts', []):
	1023	context.load_verify_locations(cafile=certifi.where())
	1024	else:
	1025	try:
	1026	context.load_default_certs()
	1027	# Work around the issue in load_default_certs when there are bad certificates. See:
	1028	# https://github.com/yt-dlp/yt-dlp/issues/1060,
	1029	# https://bugs.python.org/issue35665, https://bugs.python.org/issue45312
	1030	except ssl.SSLError:
	1031	# enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151
	1032	if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'):
	1033	# Create a new context to discard any certificates that were already loaded
	1034	context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT)
	1035	context.check_hostname, context.verify_mode = True, ssl.CERT_REQUIRED
	1036	for storename in ('CA', 'ROOT'):
	1037	_ssl_load_windows_store_certs(context, storename)
	1038	context.set_default_verify_paths()
	1039	return YoutubeDLHTTPSHandler(params, context=context, **kwargs)
	1040
	1041
	1042	def bug_reports_message(before=';'):
	1043	msg = ('please report this issue on https://github.com/yt-dlp/yt-dlp , '
	1044	'filling out the appropriate issue template. '
	1045	'Confirm you are on the latest version using yt-dlp -U')
	1046
	1047	before = before.rstrip()
	1048	if not before or before.endswith(('.', '!', '?')):
	1049	msg = msg[0].title() + msg[1:]
	1050
	1051	return (before + ' ' if before else '') + msg
	1052
	1053
	1054	class YoutubeDLError(Exception):
	1055	"""Base exception for YoutubeDL errors."""
	1056	msg = None
	1057
	1058	def __init__(self, msg=None):
	1059	if msg is not None:
	1060	self.msg = msg
	1061	elif self.msg is None:
	1062	self.msg = type(self).__name__
	1063	super().__init__(self.msg)
	1064
	1065
	1066	network_exceptions = [compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error]
	1067	if hasattr(ssl, 'CertificateError'):
	1068	network_exceptions.append(ssl.CertificateError)
	1069	network_exceptions = tuple(network_exceptions)
	1070
	1071
	1072	class ExtractorError(YoutubeDLError):
	1073	"""Error during info extraction."""
	1074
	1075	def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
	1076	""" tb, if given, is the original traceback (so that it can be printed out).
	1077	If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
	1078	"""
	1079	if sys.exc_info()[0] in network_exceptions:
	1080	expected = True
	1081
	1082	self.orig_msg = str(msg)
	1083	self.traceback = tb
	1084	self.expected = expected
	1085	self.cause = cause
	1086	self.video_id = video_id
	1087	self.ie = ie
	1088	self.exc_info = sys.exc_info() # preserve original exception
	1089
	1090	super(ExtractorError, self).__init__(''.join((
	1091	format_field(ie, template='[%s] '),
	1092	format_field(video_id, template='%s: '),
	1093	msg,
	1094	format_field(cause, template=' (caused by %r)'),
	1095	'' if expected else bug_reports_message())))
	1096
	1097	def format_traceback(self):
	1098	return join_nonempty(
	1099	self.traceback and ''.join(traceback.format_tb(self.traceback)),
	1100	self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
	1101	delim='\n') or None
	1102
	1103
	1104	class UnsupportedError(ExtractorError):
	1105	def __init__(self, url):
	1106	super(UnsupportedError, self).__init__(
	1107	'Unsupported URL: %s' % url, expected=True)
	1108	self.url = url
	1109
	1110
	1111	class RegexNotFoundError(ExtractorError):
	1112	"""Error when a regex didn't match"""
	1113	pass
	1114
	1115
	1116	class GeoRestrictedError(ExtractorError):
	1117	"""Geographic restriction Error exception.
	1118
	1119	This exception may be thrown when a video is not available from your
	1120	geographic location due to geographic restrictions imposed by a website.
	1121	"""
	1122
	1123	def __init__(self, msg, countries=None, **kwargs):
	1124	kwargs['expected'] = True
	1125	super(GeoRestrictedError, self).__init__(msg, **kwargs)
	1126	self.countries = countries
	1127
	1128
	1129	class DownloadError(YoutubeDLError):
	1130	"""Download Error exception.
	1131
	1132	This exception may be thrown by FileDownloader objects if they are not
	1133	configured to continue on errors. They will contain the appropriate
	1134	error message.
	1135	"""
	1136
	1137	def __init__(self, msg, exc_info=None):
	1138	""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
	1139	super(DownloadError, self).__init__(msg)
	1140	self.exc_info = exc_info
	1141
	1142
	1143	class EntryNotInPlaylist(YoutubeDLError):
	1144	"""Entry not in playlist exception.
	1145
	1146	This exception will be thrown by YoutubeDL when a requested entry
	1147	is not found in the playlist info_dict
	1148	"""
	1149	msg = 'Entry not found in info'
	1150
	1151
	1152	class SameFileError(YoutubeDLError):
	1153	"""Same File exception.
	1154
	1155	This exception will be thrown by FileDownloader objects if they detect
	1156	multiple files would have to be downloaded to the same file on disk.
	1157	"""
	1158	msg = 'Fixed output name but more than one file to download'
	1159
	1160	def __init__(self, filename=None):
	1161	if filename is not None:
	1162	self.msg += f': {filename}'
	1163	super().__init__(self.msg)
	1164
	1165
	1166	class PostProcessingError(YoutubeDLError):
	1167	"""Post Processing exception.
	1168
	1169	This exception may be raised by PostProcessor's .run() method to
	1170	indicate an error in the postprocessing task.
	1171	"""
	1172
	1173
	1174	class DownloadCancelled(YoutubeDLError):
	1175	""" Exception raised when the download queue should be interrupted """
	1176	msg = 'The download was cancelled'
	1177
	1178
	1179	class ExistingVideoReached(DownloadCancelled):
	1180	""" --break-on-existing triggered """
	1181	msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
	1182
	1183
	1184	class RejectedVideoReached(DownloadCancelled):
	1185	""" --break-on-reject triggered """
	1186	msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject'
	1187
	1188
	1189	class MaxDownloadsReached(DownloadCancelled):
	1190	""" --max-downloads limit has been reached. """
	1191	msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
	1192
	1193
	1194	class ReExtractInfo(YoutubeDLError):
	1195	""" Video info needs to be re-extracted. """
	1196
	1197	def __init__(self, msg, expected=False):
	1198	super().__init__(msg)
	1199	self.expected = expected
	1200
	1201
	1202	class ThrottledDownload(ReExtractInfo):
	1203	""" Download speed below --throttled-rate. """
	1204	msg = 'The download speed is below throttle limit'
	1205
	1206	def __init__(self):
	1207	super().__init__(self.msg, expected=False)
	1208
	1209
	1210	class UnavailableVideoError(YoutubeDLError):
	1211	"""Unavailable Format exception.
	1212
	1213	This exception will be thrown when a video is requested
	1214	in a format that is not available for that video.
	1215	"""
	1216	msg = 'Unable to download video'
	1217
	1218	def __init__(self, err=None):
	1219	if err is not None:
	1220	self.msg += f': {err}'
	1221	super().__init__(self.msg)
	1222
	1223
	1224	class ContentTooShortError(YoutubeDLError):
	1225	"""Content Too Short exception.
	1226
	1227	This exception may be raised by FileDownloader objects when a file they
	1228	download is too small for what the server announced first, indicating
	1229	the connection was probably interrupted.
	1230	"""
	1231
	1232	def __init__(self, downloaded, expected):
	1233	super(ContentTooShortError, self).__init__(
	1234	'Downloaded {0} bytes, expected {1} bytes'.format(downloaded, expected)
	1235	)
	1236	# Both in bytes
	1237	self.downloaded = downloaded
	1238	self.expected = expected
	1239
	1240
	1241	class XAttrMetadataError(YoutubeDLError):
	1242	def __init__(self, code=None, msg='Unknown error'):
	1243	super(XAttrMetadataError, self).__init__(msg)
	1244	self.code = code
	1245	self.msg = msg
	1246
	1247	# Parsing code and msg
	1248	if (self.code in (errno.ENOSPC, errno.EDQUOT)
	1249	or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
	1250	self.reason = 'NO_SPACE'
	1251	elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
	1252	self.reason = 'VALUE_TOO_LONG'
	1253	else:
	1254	self.reason = 'NOT_SUPPORTED'
	1255
	1256
	1257	class XAttrUnavailableError(YoutubeDLError):
	1258	pass
	1259
	1260
	1261	def _create_http_connection(ydl_handler, http_class, is_https, args, *kwargs):
	1262	# Working around python 2 bug (see http://bugs.python.org/issue17849) by limiting
	1263	# expected HTTP responses to meet HTTP/1.0 or later (see also
	1264	# https://github.com/ytdl-org/youtube-dl/issues/6727)
	1265	if sys.version_info < (3, 0):
	1266	kwargs['strict'] = True
	1267	hc = http_class(args, *compat_kwargs(kwargs))
	1268	source_address = ydl_handler._params.get('source_address')
	1269
	1270	if source_address is not None:
	1271	# This is to workaround _create_connection() from socket where it will try all
	1272	# address data from getaddrinfo() including IPv6. This filters the result from
	1273	# getaddrinfo() based on the source_address value.
	1274	# This is based on the cpython socket.create_connection() function.
	1275	# https://github.com/python/cpython/blob/master/Lib/socket.py#L691
	1276	def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None):
	1277	host, port = address
	1278	err = None
	1279	addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM)
	1280	af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6
	1281	ip_addrs = [addr for addr in addrs if addr[0] == af]
	1282	if addrs and not ip_addrs:
	1283	ip_version = 'v4' if af == socket.AF_INET else 'v6'
	1284	raise socket.error(
	1285	"No remote IP%s addresses available for connect, can't use '%s' as source address"
	1286	% (ip_version, source_address[0]))
	1287	for res in ip_addrs:
	1288	af, socktype, proto, canonname, sa = res
	1289	sock = None
	1290	try:
	1291	sock = socket.socket(af, socktype, proto)
	1292	if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT:
	1293	sock.settimeout(timeout)
	1294	sock.bind(source_address)
	1295	sock.connect(sa)
	1296	err = None # Explicitly break reference cycle
	1297	return sock
	1298	except socket.error as _:
	1299	err = _
	1300	if sock is not None:
	1301	sock.close()
	1302	if err is not None:
	1303	raise err
	1304	else:
	1305	raise socket.error('getaddrinfo returns an empty list')
	1306	if hasattr(hc, '_create_connection'):
	1307	hc._create_connection = _create_connection
	1308	sa = (source_address, 0)
	1309	if hasattr(hc, 'source_address'): # Python 2.7+
	1310	hc.source_address = sa
	1311	else: # Python 2.6
	1312	def _hc_connect(self, args, *kwargs):
	1313	sock = _create_connection(
	1314	(self.host, self.port), self.timeout, sa)
	1315	if is_https:
	1316	self.sock = ssl.wrap_socket(
	1317	sock, self.key_file, self.cert_file,
	1318	ssl_version=ssl.PROTOCOL_TLSv1)
	1319	else:
	1320	self.sock = sock
	1321	hc.connect = functools.partial(_hc_connect, hc)
	1322
	1323	return hc
	1324
	1325
	1326	def handle_youtubedl_headers(headers):
	1327	filtered_headers = headers
	1328
	1329	if 'Youtubedl-no-compression' in filtered_headers:
	1330	filtered_headers = dict((k, v) for k, v in filtered_headers.items() if k.lower() != 'accept-encoding')
	1331	del filtered_headers['Youtubedl-no-compression']
	1332
	1333	return filtered_headers
	1334
	1335
	1336	class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
	1337	"""Handler for HTTP requests and responses.
	1338
	1339	This class, when installed with an OpenerDirector, automatically adds
	1340	the standard headers to every HTTP request and handles gzipped and
	1341	deflated responses from web servers. If compression is to be avoided in
	1342	a particular request, the original request in the program code only has
	1343	to include the HTTP header "Youtubedl-no-compression", which will be
	1344	removed before making the real request.
	1345
	1346	Part of this code was copied from:
	1347
	1348	http://techknack.net/python-urllib2-handlers/
	1349
	1350	Andrew Rowls, the author of that code, agreed to release it to the
	1351	public domain.
	1352	"""
	1353
	1354	def __init__(self, params, args, *kwargs):
	1355	compat_urllib_request.HTTPHandler.__init__(self, args, *kwargs)
	1356	self._params = params
	1357
	1358	def http_open(self, req):
	1359	conn_class = compat_http_client.HTTPConnection
	1360
	1361	socks_proxy = req.headers.get('Ytdl-socks-proxy')
	1362	if socks_proxy:
	1363	conn_class = make_socks_conn_class(conn_class, socks_proxy)
	1364	del req.headers['Ytdl-socks-proxy']
	1365
	1366	return self.do_open(functools.partial(
	1367	_create_http_connection, self, conn_class, False),
	1368	req)
	1369
	1370	@staticmethod
	1371	def deflate(data):
	1372	if not data:
	1373	return data
	1374	try:
	1375	return zlib.decompress(data, -zlib.MAX_WBITS)
	1376	except zlib.error:
	1377	return zlib.decompress(data)
	1378
	1379	@staticmethod
	1380	def brotli(data):
	1381	if not data:
	1382	return data
	1383	return compat_brotli.decompress(data)
	1384
	1385	def http_request(self, req):
	1386	# According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
	1387	# always respected by websites, some tend to give out URLs with non percent-encoded
	1388	# non-ASCII characters (see telemb.py, ard.py [#3412])
	1389	# urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
	1390	# To work around aforementioned issue we will replace request's original URL with
	1391	# percent-encoded one
	1392	# Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
	1393	# the code of this workaround has been moved here from YoutubeDL.urlopen()
	1394	url = req.get_full_url()
	1395	url_escaped = escape_url(url)
	1396
	1397	# Substitute URL if any change after escaping
	1398	if url != url_escaped:
	1399	req = update_Request(req, url=url_escaped)
	1400
	1401	for h, v in self._params.get('http_headers', std_headers).items():
	1402	# Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275
	1403	# The dict keys are capitalized because of this bug by urllib
	1404	if h.capitalize() not in req.headers:
	1405	req.add_header(h, v)
	1406
	1407	if 'Accept-encoding' not in req.headers:
	1408	req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS))
	1409
	1410	req.headers = handle_youtubedl_headers(req.headers)
	1411
	1412	if sys.version_info < (2, 7) and '#' in req.get_full_url():
	1413	# Python 2.6 is brain-dead when it comes to fragments
	1414	req._Request__original = req._Request__original.partition('#')[0]
	1415	req._Request__r_type = req._Request__r_type.partition('#')[0]
	1416
	1417	return req
	1418
	1419	def http_response(self, req, resp):
	1420	old_resp = resp
	1421	# gzip
	1422	if resp.headers.get('Content-encoding', '') == 'gzip':
	1423	content = resp.read()
	1424	gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb')
	1425	try:
	1426	uncompressed = io.BytesIO(gz.read())
	1427	except IOError as original_ioerror:
	1428	# There may be junk add the end of the file
	1429	# See http://stackoverflow.com/q/4928560/35070 for details
	1430	for i in range(1, 1024):
	1431	try:
	1432	gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb')
	1433	uncompressed = io.BytesIO(gz.read())
	1434	except IOError:
	1435	continue
	1436	break
	1437	else:
	1438	raise original_ioerror
	1439	resp = compat_urllib_request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code)
	1440	resp.msg = old_resp.msg
	1441	del resp.headers['Content-encoding']
	1442	# deflate
	1443	if resp.headers.get('Content-encoding', '') == 'deflate':
	1444	gz = io.BytesIO(self.deflate(resp.read()))
	1445	resp = compat_urllib_request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code)
	1446	resp.msg = old_resp.msg
	1447	del resp.headers['Content-encoding']
	1448	# brotli
	1449	if resp.headers.get('Content-encoding', '') == 'br':
	1450	resp = compat_urllib_request.addinfourl(
	1451	io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code)
	1452	resp.msg = old_resp.msg
	1453	del resp.headers['Content-encoding']
	1454	# Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see
	1455	# https://github.com/ytdl-org/youtube-dl/issues/6457).
	1456	if 300 <= resp.code < 400:
	1457	location = resp.headers.get('Location')
	1458	if location:
	1459	# As of RFC 2616 default charset is iso-8859-1 that is respected by python 3
	1460	if sys.version_info >= (3, 0):
	1461	location = location.encode('iso-8859-1').decode('utf-8')
	1462	else:
	1463	location = location.decode('utf-8')
	1464	location_escaped = escape_url(location)
	1465	if location != location_escaped:
	1466	del resp.headers['Location']
	1467	if sys.version_info < (3, 0):
	1468	location_escaped = location_escaped.encode('utf-8')
	1469	resp.headers['Location'] = location_escaped
	1470	return resp
	1471
	1472	https_request = http_request
	1473	https_response = http_response
	1474
	1475
	1476	def make_socks_conn_class(base_class, socks_proxy):
	1477	assert issubclass(base_class, (
	1478	compat_http_client.HTTPConnection, compat_http_client.HTTPSConnection))
	1479
	1480	url_components = compat_urlparse.urlparse(socks_proxy)
	1481	if url_components.scheme.lower() == 'socks5':
	1482	socks_type = ProxyType.SOCKS5
	1483	elif url_components.scheme.lower() in ('socks', 'socks4'):
	1484	socks_type = ProxyType.SOCKS4
	1485	elif url_components.scheme.lower() == 'socks4a':
	1486	socks_type = ProxyType.SOCKS4A
	1487
	1488	def unquote_if_non_empty(s):
	1489	if not s:
	1490	return s
	1491	return compat_urllib_parse_unquote_plus(s)
	1492
	1493	proxy_args = (
	1494	socks_type,
	1495	url_components.hostname, url_components.port or 1080,
	1496	True, # Remote DNS
	1497	unquote_if_non_empty(url_components.username),
	1498	unquote_if_non_empty(url_components.password),
	1499	)
	1500
	1501	class SocksConnection(base_class):
	1502	def connect(self):
	1503	self.sock = sockssocket()
	1504	self.sock.setproxy(*proxy_args)
	1505	if type(self.timeout) in (int, float):
	1506	self.sock.settimeout(self.timeout)
	1507	self.sock.connect((self.host, self.port))
	1508
	1509	if isinstance(self, compat_http_client.HTTPSConnection):
	1510	if hasattr(self, '_context'): # Python > 2.6
	1511	self.sock = self._context.wrap_socket(
	1512	self.sock, server_hostname=self.host)
	1513	else:
	1514	self.sock = ssl.wrap_socket(self.sock)
	1515
	1516	return SocksConnection
	1517
	1518
	1519	class YoutubeDLHTTPSHandler(compat_urllib_request.HTTPSHandler):
	1520	def __init__(self, params, https_conn_class=None, args, *kwargs):
	1521	compat_urllib_request.HTTPSHandler.__init__(self, args, *kwargs)
	1522	self._https_conn_class = https_conn_class or compat_http_client.HTTPSConnection
	1523	self._params = params
	1524
	1525	def https_open(self, req):
	1526	kwargs = {}
	1527	conn_class = self._https_conn_class
	1528
	1529	if hasattr(self, '_context'): # python > 2.6
	1530	kwargs['context'] = self._context
	1531	if hasattr(self, '_check_hostname'): # python 3.x
	1532	kwargs['check_hostname'] = self._check_hostname
	1533
	1534	socks_proxy = req.headers.get('Ytdl-socks-proxy')
	1535	if socks_proxy:
	1536	conn_class = make_socks_conn_class(conn_class, socks_proxy)
	1537	del req.headers['Ytdl-socks-proxy']
	1538
	1539	return self.do_open(functools.partial(
	1540	_create_http_connection, self, conn_class, True),
	1541	req, **kwargs)
	1542
	1543
	1544	class YoutubeDLCookieJar(compat_cookiejar.MozillaCookieJar):
	1545	"""
	1546	See [1] for cookie file format.
	1547
	1548	1. https://curl.haxx.se/docs/http-cookies.html
	1549	"""
	1550	_HTTPONLY_PREFIX = '#HttpOnly_'
	1551	_ENTRY_LEN = 7
	1552	_HEADER = '''# Netscape HTTP Cookie File
	1553	# This file is generated by yt-dlp. Do not edit.
	1554
	1555	'''
	1556	_CookieFileEntry = collections.namedtuple(
	1557	'CookieFileEntry',
	1558	('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value'))
	1559
	1560	def save(self, filename=None, ignore_discard=False, ignore_expires=False):
	1561	"""
	1562	Save cookies to a file.
	1563
	1564	Most of the code is taken from CPython 3.8 and slightly adapted
	1565	to support cookie files with UTF-8 in both python 2 and 3.
	1566	"""
	1567	if filename is None:
	1568	if self.filename is not None:
	1569	filename = self.filename
	1570	else:
	1571	raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
	1572
	1573	# Store session cookies with `expires` set to 0 instead of an empty
	1574	# string
	1575	for cookie in self:
	1576	if cookie.expires is None:
	1577	cookie.expires = 0
	1578
	1579	with io.open(filename, 'w', encoding='utf-8') as f:
	1580	f.write(self._HEADER)
	1581	now = time.time()
	1582	for cookie in self:
	1583	if not ignore_discard and cookie.discard:
	1584	continue
	1585	if not ignore_expires and cookie.is_expired(now):
	1586	continue
	1587	if cookie.secure:
	1588	secure = 'TRUE'
	1589	else:
	1590	secure = 'FALSE'
	1591	if cookie.domain.startswith('.'):
	1592	initial_dot = 'TRUE'
	1593	else:
	1594	initial_dot = 'FALSE'
	1595	if cookie.expires is not None:
	1596	expires = compat_str(cookie.expires)
	1597	else:
	1598	expires = ''
	1599	if cookie.value is None:
	1600	# cookies.txt regards 'Set-Cookie: foo' as a cookie
	1601	# with no name, whereas http.cookiejar regards it as a
	1602	# cookie with no value.
	1603	name = ''
	1604	value = cookie.name
	1605	else:
	1606	name = cookie.name
	1607	value = cookie.value
	1608	f.write(
	1609	'\t'.join([cookie.domain, initial_dot, cookie.path,
	1610	secure, expires, name, value]) + '\n')
	1611
	1612	def load(self, filename=None, ignore_discard=False, ignore_expires=False):
	1613	"""Load cookies from a file."""
	1614	if filename is None:
	1615	if self.filename is not None:
	1616	filename = self.filename
	1617	else:
	1618	raise ValueError(compat_cookiejar.MISSING_FILENAME_TEXT)
	1619
	1620	def prepare_line(line):
	1621	if line.startswith(self._HTTPONLY_PREFIX):
	1622	line = line[len(self._HTTPONLY_PREFIX):]
	1623	# comments and empty lines are fine
	1624	if line.startswith('#') or not line.strip():
	1625	return line
	1626	cookie_list = line.split('\t')
	1627	if len(cookie_list) != self._ENTRY_LEN:
	1628	raise compat_cookiejar.LoadError('invalid length %d' % len(cookie_list))
	1629	cookie = self._CookieFileEntry(*cookie_list)
	1630	if cookie.expires_at and not cookie.expires_at.isdigit():
	1631	raise compat_cookiejar.LoadError('invalid expires at %s' % cookie.expires_at)
	1632	return line
	1633
	1634	cf = io.StringIO()
	1635	with io.open(filename, encoding='utf-8') as f:
	1636	for line in f:
	1637	try:
	1638	cf.write(prepare_line(line))
	1639	except compat_cookiejar.LoadError as e:
	1640	write_string(
	1641	'WARNING: skipping cookie file entry due to %s: %r\n'
	1642	% (e, line), sys.stderr)
	1643	continue
	1644	cf.seek(0)
	1645	self._really_load(cf, filename, ignore_discard, ignore_expires)
	1646	# Session cookies are denoted by either `expires` field set to
	1647	# an empty string or 0. MozillaCookieJar only recognizes the former
	1648	# (see [1]). So we need force the latter to be recognized as session
	1649	# cookies on our own.
	1650	# Session cookies may be important for cookies-based authentication,
	1651	# e.g. usually, when user does not check 'Remember me' check box while
	1652	# logging in on a site, some important cookies are stored as session
	1653	# cookies so that not recognizing them will result in failed login.
	1654	# 1. https://bugs.python.org/issue17164
	1655	for cookie in self:
	1656	# Treat `expires=0` cookies as session cookies
	1657	if cookie.expires == 0:
	1658	cookie.expires = None
	1659	cookie.discard = True
	1660
	1661
	1662	class YoutubeDLCookieProcessor(compat_urllib_request.HTTPCookieProcessor):
	1663	def __init__(self, cookiejar=None):
	1664	compat_urllib_request.HTTPCookieProcessor.__init__(self, cookiejar)
	1665
	1666	def http_response(self, request, response):
	1667	# Python 2 will choke on next HTTP request in row if there are non-ASCII
	1668	# characters in Set-Cookie HTTP header of last response (see
	1669	# https://github.com/ytdl-org/youtube-dl/issues/6769).
	1670	# In order to at least prevent crashing we will percent encode Set-Cookie
	1671	# header before HTTPCookieProcessor starts processing it.
	1672	# if sys.version_info < (3, 0) and response.headers:
	1673	# for set_cookie_header in ('Set-Cookie', 'Set-Cookie2'):
	1674	# set_cookie = response.headers.get(set_cookie_header)
	1675	# if set_cookie:
	1676	# set_cookie_escaped = compat_urllib_parse.quote(set_cookie, b"%/;:@&=+$,!~*'()?#[] ")
	1677	# if set_cookie != set_cookie_escaped:
	1678	# del response.headers[set_cookie_header]
	1679	# response.headers[set_cookie_header] = set_cookie_escaped
	1680	return compat_urllib_request.HTTPCookieProcessor.http_response(self, request, response)
	1681
	1682	https_request = compat_urllib_request.HTTPCookieProcessor.http_request
	1683	https_response = http_response
	1684
	1685
	1686	class YoutubeDLRedirectHandler(compat_urllib_request.HTTPRedirectHandler):
	1687	"""YoutubeDL redirect handler
	1688
	1689	The code is based on HTTPRedirectHandler implementation from CPython [1].
	1690
	1691	This redirect handler solves two issues:
	1692	- ensures redirect URL is always unicode under python 2
	1693	- introduces support for experimental HTTP response status code
	1694	308 Permanent Redirect [2] used by some sites [3]
	1695
	1696	1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py
	1697	2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308
	1698	3. https://github.com/ytdl-org/youtube-dl/issues/28768
	1699	"""
	1700
	1701	http_error_301 = http_error_303 = http_error_307 = http_error_308 = compat_urllib_request.HTTPRedirectHandler.http_error_302
	1702
	1703	def redirect_request(self, req, fp, code, msg, headers, newurl):
	1704	"""Return a Request or None in response to a redirect.
	1705
	1706	This is called by the http_error_30x methods when a
	1707	redirection response is received. If a redirection should
	1708	take place, return a new Request to allow http_error_30x to
	1709	perform the redirect. Otherwise, raise HTTPError if no-one
	1710	else should try to handle this url. Return None if you can't
	1711	but another Handler might.
	1712	"""
	1713	m = req.get_method()
	1714	if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD")
	1715	or code in (301, 302, 303) and m == "POST")):
	1716	raise compat_HTTPError(req.full_url, code, msg, headers, fp)
	1717	# Strictly (according to RFC 2616), 301 or 302 in response to
	1718	# a POST MUST NOT cause a redirection without confirmation
	1719	# from the user (of urllib.request, in this case). In practice,
	1720	# essentially all clients do redirect in this case, so we do
	1721	# the same.
	1722
	1723	# On python 2 urlh.geturl() may sometimes return redirect URL
	1724	# as byte string instead of unicode. This workaround allows
	1725	# to force it always return unicode.
	1726	if sys.version_info[0] < 3:
	1727	newurl = compat_str(newurl)
	1728
	1729	# Be conciliant with URIs containing a space. This is mainly
	1730	# redundant with the more complete encoding done in http_error_302(),
	1731	# but it is kept for compatibility with other callers.
	1732	newurl = newurl.replace(' ', '%20')
	1733
	1734	CONTENT_HEADERS = ("content-length", "content-type")
	1735	# NB: don't use dict comprehension for python 2.6 compatibility
	1736	newheaders = dict((k, v) for k, v in req.headers.items()
	1737	if k.lower() not in CONTENT_HEADERS)
	1738	return compat_urllib_request.Request(
	1739	newurl, headers=newheaders, origin_req_host=req.origin_req_host,
	1740	unverifiable=True)
	1741
	1742
	1743	def extract_timezone(date_str):
	1744	m = re.search(
	1745	r'''(?x)
	1746	^.{8,}? # >=8 char non-TZ prefix, if present
	1747	(?P<tz>Z\| # just the UTC Z, or
	1748	(?:(?<=.\b\d{4}\|\b\d{2}:\d\d)\| # preceded by 4 digits or hh:mm or
	1749	(?<!.\b[a-zA-Z]{3}\|[a-zA-Z]{4}\|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
	1750	[ ]? # optional space
	1751	(?P<sign>\+\|-) # +/-
	1752	(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
	1753	$)
	1754	''', date_str)
	1755	if not m:
	1756	timezone = datetime.timedelta()
	1757	else:
	1758	date_str = date_str[:-len(m.group('tz'))]
	1759	if not m.group('sign'):
	1760	timezone = datetime.timedelta()
	1761	else:
	1762	sign = 1 if m.group('sign') == '+' else -1
	1763	timezone = datetime.timedelta(
	1764	hours=sign * int(m.group('hours')),
	1765	minutes=sign * int(m.group('minutes')))
	1766	return timezone, date_str
	1767
	1768
	1769	def parse_iso8601(date_str, delimiter='T', timezone=None):
	1770	""" Return a UNIX timestamp from the given date """
	1771
	1772	if date_str is None:
	1773	return None
	1774
	1775	date_str = re.sub(r'\.[0-9]+', '', date_str)
	1776
	1777	if timezone is None:
	1778	timezone, date_str = extract_timezone(date_str)
	1779
	1780	try:
	1781	date_format = '%Y-%m-%d{0}%H:%M:%S'.format(delimiter)
	1782	dt = datetime.datetime.strptime(date_str, date_format) - timezone
	1783	return calendar.timegm(dt.timetuple())
	1784	except ValueError:
	1785	pass
	1786
	1787
	1788	def date_formats(day_first=True):
	1789	return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
	1790
	1791
	1792	def unified_strdate(date_str, day_first=True):
	1793	"""Return a string with the date in the format YYYYMMDD"""
	1794
	1795	if date_str is None:
	1796	return None
	1797	upload_date = None
	1798	# Replace commas
	1799	date_str = date_str.replace(',', ' ')
	1800	# Remove AM/PM + timezone
	1801	date_str = re.sub(r'(?i)\s*(?:AM\|PM)(?:\s+[A-Z]+)?', '', date_str)
	1802	_, date_str = extract_timezone(date_str)
	1803
	1804	for expression in date_formats(day_first):
	1805	try:
	1806	upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d')
	1807	except ValueError:
	1808	pass
	1809	if upload_date is None:
	1810	timetuple = email.utils.parsedate_tz(date_str)
	1811	if timetuple:
	1812	try:
	1813	upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d')
	1814	except ValueError:
	1815	pass
	1816	if upload_date is not None:
	1817	return compat_str(upload_date)
	1818
	1819
	1820	def unified_timestamp(date_str, day_first=True):
	1821	if date_str is None:
	1822	return None
	1823
	1824	date_str = re.sub(r'[,\|]', '', date_str)
	1825
	1826	pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
	1827	timezone, date_str = extract_timezone(date_str)
	1828
	1829	# Remove AM/PM + timezone
	1830	date_str = re.sub(r'(?i)\s*(?:AM\|PM)(?:\s+[A-Z]+)?', '', date_str)
	1831
	1832	# Remove unrecognized timezones from ISO 8601 alike timestamps
	1833	m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
	1834	if m:
	1835	date_str = date_str[:-len(m.group('tz'))]
	1836
	1837	# Python only supports microseconds, so remove nanoseconds
	1838	m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
	1839	if m:
	1840	date_str = m.group(1)
	1841
	1842	for expression in date_formats(day_first):
	1843	try:
	1844	dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta)
	1845	return calendar.timegm(dt.timetuple())
	1846	except ValueError:
	1847	pass
	1848	timetuple = email.utils.parsedate_tz(date_str)
	1849	if timetuple:
	1850	return calendar.timegm(timetuple) + pm_delta * 3600
	1851
	1852
	1853	def determine_ext(url, default_ext='unknown_video'):
	1854	if url is None or '.' not in url:
	1855	return default_ext
	1856	guess = url.partition('?')[0].rpartition('.')[2]
	1857	if re.match(r'^[A-Za-z0-9]+$', guess):
	1858	return guess
	1859	# Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
	1860	elif guess.rstrip('/') in KNOWN_EXTENSIONS:
	1861	return guess.rstrip('/')
	1862	else:
	1863	return default_ext
	1864
	1865
	1866	def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
	1867	return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
	1868
	1869
	1870	def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
	1871	"""
	1872	Return a datetime object from a string in the format YYYYMMDD or
	1873	(now\|today\|yesterday\|date)[+-][0-9](microsecond\|second\|minute\|hour\|day\|week\|month\|year)(s)?
	1874
	1875	format: string date format used to return datetime object from
	1876	precision: round the time portion of a datetime object.
	1877	auto\|microsecond\|second\|minute\|hour\|day.
	1878	auto: round to the unit provided in date_str (if applicable).
	1879	"""
	1880	auto_precision = False
	1881	if precision == 'auto':
	1882	auto_precision = True
	1883	precision = 'microsecond'
	1884	today = datetime_round(datetime.datetime.utcnow(), precision)
	1885	if date_str in ('now', 'today'):
	1886	return today
	1887	if date_str == 'yesterday':
	1888	return today - datetime.timedelta(days=1)
	1889	match = re.match(
	1890	r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond\|second\|minute\|hour\|day\|week\|month\|year)(s)?',
	1891	date_str)
	1892	if match is not None:
	1893	start_time = datetime_from_str(match.group('start'), precision, format)
	1894	time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
	1895	unit = match.group('unit')
	1896	if unit == 'month' or unit == 'year':
	1897	new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
	1898	unit = 'day'
	1899	else:
	1900	if unit == 'week':
	1901	unit = 'day'
	1902	time *= 7
	1903	delta = datetime.timedelta(**{unit + 's': time})
	1904	new_date = start_time + delta
	1905	if auto_precision:
	1906	return datetime_round(new_date, unit)
	1907	return new_date
	1908
	1909	return datetime_round(datetime.datetime.strptime(date_str, format), precision)
	1910
	1911
	1912	def date_from_str(date_str, format='%Y%m%d', strict=False):
	1913	"""
	1914	Return a datetime object from a string in the format YYYYMMDD or
	1915	(now\|today\|yesterday\|date)[+-][0-9](microsecond\|second\|minute\|hour\|day\|week\|month\|year)(s)?
	1916
	1917	If "strict", only (now\|today)[+-][0-9](day\|week\|month\|year)(s)? is allowed
	1918
	1919	format: string date format used to return datetime object from
	1920	"""
	1921	if strict and not re.fullmatch(r'\d{8}\|(now\|today)[+-]\d+(day\|week\|month\|year)(s)?', date_str):
	1922	raise ValueError(f'Invalid date format {date_str}')
	1923	return datetime_from_str(date_str, precision='microsecond', format=format).date()
	1924
	1925
	1926	def datetime_add_months(dt, months):
	1927	"""Increment/Decrement a datetime object by months."""
	1928	month = dt.month + months - 1
	1929	year = dt.year + month // 12
	1930	month = month % 12 + 1
	1931	day = min(dt.day, calendar.monthrange(year, month)[1])
	1932	return dt.replace(year, month, day)
	1933
	1934
	1935	def datetime_round(dt, precision='day'):
	1936	"""
	1937	Round a datetime object's time to a specific precision
	1938	"""
	1939	if precision == 'microsecond':
	1940	return dt
	1941
	1942	unit_seconds = {
	1943	'day': 86400,
	1944	'hour': 3600,
	1945	'minute': 60,
	1946	'second': 1,
	1947	}
	1948	roundto = lambda x, n: ((x + n / 2) // n) * n
	1949	timestamp = calendar.timegm(dt.timetuple())
	1950	return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision]))
	1951
	1952
	1953	def hyphenate_date(date_str):
	1954	"""
	1955	Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
	1956	match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
	1957	if match is not None:
	1958	return '-'.join(match.groups())
	1959	else:
	1960	return date_str
	1961
	1962
	1963	class DateRange(object):
	1964	"""Represents a time interval between two dates"""
	1965
	1966	def __init__(self, start=None, end=None):
	1967	"""start and end must be strings in the format accepted by date"""
	1968	if start is not None:
	1969	self.start = date_from_str(start, strict=True)
	1970	else:
	1971	self.start = datetime.datetime.min.date()
	1972	if end is not None:
	1973	self.end = date_from_str(end, strict=True)
	1974	else:
	1975	self.end = datetime.datetime.max.date()
	1976	if self.start > self.end:
	1977	raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
	1978
	1979	@classmethod
	1980	def day(cls, day):
	1981	"""Returns a range that only contains the given day"""
	1982	return cls(day, day)
	1983
	1984	def __contains__(self, date):
	1985	"""Check if the date is in the range"""
	1986	if not isinstance(date, datetime.date):
	1987	date = date_from_str(date)
	1988	return self.start <= date <= self.end
	1989
	1990	def __str__(self):
	1991	return '%s - %s' % (self.start.isoformat(), self.end.isoformat())
	1992
	1993
	1994	def platform_name():
	1995	""" Returns the platform name as a compat_str """
	1996	res = platform.platform()
	1997	if isinstance(res, bytes):
	1998	res = res.decode(preferredencoding())
	1999
	2000	assert isinstance(res, compat_str)
	2001	return res
	2002
	2003
	2004	def get_windows_version():
	2005	''' Get Windows version. None if it's not running on Windows '''
	2006	if compat_os_name == 'nt':
	2007	return version_tuple(platform.win32_ver()[1])
	2008	else:
	2009	return None
	2010
	2011
def _windows_write_string(s, out):
    """ Returns True if the string was written using special methods,
    False if it has yet to be written out.

    The "special method" is the Win32 WriteConsoleW call, used only when
    *out* is file descriptor 1 or 2 and its handle is an actual console."""
    # Adapted from http://stackoverflow.com/a/3259271/35070

    import ctypes.wintypes

    # File descriptor -> GetStdHandle argument
    # (presumably STD_OUTPUT_HANDLE / STD_ERROR_HANDLE; confirm against the Win32 docs)
    WIN_OUTPUT_IDS = {
        1: -11,
        2: -12,
    }

    try:
        fileno = out.fileno()
    except AttributeError:
        # If the output stream doesn't have a fileno, it's virtual
        return False
    except io.UnsupportedOperation:
        # Some strange Windows pseudo files?
        return False
    if fileno not in WIN_OUTPUT_IDS:
        return False

    GetStdHandle = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.HANDLE, ctypes.wintypes.DWORD)(
        ('GetStdHandle', ctypes.windll.kernel32))
    h = GetStdHandle(WIN_OUTPUT_IDS[fileno])

    WriteConsoleW = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE, ctypes.wintypes.LPWSTR,
        ctypes.wintypes.DWORD, ctypes.POINTER(ctypes.wintypes.DWORD),
        ctypes.wintypes.LPVOID)(('WriteConsoleW', ctypes.windll.kernel32))
    # Receives the number of characters written by each WriteConsoleW call
    written = ctypes.wintypes.DWORD(0)

    GetFileType = compat_ctypes_WINFUNCTYPE(ctypes.wintypes.DWORD, ctypes.wintypes.DWORD)(('GetFileType', ctypes.windll.kernel32))
    FILE_TYPE_CHAR = 0x0002
    FILE_TYPE_REMOTE = 0x8000
    GetConsoleMode = compat_ctypes_WINFUNCTYPE(
        ctypes.wintypes.BOOL, ctypes.wintypes.HANDLE,
        ctypes.POINTER(ctypes.wintypes.DWORD))(
        ('GetConsoleMode', ctypes.windll.kernel32))
    INVALID_HANDLE_VALUE = ctypes.wintypes.DWORD(-1).value

    def not_a_console(handle):
        # True when the handle is invalid, is not a character device
        # (ignoring the REMOTE flag), or GetConsoleMode fails on it
        if handle == INVALID_HANDLE_VALUE or handle is None:
            return True
        return ((GetFileType(handle) & ~FILE_TYPE_REMOTE) != FILE_TYPE_CHAR
                or GetConsoleMode(handle, ctypes.byref(ctypes.wintypes.DWORD())) == 0)

    if not_a_console(h):
        return False

    def next_nonbmp_pos(s):
        # Index of the first character above U+FFFF, or len(s) if there is none
        try:
            return next(i for i, c in enumerate(s) if ord(c) > 0xffff)
        except StopIteration:
            return len(s)

    while s:
        # Write at most 1024 characters at a time, stopping before any non-BMP character
        count = min(next_nonbmp_pos(s), 1024)

        # count == 0 means s starts with a non-BMP character, which is
        # written on its own with a length of 2
        ret = WriteConsoleW(
            h, s, count if count else 2, ctypes.byref(written), None)
        if ret == 0:
            raise OSError('Failed to write string')
        if not count:  # We just wrote a non-BMP character
            assert written.value == 2
            s = s[1:]
        else:
            assert written.value > 0
            # Continue after whatever part was actually written
            s = s[written.value:]
    return True
	2084
	2085
	2086	def write_string(s, out=None, encoding=None):
	2087	if out is None:
	2088	out = sys.stderr
	2089	assert type(s) == compat_str
	2090
	2091	if sys.platform == 'win32' and encoding is None and hasattr(out, 'fileno'):
	2092	if _windows_write_string(s, out):
	2093	return
	2094
	2095	if ('b' in getattr(out, 'mode', '')
	2096	or sys.version_info[0] < 3): # Python 2 lies about mode of sys.stderr
	2097	byt = s.encode(encoding or preferredencoding(), 'ignore')
	2098	out.write(byt)
	2099	elif hasattr(out, 'buffer'):
	2100	enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
	2101	byt = s.encode(enc, 'ignore')
	2102	out.buffer.write(byt)
	2103	else:
	2104	out.write(s)
	2105	out.flush()
	2106
	2107
	2108	def bytes_to_intlist(bs):
	2109	if not bs:
	2110	return []
	2111	if isinstance(bs[0], int): # Python 3
	2112	return list(bs)
	2113	else:
	2114	return [ord(c) for c in bs]
	2115
	2116
	2117	def intlist_to_bytes(xs):
	2118	if not xs:
	2119	return b''
	2120	return compat_struct_pack('%dB' % len(xs), *xs)
	2121
	2122
	2123	# Cross-platform file locking
	2124	if sys.platform == 'win32':
	2125	import ctypes.wintypes
	2126	import msvcrt
	2127
	2128	class OVERLAPPED(ctypes.Structure):
	2129	_fields_ = [
	2130	('Internal', ctypes.wintypes.LPVOID),
	2131	('InternalHigh', ctypes.wintypes.LPVOID),
	2132	('Offset', ctypes.wintypes.DWORD),
	2133	('OffsetHigh', ctypes.wintypes.DWORD),
	2134	('hEvent', ctypes.wintypes.HANDLE),
	2135	]
	2136
	2137	kernel32 = ctypes.windll.kernel32
	2138	LockFileEx = kernel32.LockFileEx
	2139	LockFileEx.argtypes = [
	2140	ctypes.wintypes.HANDLE, # hFile
	2141	ctypes.wintypes.DWORD, # dwFlags
	2142	ctypes.wintypes.DWORD, # dwReserved
	2143	ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
	2144	ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
	2145	ctypes.POINTER(OVERLAPPED) # Overlapped
	2146	]
	2147	LockFileEx.restype = ctypes.wintypes.BOOL
	2148	UnlockFileEx = kernel32.UnlockFileEx
	2149	UnlockFileEx.argtypes = [
	2150	ctypes.wintypes.HANDLE, # hFile
	2151	ctypes.wintypes.DWORD, # dwReserved
	2152	ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
	2153	ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
	2154	ctypes.POINTER(OVERLAPPED) # Overlapped
	2155	]
	2156	UnlockFileEx.restype = ctypes.wintypes.BOOL
	2157	whole_low = 0xffffffff
	2158	whole_high = 0x7fffffff
	2159
	2160	def _lock_file(f, exclusive, block):
	2161	overlapped = OVERLAPPED()
	2162	overlapped.Offset = 0
	2163	overlapped.OffsetHigh = 0
	2164	overlapped.hEvent = 0
	2165	f._lock_file_overlapped_p = ctypes.pointer(overlapped)
	2166
	2167	if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
	2168	(0x2 if exclusive else 0x0) \| (0x0 if block else 0x1),
	2169	0, whole_low, whole_high, f._lock_file_overlapped_p):
	2170	raise BlockingIOError('Locking file failed: %r' % ctypes.FormatError())
	2171
	2172	def _unlock_file(f):
	2173	assert f._lock_file_overlapped_p
	2174	handle = msvcrt.get_osfhandle(f.fileno())
	2175	if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
	2176	raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
	2177
	2178	else:
	2179	try:
	2180	import fcntl
	2181
	2182	def _lock_file(f, exclusive, block):
	2183	try:
	2184	fcntl.flock(f,
	2185	fcntl.LOCK_SH if not exclusive
	2186	else fcntl.LOCK_EX if block
	2187	else fcntl.LOCK_EX \| fcntl.LOCK_NB)
	2188	except BlockingIOError:
	2189	raise
	2190	except OSError: # AOSP does not have flock()
	2191	fcntl.lockf(f,
	2192	fcntl.LOCK_SH if not exclusive
	2193	else fcntl.LOCK_EX if block
	2194	else fcntl.LOCK_EX \| fcntl.LOCK_NB)
	2195
	2196	def _unlock_file(f):
	2197	try:
	2198	fcntl.flock(f, fcntl.LOCK_UN)
	2199	except OSError:
	2200	fcntl.lockf(f, fcntl.LOCK_UN)
	2201
	2202	except ImportError:
	2203	UNSUPPORTED_MSG = 'file locking is not supported on this platform'
	2204
	2205	def _lock_file(f, exclusive, block):
	2206	raise IOError(UNSUPPORTED_MSG)
	2207
	2208	def _unlock_file(f):
	2209	raise IOError(UNSUPPORTED_MSG)
	2210
	2211
	2212	class locked_file(object):
	2213	_closed = False
	2214
	2215	def __init__(self, filename, mode, block=True, encoding=None):
	2216	assert mode in ['r', 'rb', 'a', 'ab', 'w', 'wb']
	2217	self.f = io.open(filename, mode, encoding=encoding)
	2218	self.mode = mode
	2219	self.block = block
	2220
	2221	def __enter__(self):
	2222	exclusive = 'r' not in self.mode
	2223	try:
	2224	_lock_file(self.f, exclusive, self.block)
	2225	except IOError:
	2226	self.f.close()
	2227	raise
	2228	return self
	2229
	2230	def __exit__(self, etype, value, traceback):
	2231	try:
	2232	if not self._closed:
	2233	_unlock_file(self.f)
	2234	finally:
	2235	self.f.close()
	2236	self._closed = True
	2237
	2238	def __iter__(self):
	2239	return iter(self.f)
	2240
	2241	def write(self, *args):
	2242	return self.f.write(*args)
	2243
	2244	def read(self, *args):
	2245	return self.f.read(*args)
	2246
	2247	def flush(self):
	2248	self.f.flush()
	2249
	2250	def open(self):
	2251	return self.__enter__()
	2252
	2253	def close(self, *args):
	2254	self.__exit__(self, *args, value=False, traceback=False)
	2255
	2256
	2257	def get_filesystem_encoding():
	2258	encoding = sys.getfilesystemencoding()
	2259	return encoding if encoding is not None else 'utf-8'
	2260
	2261
	2262	def shell_quote(args):
	2263	quoted_args = []
	2264	encoding = get_filesystem_encoding()
	2265	for a in args:
	2266	if isinstance(a, bytes):
	2267	# We may get a filename encoded with 'encodeFilename'
	2268	a = a.decode(encoding)
	2269	quoted_args.append(compat_shlex_quote(a))
	2270	return ' '.join(quoted_args)
	2271
	2272
	2273	def smuggle_url(url, data):
	2274	""" Pass additional data in a URL for internal use. """
	2275
	2276	url, idata = unsmuggle_url(url, {})
	2277	data.update(idata)
	2278	sdata = compat_urllib_parse_urlencode(
	2279	{'__youtubedl_smuggle': json.dumps(data)})
	2280	return url + '#' + sdata
	2281
	2282
	2283	def unsmuggle_url(smug_url, default=None):
	2284	if '#__youtubedl_smuggle' not in smug_url:
	2285	return smug_url, default
	2286	url, _, sdata = smug_url.rpartition('#')
	2287	jsond = compat_parse_qs(sdata)['__youtubedl_smuggle'][0]
	2288	data = json.loads(jsond)
	2289	return url, data
	2290
	2291
	2292	def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
	2293	""" Formats numbers with decimal sufixes like K, M, etc """
	2294	num, factor = float_or_none(num), float(factor)
	2295	if num is None or num < 0:
	2296	return None
	2297	POSSIBLE_SUFFIXES = 'kMGTPEZY'
	2298	exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
	2299	suffix = ['', *POSSIBLE_SUFFIXES][exponent]
	2300	if factor == 1024:
	2301	suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
	2302	converted = num / (factor ** exponent)
	2303	return fmt % (converted, suffix)
	2304
	2305
	2306	def format_bytes(bytes):
	2307	return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
	2308
	2309
	2310	def lookup_unit_table(unit_table, s):
	2311	units_re = '\|'.join(re.escape(u) for u in unit_table)
	2312	m = re.match(
	2313	r'(?P<num>[0-9]+(?:[,.][0-9])?)\s(?P<unit>%s)\b' % units_re, s)
	2314	if not m:
	2315	return None
	2316	num_str = m.group('num').replace(',', '.')
	2317	mult = unit_table[m.group('unit')]
	2318	return int(float(num_str) * mult)
	2319
	2320
	2321	def parse_filesize(s):
	2322	if s is None:
	2323	return None
	2324
	2325	# The lower-case forms are of course incorrect and unofficial,
	2326	# but we support those too
	2327	_UNIT_TABLE = {
	2328	'B': 1,
	2329	'b': 1,
	2330	'bytes': 1,
	2331	'KiB': 1024,
	2332	'KB': 1000,
	2333	'kB': 1024,
	2334	'Kb': 1000,
	2335	'kb': 1000,
	2336	'kilobytes': 1000,
	2337	'kibibytes': 1024,
	2338	'MiB': 1024 ** 2,
	2339	'MB': 1000 ** 2,
	2340	'mB': 1024 ** 2,
	2341	'Mb': 1000 ** 2,
	2342	'mb': 1000 ** 2,
	2343	'megabytes': 1000 ** 2,
	2344	'mebibytes': 1024 ** 2,
	2345	'GiB': 1024 ** 3,
	2346	'GB': 1000 ** 3,
	2347	'gB': 1024 ** 3,
	2348	'Gb': 1000 ** 3,
	2349	'gb': 1000 ** 3,
	2350	'gigabytes': 1000 ** 3,
	2351	'gibibytes': 1024 ** 3,
	2352	'TiB': 1024 ** 4,
	2353	'TB': 1000 ** 4,
	2354	'tB': 1024 ** 4,
	2355	'Tb': 1000 ** 4,
	2356	'tb': 1000 ** 4,
	2357	'terabytes': 1000 ** 4,
	2358	'tebibytes': 1024 ** 4,
	2359	'PiB': 1024 ** 5,
	2360	'PB': 1000 ** 5,
	2361	'pB': 1024 ** 5,
	2362	'Pb': 1000 ** 5,
	2363	'pb': 1000 ** 5,
	2364	'petabytes': 1000 ** 5,
	2365	'pebibytes': 1024 ** 5,
	2366	'EiB': 1024 ** 6,
	2367	'EB': 1000 ** 6,
	2368	'eB': 1024 ** 6,
	2369	'Eb': 1000 ** 6,
	2370	'eb': 1000 ** 6,
	2371	'exabytes': 1000 ** 6,
	2372	'exbibytes': 1024 ** 6,
	2373	'ZiB': 1024 ** 7,
	2374	'ZB': 1000 ** 7,
	2375	'zB': 1024 ** 7,
	2376	'Zb': 1000 ** 7,
	2377	'zb': 1000 ** 7,
	2378	'zettabytes': 1000 ** 7,
	2379	'zebibytes': 1024 ** 7,
	2380	'YiB': 1024 ** 8,
	2381	'YB': 1000 ** 8,
	2382	'yB': 1024 ** 8,
	2383	'Yb': 1000 ** 8,
	2384	'yb': 1000 ** 8,
	2385	'yottabytes': 1000 ** 8,
	2386	'yobibytes': 1024 ** 8,
	2387	}
	2388
	2389	return lookup_unit_table(_UNIT_TABLE, s)
	2390
	2391
	2392	def parse_count(s):
	2393	if s is None:
	2394	return None
	2395
	2396	s = re.sub(r'^[^\d]+\s', '', s).strip()
	2397
	2398	if re.match(r'^[\d,.]+$', s):
	2399	return str_to_int(s)
	2400
	2401	_UNIT_TABLE = {
	2402	'k': 1000,
	2403	'K': 1000,
	2404	'm': 1000 ** 2,
	2405	'M': 1000 ** 2,
	2406	'kk': 1000 ** 2,
	2407	'KK': 1000 ** 2,
	2408	'b': 1000 ** 3,
	2409	'B': 1000 ** 3,
	2410	}
	2411
	2412	ret = lookup_unit_table(_UNIT_TABLE, s)
	2413	if ret is not None:
	2414	return ret
	2415
	2416	mobj = re.match(r'([\d,.]+)(?:$\|\s)', s)
	2417	if mobj:
	2418	return str_to_int(mobj.group(1))
	2419
	2420
	2421	def parse_resolution(s):
	2422	if s is None:
	2423	return {}
	2424
	2425	mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s[xX×,]\s(?P<h>\d+)(?![a-zA-Z0-9])', s)
	2426	if mobj:
	2427	return {
	2428	'width': int(mobj.group('w')),
	2429	'height': int(mobj.group('h')),
	2430	}
	2431
	2432	mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
	2433	if mobj:
	2434	return {'height': int(mobj.group(1))}
	2435
	2436	mobj = re.search(r'\b([48])[kK]\b', s)
	2437	if mobj:
	2438	return {'height': int(mobj.group(1)) * 540}
	2439
	2440	return {}
	2441
	2442
	2443	def parse_bitrate(s):
	2444	if not isinstance(s, compat_str):
	2445	return
	2446	mobj = re.search(r'\b(\d+)\s*kbps', s)
	2447	if mobj:
	2448	return int(mobj.group(1))
	2449
	2450
	2451	def month_by_name(name, lang='en'):
	2452	""" Return the number of a month by (locale-independently) English name """
	2453
	2454	month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
	2455
	2456	try:
	2457	return month_names.index(name) + 1
	2458	except ValueError:
	2459	return None
	2460
	2461
	2462	def month_by_abbreviation(abbrev):
	2463	""" Return the number of a month by (locale-independently) English
	2464	abbreviations """
	2465
	2466	try:
	2467	return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
	2468	except ValueError:
	2469	return None
	2470
	2471
	2472	def fix_xml_ampersands(xml_str):
	2473	"""Replace all the '&' by '&' in XML"""
	2474	return re.sub(
	2475	r'&(?!amp;\|lt;\|gt;\|apos;\|quot;\|#x[0-9a-fA-F]{,4};\|#[0-9]{,4};)',
	2476	'&',
	2477	xml_str)
	2478
	2479
	2480	def setproctitle(title):
	2481	assert isinstance(title, compat_str)
	2482
	2483	# ctypes in Jython is not complete
	2484	# http://bugs.jython.org/issue2148
	2485	if sys.platform.startswith('java'):
	2486	return
	2487
	2488	try:
	2489	libc = ctypes.cdll.LoadLibrary('libc.so.6')
	2490	except OSError:
	2491	return
	2492	except TypeError:
	2493	# LoadLibrary in Windows Python 2.7.13 only expects
	2494	# a bytestring, but since unicode_literals turns
	2495	# every string into a unicode string, it fails.
	2496	return
	2497	title_bytes = title.encode('utf-8')
	2498	buf = ctypes.create_string_buffer(len(title_bytes))
	2499	buf.value = title_bytes
	2500	try:
	2501	libc.prctl(15, buf, 0, 0, 0)
	2502	except AttributeError:
	2503	return # Strange libc, just skip this
	2504
	2505
	2506	def remove_start(s, start):
	2507	return s[len(start):] if s is not None and s.startswith(start) else s
	2508
	2509
	2510	def remove_end(s, end):
	2511	return s[:-len(end)] if s is not None and s.endswith(end) else s
	2512
	2513
	2514	def remove_quotes(s):
	2515	if s is None or len(s) < 2:
	2516	return s
	2517	for quote in ('"', "'", ):
	2518	if s[0] == quote and s[-1] == quote:
	2519	return s[1:-1]
	2520	return s
	2521
	2522
	2523	def get_domain(url):
	2524	domain = re.match(r'(?:https?:\/\/)?(?:www\.)?(?P<domain>[^\n\/]+\.[^\n\/]+)(?:\/(.*))?', url)
	2525	return domain.group('domain') if domain else None
	2526
	2527
	2528	def url_basename(url):
	2529	path = compat_urlparse.urlparse(url).path
	2530	return path.strip('/').split('/')[-1]
	2531
	2532
	2533	def base_url(url):
	2534	return re.match(r'https?://[^?#&]+/', url).group()
	2535
	2536
	2537	def urljoin(base, path):
	2538	if isinstance(path, bytes):
	2539	path = path.decode('utf-8')
	2540	if not isinstance(path, compat_str) or not path:
	2541	return None
	2542	if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
	2543	return path
	2544	if isinstance(base, bytes):
	2545	base = base.decode('utf-8')
	2546	if not isinstance(base, compat_str) or not re.match(
	2547	r'^(?:https?:)?//', base):
	2548	return None
	2549	return compat_urlparse.urljoin(base, path)
	2550
	2551
	2552	class HEADRequest(compat_urllib_request.Request):
	2553	def get_method(self):
	2554	return 'HEAD'
	2555
	2556
	2557	class PUTRequest(compat_urllib_request.Request):
	2558	def get_method(self):
	2559	return 'PUT'
	2560
	2561
	2562	def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
	2563	if get_attr and v is not None:
	2564	v = getattr(v, get_attr, None)
	2565	try:
	2566	return int(v) * invscale // scale
	2567	except (ValueError, TypeError, OverflowError):
	2568	return default
	2569
	2570
	2571	def str_or_none(v, default=None):
	2572	return default if v is None else compat_str(v)
	2573
	2574
	2575	def str_to_int(int_str):
	2576	""" A more relaxed version of int_or_none """
	2577	if isinstance(int_str, compat_integer_types):
	2578	return int_str
	2579	elif isinstance(int_str, compat_str):
	2580	int_str = re.sub(r'[,\.\+]', '', int_str)
	2581	return int_or_none(int_str)
	2582
	2583
	2584	def float_or_none(v, scale=1, invscale=1, default=None):
	2585	if v is None:
	2586	return default
	2587	try:
	2588	return float(v) * invscale / scale
	2589	except (ValueError, TypeError):
	2590	return default
	2591
	2592
	2593	def bool_or_none(v, default=None):
	2594	return v if isinstance(v, bool) else default
	2595
	2596
	2597	def strip_or_none(v, default=None):
	2598	return v.strip() if isinstance(v, compat_str) else default
	2599
	2600
	2601	def url_or_none(url):
	2602	if not url or not isinstance(url, compat_str):
	2603	return None
	2604	url = url.strip()
	2605	return url if re.match(r'^(?:(?:https?\|rt(?:m(?:pt?[es]?\|fp)\|sp[su]?)\|mms\|ftps?):)?//', url) else None
	2606
	2607
	2608	def request_to_url(req):
	2609	if isinstance(req, compat_urllib_request.Request):
	2610	return req.get_full_url()
	2611	else:
	2612	return req
	2613
	2614
	2615	def strftime_or_none(timestamp, date_format, default=None):
	2616	datetime_object = None
	2617	try:
	2618	if isinstance(timestamp, compat_numeric_types): # unix timestamp
	2619	datetime_object = datetime.datetime.utcfromtimestamp(timestamp)
	2620	elif isinstance(timestamp, compat_str): # assume YYYYMMDD
	2621	datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d')
	2622	return datetime_object.strftime(date_format)
	2623	except (ValueError, TypeError, AttributeError):
	2624	return default
	2625
	2626
	2627	def parse_duration(s):
	2628	if not isinstance(s, compat_basestring):
	2629	return None
	2630	s = s.strip()
	2631	if not s:
	2632	return None
	2633
	2634	days, hours, mins, secs, ms = [None] * 5
	2635	m = re.match(r'''(?x)
	2636	(?P<before_secs>
	2637	(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
	2638	(?P<secs>(?(before_secs)[0-9]{1,2}\|[0-9]+))
	2639	(?P<ms>[.:][0-9]+)?Z?$
	2640	''', s)
	2641	if m:
	2642	days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
	2643	else:
	2644	m = re.match(
	2645	r'''(?ix)(?:P?
	2646	(?:
	2647	[0-9]+\sy(?:ears?)?,?\s
	2648	)?
	2649	(?:
	2650	[0-9]+\sm(?:onths?)?,?\s
	2651	)?
	2652	(?:
	2653	[0-9]+\sw(?:eeks?)?,?\s
	2654	)?
	2655	(?:
	2656	(?P<days>[0-9]+)\sd(?:ays?)?,?\s
	2657	)?
	2658	T)?
	2659	(?:
	2660	(?P<hours>[0-9]+)\sh(?:ours?)?,?\s
	2661	)?
	2662	(?:
	2663	(?P<mins>[0-9]+)\sm(?:in(?:ute)?s?)?,?\s
	2664	)?
	2665	(?:
	2666	(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\ss(?:ec(?:ond)?s?)?\s
	2667	)?Z?$''', s)
	2668	if m:
	2669	days, hours, mins, secs, ms = m.groups()
	2670	else:
	2671	m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s(?:hours?)\|(?P<mins>[0-9.]+)\s(?:mins?\.?\|minutes?)\s*)Z?$', s)
	2672	if m:
	2673	hours, mins = m.groups()
	2674	else:
	2675	return None
	2676
	2677	duration = 0
	2678	if secs:
	2679	duration += float(secs)
	2680	if mins:
	2681	duration += float(mins) * 60
	2682	if hours:
	2683	duration += float(hours) * 60 * 60
	2684	if days:
	2685	duration += float(days) * 24 * 60 * 60
	2686	if ms:
	2687	duration += float(ms.replace(':', '.'))
	2688	return duration
	2689
	2690
	2691	def prepend_extension(filename, ext, expected_real_ext=None):
	2692	name, real_ext = os.path.splitext(filename)
	2693	return (
	2694	'{0}.{1}{2}'.format(name, ext, real_ext)
	2695	if not expected_real_ext or real_ext[1:] == expected_real_ext
	2696	else '{0}.{1}'.format(filename, ext))
	2697
	2698
	2699	def replace_extension(filename, ext, expected_real_ext=None):
	2700	name, real_ext = os.path.splitext(filename)
	2701	return '{0}.{1}'.format(
	2702	name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
	2703	ext)
	2704
	2705
	2706	def check_executable(exe, args=[]):
	2707	""" Checks if the given binary is installed somewhere in PATH, and returns its name.
	2708	args can be a list of arguments for a short output (like -version) """
	2709	try:
	2710	Popen([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE).communicate_or_kill()
	2711	except OSError:
	2712	return False
	2713	return exe
	2714
	2715
	2716	def _get_exe_version_output(exe, args):
	2717	try:
	2718	# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
	2719	# SIGTTOU if yt-dlp is run in the background.
	2720	# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
	2721	out, _ = Popen(
	2722	[encodeArgument(exe)] + args, stdin=subprocess.PIPE,
	2723	stdout=subprocess.PIPE, stderr=subprocess.STDOUT).communicate_or_kill()
	2724	except OSError:
	2725	return False
	2726	if isinstance(out, bytes): # Python 2.x
	2727	out = out.decode('ascii', 'ignore')
	2728	return out
	2729
	2730
	2731	def detect_exe_version(output, version_re=None, unrecognized='present'):
	2732	assert isinstance(output, compat_str)
	2733	if version_re is None:
	2734	version_re = r'version\s+([-0-9._a-zA-Z]+)'
	2735	m = re.search(version_re, output)
	2736	if m:
	2737	return m.group(1)
	2738	else:
	2739	return unrecognized
	2740
	2741
	2742	def get_exe_version(exe, args=['--version'],
	2743	version_re=None, unrecognized='present'):
	2744	""" Returns the version of the specified executable,
	2745	or False if the executable is not present """
	2746	out = _get_exe_version_output(exe, args)
	2747	return detect_exe_version(out, version_re, unrecognized) if out else False
	2748
	2749
	2750	class LazyList(collections.abc.Sequence):
	2751	''' Lazy immutable list from an iterable
	2752	Note that slices of a LazyList are lists and not LazyList'''
	2753
	2754	class IndexError(IndexError):
	2755	pass
	2756
	2757	def __init__(self, iterable, *, reverse=False, _cache=None):
	2758	self.__iterable = iter(iterable)
	2759	self.__cache = [] if _cache is None else _cache
	2760	self.__reversed = reverse
	2761
	2762	def __iter__(self):
	2763	if self.__reversed:
	2764	# We need to consume the entire iterable to iterate in reverse
	2765	yield from self.exhaust()
	2766	return
	2767	yield from self.__cache
	2768	for item in self.__iterable:
	2769	self.__cache.append(item)
	2770	yield item
	2771
	2772	def __exhaust(self):
	2773	self.__cache.extend(self.__iterable)
	2774	# Discard the emptied iterable to make it pickle-able
	2775	self.__iterable = []
	2776	return self.__cache
	2777
	2778	def exhaust(self):
	2779	''' Evaluate the entire iterable '''
	2780	return self.__exhaust()[::-1 if self.__reversed else 1]
	2781
	2782	@staticmethod
	2783	def __reverse_index(x):
	2784	return None if x is None else -(x + 1)
	2785
	2786	def __getitem__(self, idx):
	2787	if isinstance(idx, slice):
	2788	if self.__reversed:
	2789	idx = slice(self.__reverse_index(idx.start), self.__reverse_index(idx.stop), -(idx.step or 1))
	2790	start, stop, step = idx.start, idx.stop, idx.step or 1
	2791	elif isinstance(idx, int):
	2792	if self.__reversed:
	2793	idx = self.__reverse_index(idx)
	2794	start, stop, step = idx, idx, 0
	2795	else:
	2796	raise TypeError('indices must be integers or slices')
	2797	if ((start or 0) < 0 or (stop or 0) < 0
	2798	or (start is None and step < 0)
	2799	or (stop is None and step > 0)):
	2800	# We need to consume the entire iterable to be able to slice from the end
	2801	# Obviously, never use this with infinite iterables
	2802	self.__exhaust()
	2803	try:
	2804	return self.__cache[idx]
	2805	except IndexError as e:
	2806	raise self.IndexError(e) from e
	2807	n = max(start or 0, stop or 0) - len(self.__cache) + 1
	2808	if n > 0:
	2809	self.__cache.extend(itertools.islice(self.__iterable, n))
	2810	try:
	2811	return self.__cache[idx]
	2812	except IndexError as e:
	2813	raise self.IndexError(e) from e
	2814
	2815	def __bool__(self):
	2816	try:
	2817	self[-1] if self.__reversed else self[0]
	2818	except self.IndexError:
	2819	return False
	2820	return True
	2821
	2822	def __len__(self):
	2823	self.__exhaust()
	2824	return len(self.__cache)
	2825
	2826	def __reversed__(self):
	2827	return type(self)(self.__iterable, reverse=not self.__reversed, _cache=self.__cache)
	2828
	2829	def __copy__(self):
	2830	return type(self)(self.__iterable, reverse=self.__reversed, _cache=self.__cache)
	2831
	2832	def __repr__(self):
	2833	# repr and str should mimic a list. So we exhaust the iterable
	2834	return repr(self.exhaust())
	2835
	2836	def __str__(self):
	2837	return repr(self.exhaust())
	2838
	2839
	2840	class PagedList:
	2841
	2842	class IndexError(IndexError):
	2843	pass
	2844
	2845	def __len__(self):
	2846	# This is only useful for tests
	2847	return len(self.getslice())
	2848
	2849	def __init__(self, pagefunc, pagesize, use_cache=True):
	2850	self._pagefunc = pagefunc
	2851	self._pagesize = pagesize
	2852	self._pagecount = float('inf')
	2853	self._use_cache = use_cache
	2854	self._cache = {}
	2855
	2856	def getpage(self, pagenum):
	2857	page_results = self._cache.get(pagenum)
	2858	if page_results is None:
	2859	page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
	2860	if self._use_cache:
	2861	self._cache[pagenum] = page_results
	2862	return page_results
	2863
	2864	def getslice(self, start=0, end=None):
	2865	return list(self._getslice(start, end))
	2866
	2867	def _getslice(self, start, end):
	2868	raise NotImplementedError('This method must be implemented by subclasses')
	2869
	2870	def __getitem__(self, idx):
	2871	assert self._use_cache, 'Indexing PagedList requires cache'
	2872	if not isinstance(idx, int) or idx < 0:
	2873	raise TypeError('indices must be non-negative integers')
	2874	entries = self.getslice(idx, idx + 1)
	2875	if not entries:
	2876	raise self.IndexError()
	2877	return entries[0]
	2878
	2879
	2880	class OnDemandPagedList(PagedList):
	2881	def _getslice(self, start, end):
	2882	for pagenum in itertools.count(start // self._pagesize):
	2883	firstid = pagenum * self._pagesize
	2884	nextfirstid = pagenum * self._pagesize + self._pagesize
	2885	if start >= nextfirstid:
	2886	continue
	2887
	2888	startv = (
	2889	start % self._pagesize
	2890	if firstid <= start < nextfirstid
	2891	else 0)
	2892	endv = (
	2893	((end - 1) % self._pagesize) + 1
	2894	if (end is not None and firstid <= end <= nextfirstid)
	2895	else None)
	2896
	2897	try:
	2898	page_results = self.getpage(pagenum)
	2899	except Exception:
	2900	self._pagecount = pagenum - 1
	2901	raise
	2902	if startv != 0 or endv is not None:
	2903	page_results = page_results[startv:endv]
	2904	yield from page_results
	2905
	2906	# A little optimization - if current page is not "full", ie. does
	2907	# not contain page_size videos then we can assume that this page
	2908	# is the last one - there are no more ids on further pages -
	2909	# i.e. no need to query again.
	2910	if len(page_results) + startv < self._pagesize:
	2911	break
	2912
	2913	# If we got the whole page, but the next page is not interesting,
	2914	# break out early as well
	2915	if end == nextfirstid:
	2916	break
	2917
	2918
	2919	class InAdvancePagedList(PagedList):
	2920	def __init__(self, pagefunc, pagecount, pagesize):
	2921	PagedList.__init__(self, pagefunc, pagesize, True)
	2922	self._pagecount = pagecount
	2923
	2924	def _getslice(self, start, end):
	2925	start_page = start // self._pagesize
	2926	end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
	2927	skip_elems = start - start_page * self._pagesize
	2928	only_more = None if end is None else end - start
	2929	for pagenum in range(start_page, end_page):
	2930	page_results = self.getpage(pagenum)
	2931	if skip_elems:
	2932	page_results = page_results[skip_elems:]
	2933	skip_elems = None
	2934	if only_more is not None:
	2935	if len(page_results) < only_more:
	2936	only_more -= len(page_results)
	2937	else:
	2938	yield from page_results[:only_more]
	2939	break
	2940	yield from page_results
	2941
	2942
	2943	def uppercase_escape(s):
	2944	unicode_escape = codecs.getdecoder('unicode_escape')
	2945	return re.sub(
	2946	r'\\U[0-9a-fA-F]{8}',
	2947	lambda m: unicode_escape(m.group(0))[0],
	2948	s)
	2949
	2950
	2951	def lowercase_escape(s):
	2952	unicode_escape = codecs.getdecoder('unicode_escape')
	2953	return re.sub(
	2954	r'\\u[0-9a-fA-F]{4}',
	2955	lambda m: unicode_escape(m.group(0))[0],
	2956	s)
	2957
	2958
	2959	def escape_rfc3986(s):
	2960	"""Escape non-ASCII characters as suggested by RFC 3986"""
	2961	if sys.version_info < (3, 0) and isinstance(s, compat_str):
	2962	s = s.encode('utf-8')
	2963	return compat_urllib_parse.quote(s, b"%/;:@&=+$,!~*'()?#[]")
	2964
	2965
	2966	def escape_url(url):
	2967	"""Escape URL as suggested by RFC 3986"""
	2968	url_parsed = compat_urllib_parse_urlparse(url)
	2969	return url_parsed._replace(
	2970	netloc=url_parsed.netloc.encode('idna').decode('ascii'),
	2971	path=escape_rfc3986(url_parsed.path),
	2972	params=escape_rfc3986(url_parsed.params),
	2973	query=escape_rfc3986(url_parsed.query),
	2974	fragment=escape_rfc3986(url_parsed.fragment)
	2975	).geturl()
	2976
	2977
	2978	def parse_qs(url):
	2979	return compat_parse_qs(compat_urllib_parse_urlparse(url).query)
	2980
	2981
	2982	def read_batch_urls(batch_fd):
	2983	def fixup(url):
	2984	if not isinstance(url, compat_str):
	2985	url = url.decode('utf-8', 'replace')
	2986	BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
	2987	for bom in BOM_UTF8:
	2988	if url.startswith(bom):
	2989	url = url[len(bom):]
	2990	url = url.lstrip()
	2991	if not url or url.startswith(('#', ';', ']')):
	2992	return False
	2993	# "#" cannot be stripped out since it is part of the URI
	2994	# However, it can be safely stipped out if follwing a whitespace
	2995	return re.split(r'\s#', url, 1)[0].rstrip()
	2996
	2997	with contextlib.closing(batch_fd) as fd:
	2998	return [url for url in map(fixup, fd) if url]
	2999
	3000
	3001	def urlencode_postdata(args, *kargs):
	3002	return compat_urllib_parse_urlencode(args, *kargs).encode('ascii')
	3003
	3004
	3005	def update_url_query(url, query):
	3006	if not query:
	3007	return url
	3008	parsed_url = compat_urlparse.urlparse(url)
	3009	qs = compat_parse_qs(parsed_url.query)
	3010	qs.update(query)
	3011	return compat_urlparse.urlunparse(parsed_url._replace(
	3012	query=compat_urllib_parse_urlencode(qs, True)))
	3013
	3014
	3015	def update_Request(req, url=None, data=None, headers={}, query={}):
	3016	req_headers = req.headers.copy()
	3017	req_headers.update(headers)
	3018	req_data = data or req.data
	3019	req_url = update_url_query(url or req.get_full_url(), query)
	3020	req_get_method = req.get_method()
	3021	if req_get_method == 'HEAD':
	3022	req_type = HEADRequest
	3023	elif req_get_method == 'PUT':
	3024	req_type = PUTRequest
	3025	else:
	3026	req_type = compat_urllib_request.Request
	3027	new_req = req_type(
	3028	req_url, data=req_data, headers=req_headers,
	3029	origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
	3030	if hasattr(req, 'timeout'):
	3031	new_req.timeout = req.timeout
	3032	return new_req
	3033
	3034
	3035	def _multipart_encode_impl(data, boundary):
	3036	content_type = 'multipart/form-data; boundary=%s' % boundary
	3037
	3038	out = b''
	3039	for k, v in data.items():
	3040	out += b'--' + boundary.encode('ascii') + b'\r\n'
	3041	if isinstance(k, compat_str):
	3042	k = k.encode('utf-8')
	3043	if isinstance(v, compat_str):
	3044	v = v.encode('utf-8')
	3045	# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
	3046	# suggests sending UTF-8 directly. Firefox sends UTF-8, too
	3047	content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
	3048	if boundary.encode('ascii') in content:
	3049	raise ValueError('Boundary overlaps with data')
	3050	out += content
	3051
	3052	out += b'--' + boundary.encode('ascii') + b'--\r\n'
	3053
	3054	return out, content_type
	3055
	3056
	3057	def multipart_encode(data, boundary=None):
	3058	'''
	3059	Encode a dict to RFC 7578-compliant form-data
	3060
	3061	data:
	3062	A dict where keys and values can be either Unicode or bytes-like
	3063	objects.
	3064	boundary:
	3065	If specified a Unicode object, it's used as the boundary. Otherwise
	3066	a random boundary is generated.
	3067
	3068	Reference: https://tools.ietf.org/html/rfc7578
	3069	'''
	3070	has_specified_boundary = boundary is not None
	3071
	3072	while True:
	3073	if boundary is None:
	3074	boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
	3075
	3076	try:
	3077	out, content_type = _multipart_encode_impl(data, boundary)
	3078	break
	3079	except ValueError:
	3080	if has_specified_boundary:
	3081	raise
	3082	boundary = None
	3083
	3084	return out, content_type
	3085
	3086
	3087	def dict_get(d, key_or_keys, default=None, skip_false_values=True):
	3088	if isinstance(key_or_keys, (list, tuple)):
	3089	for key in key_or_keys:
	3090	if key not in d or d[key] is None or skip_false_values and not d[key]:
	3091	continue
	3092	return d[key]
	3093	return default
	3094	return d.get(key_or_keys, default)
	3095
	3096
	3097	def try_get(src, getter, expected_type=None):
	3098	for get in variadic(getter):
	3099	try:
	3100	v = get(src)
	3101	except (AttributeError, KeyError, TypeError, IndexError):
	3102	pass
	3103	else:
	3104	if expected_type is None or isinstance(v, expected_type):
	3105	return v
	3106
	3107
	3108	def merge_dicts(*dicts):
	3109	merged = {}
	3110	for a_dict in dicts:
	3111	for k, v in a_dict.items():
	3112	if v is None:
	3113	continue
	3114	if (k not in merged
	3115	or (isinstance(v, compat_str) and v
	3116	and isinstance(merged[k], compat_str)
	3117	and not merged[k])):
	3118	merged[k] = v
	3119	return merged
	3120
	3121
	3122	def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
	3123	return string if isinstance(string, compat_str) else compat_str(string, encoding, errors)
	3124
	3125
	3126	US_RATINGS = {
	3127	'G': 0,
	3128	'PG': 10,
	3129	'PG-13': 13,
	3130	'R': 16,
	3131	'NC': 18,
	3132	}
	3133
	3134
	3135	TV_PARENTAL_GUIDELINES = {
	3136	'TV-Y': 0,
	3137	'TV-Y7': 7,
	3138	'TV-G': 0,
	3139	'TV-PG': 0,
	3140	'TV-14': 14,
	3141	'TV-MA': 17,
	3142	}
	3143
	3144
	3145	def parse_age_limit(s):
	3146	if type(s) == int:
	3147	return s if 0 <= s <= 21 else None
	3148	if not isinstance(s, compat_basestring):
	3149	return None
	3150	m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
	3151	if m:
	3152	return int(m.group('age'))
	3153	s = s.upper()
	3154	if s in US_RATINGS:
	3155	return US_RATINGS[s]
	3156	m = re.match(r'^TV[_-]?(%s)$' % '\|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
	3157	if m:
	3158	return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
	3159	return None
	3160
	3161
	3162	def strip_jsonp(code):
	3163	return re.sub(
	3164	r'''(?sx)^
	3165	(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
	3166	(?:\s&&\s(?P=func_name))?
	3167	\s$\s(?P<callback_data>.*)$;?
	3168	\s?(?://[^\n])*$''',
	3169	r'\g<callback_data>', code)
	3170
	3171
	3172	def js_to_json(code, vars={}):
	3173	# vars is a dict of var, val pairs to substitute
	3174	COMMENT_RE = r'/\(?:(?!\/).)?\/\|//[^\n]*\n'
	3175	SKIP_RE = r'\s(?:{comment})?\s'.format(comment=COMMENT_RE)
	3176	INTEGER_TABLE = (
	3177	(r'(?s)^(0[xX][0-9a-fA-F]+){skip}:?$'.format(skip=SKIP_RE), 16),
	3178	(r'(?s)^(0+[0-7]+){skip}:?$'.format(skip=SKIP_RE), 8),
	3179	)
	3180
	3181	def fix_kv(m):
	3182	v = m.group(0)
	3183	if v in ('true', 'false', 'null'):
	3184	return v
	3185	elif v in ('undefined', 'void 0'):
	3186	return 'null'
	3187	elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
	3188	return ""
	3189
	3190	if v[0] in ("'", '"'):
	3191	v = re.sub(r'(?s)\\.\|"', lambda m: {
	3192	'"': '\\"',
	3193	"\\'": "'",
	3194	'\\\n': '',
	3195	'\\x': '\\u00',
	3196	}.get(m.group(0), m.group(0)), v[1:-1])
	3197	else:
	3198	for regex, base in INTEGER_TABLE:
	3199	im = re.match(regex, v)
	3200	if im:
	3201	i = int(im.group(1), base)
	3202	return '"%d":' % i if v.endswith(':') else '%d' % i
	3203
	3204	if v in vars:
	3205	return vars[v]
	3206
	3207	return '"%s"' % v
	3208
	3209	code = re.sub(r'new Date$(".+")$', r'\g<1>', code)
	3210
	3211	return re.sub(r'''(?sx)
	3212	"(?:[^"\\](?:\\\\\|\\['"nurtbfx/\n]))[^"\\]*"\|
	3213	'(?:[^'\\](?:\\\\\|\\['"nurtbfx/\n]))[^'\\]*'\|
	3214	{comment}\|,(?={skip}[\]}}])\|
	3215	void\s0\|(?:(?<![0-9])[eE]\|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*\|
	3216	\b(?:0[xX][0-9a-fA-F]+\|0+[0-7]+)(?:{skip}:)?\|
	3217	[0-9]+(?={skip}:)\|
	3218	!+
	3219	'''.format(comment=COMMENT_RE, skip=SKIP_RE), fix_kv, code)
	3220
	3221
	3222	def qualities(quality_ids):
	3223	""" Get a numeric quality value out of a list of possible values """
	3224	def q(qid):
	3225	try:
	3226	return quality_ids.index(qid)
	3227	except ValueError:
	3228	return -1
	3229	return q
	3230
	3231
	3232	POSTPROCESS_WHEN = {'pre_process', 'after_filter', 'before_dl', 'after_move', 'post_process', 'after_video', 'playlist'}
	3233
	3234
	3235	DEFAULT_OUTTMPL = {
	3236	'default': '%(title)s [%(id)s].%(ext)s',
	3237	'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
	3238	}
	3239	OUTTMPL_TYPES = {
	3240	'chapter': None,
	3241	'subtitle': None,
	3242	'thumbnail': None,
	3243	'description': 'description',
	3244	'annotation': 'annotations.xml',
	3245	'infojson': 'info.json',
	3246	'link': None,
	3247	'pl_video': None,
	3248	'pl_thumbnail': None,
	3249	'pl_description': 'description',
	3250	'pl_infojson': 'info.json',
	3251	}
	3252
	3253	# As of [1] format syntax is:
	3254	# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
	3255	# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
	3256	STR_FORMAT_RE_TMPL = r'''(?x)
	3257	(?<!%)(?P<prefix>(?:%%)*)
	3258	%
	3259	(?P<has_key>$(?P<key>{0})$)?
	3260	(?P<format>
	3261	(?P<conversion>[#0\-+ ]+)?
	3262	(?P<min_width>\d+)?
	3263	(?P<precision>\.\d+)?
	3264	(?P<len_mod>[hlL])? # unused in python
	3265	{1} # conversion type
	3266	)
	3267	'''
	3268
	3269
	3270	STR_FORMAT_TYPES = 'diouxXeEfFgGcrs'
	3271
	3272
	3273	def limit_length(s, length):
	3274	""" Add ellipses to overly long strings """
	3275	if s is None:
	3276	return None
	3277	ELLIPSES = '...'
	3278	if len(s) > length:
	3279	return s[:length - len(ELLIPSES)] + ELLIPSES
	3280	return s
	3281
	3282
	3283	def version_tuple(v):
	3284	return tuple(int(e) for e in re.split(r'[-.]', v))
	3285
	3286
	3287	def is_outdated_version(version, limit, assume_new=True):
	3288	if not version:
	3289	return not assume_new
	3290	try:
	3291	return version_tuple(version) < version_tuple(limit)
	3292	except ValueError:
	3293	return not assume_new
	3294
	3295
	3296	def ytdl_is_updateable():
	3297	""" Returns if yt-dlp can be updated with -U """
	3298
	3299	from .update import is_non_updateable
	3300
	3301	return not is_non_updateable()
	3302
	3303
	3304	def args_to_str(args):
	3305	# Get a short string representation for a subprocess command
	3306	return ' '.join(compat_shlex_quote(a) for a in args)
	3307
	3308
	3309	def error_to_compat_str(err):
	3310	err_str = str(err)
	3311	# On python 2 error byte string must be decoded with proper
	3312	# encoding rather than ascii
	3313	if sys.version_info[0] < 3:
	3314	err_str = err_str.decode(preferredencoding())
	3315	return err_str
	3316
	3317
	3318	def mimetype2ext(mt):
	3319	if mt is None:
	3320	return None
	3321
	3322	mt, _, params = mt.partition(';')
	3323	mt = mt.strip()
	3324
	3325	FULL_MAP = {
	3326	'audio/mp4': 'm4a',
	3327	# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. Here use .mp3 as
	3328	# it's the most popular one
	3329	'audio/mpeg': 'mp3',
	3330	'audio/x-wav': 'wav',
	3331	'audio/wav': 'wav',
	3332	'audio/wave': 'wav',
	3333	}
	3334
	3335	ext = FULL_MAP.get(mt)
	3336	if ext is not None:
	3337	return ext
	3338
	3339	SUBTYPE_MAP = {
	3340	'3gpp': '3gp',
	3341	'smptett+xml': 'tt',
	3342	'ttaf+xml': 'dfxp',
	3343	'ttml+xml': 'ttml',
	3344	'x-flv': 'flv',
	3345	'x-mp4-fragmented': 'mp4',
	3346	'x-ms-sami': 'sami',
	3347	'x-ms-wmv': 'wmv',
	3348	'mpegurl': 'm3u8',
	3349	'x-mpegurl': 'm3u8',
	3350	'vnd.apple.mpegurl': 'm3u8',
	3351	'dash+xml': 'mpd',
	3352	'f4m+xml': 'f4m',
	3353	'hds+xml': 'f4m',
	3354	'vnd.ms-sstr+xml': 'ism',
	3355	'quicktime': 'mov',
	3356	'mp2t': 'ts',
	3357	'x-wav': 'wav',
	3358	'filmstrip+json': 'fs',
	3359	'svg+xml': 'svg',
	3360	}
	3361
	3362	_, _, subtype = mt.rpartition('/')
	3363	ext = SUBTYPE_MAP.get(subtype.lower())
	3364	if ext is not None:
	3365	return ext
	3366
	3367	SUFFIX_MAP = {
	3368	'json': 'json',
	3369	'xml': 'xml',
	3370	'zip': 'zip',
	3371	'gzip': 'gz',
	3372	}
	3373
	3374	_, _, suffix = subtype.partition('+')
	3375	ext = SUFFIX_MAP.get(suffix)
	3376	if ext is not None:
	3377	return ext
	3378
	3379	return subtype.replace('+', '.')
	3380
	3381
	3382	def ext2mimetype(ext_or_url):
	3383	if not ext_or_url:
	3384	return None
	3385	if '.' not in ext_or_url:
	3386	ext_or_url = f'file.{ext_or_url}'
	3387	return mimetypes.guess_type(ext_or_url)[0]
	3388
	3389
	3390	def parse_codecs(codecs_str):
	3391	# http://tools.ietf.org/html/rfc6381
	3392	if not codecs_str:
	3393	return {}
	3394	split_codecs = list(filter(None, map(
	3395	str.strip, codecs_str.strip().strip(',').split(','))))
	3396	vcodec, acodec, tcodec, hdr = None, None, None, None
	3397	for full_codec in split_codecs:
	3398	parts = full_codec.split('.')
	3399	codec = parts[0].replace('0', '')
	3400	if codec in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
	3401	'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
	3402	if not vcodec:
	3403	vcodec = '.'.join(parts[:4]) if codec in ('vp9', 'av1', 'hvc1') else full_codec
	3404	if codec in ('dvh1', 'dvhe'):
	3405	hdr = 'DV'
	3406	elif codec == 'av1' and len(parts) > 3 and parts[3] == '10':
	3407	hdr = 'HDR10'
	3408	elif full_codec.replace('0', '').startswith('vp9.2'):
	3409	hdr = 'HDR10'
	3410	elif codec in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
	3411	if not acodec:
	3412	acodec = full_codec
	3413	elif codec in ('stpp', 'wvtt',):
	3414	if not tcodec:
	3415	tcodec = full_codec
	3416	else:
	3417	write_string('WARNING: Unknown codec %s\n' % full_codec, sys.stderr)
	3418	if vcodec or acodec or tcodec:
	3419	return {
	3420	'vcodec': vcodec or 'none',
	3421	'acodec': acodec or 'none',
	3422	'dynamic_range': hdr,
	3423	**({'tcodec': tcodec} if tcodec is not None else {}),
	3424	}
	3425	elif len(split_codecs) == 2:
	3426	return {
	3427	'vcodec': split_codecs[0],
	3428	'acodec': split_codecs[1],
	3429	}
	3430	return {}
	3431
	3432
	3433	def urlhandle_detect_ext(url_handle):
	3434	getheader = url_handle.headers.get
	3435
	3436	cd = getheader('Content-Disposition')
	3437	if cd:
	3438	m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
	3439	if m:
	3440	e = determine_ext(m.group('filename'), default_ext=None)
	3441	if e:
	3442	return e
	3443
	3444	return mimetype2ext(getheader('Content-Type'))
	3445
	3446
	3447	def encode_data_uri(data, mime_type):
	3448	return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
	3449
	3450
	3451	def age_restricted(content_limit, age_limit):
	3452	""" Returns True iff the content should be blocked """
	3453
	3454	if age_limit is None: # No limit set
	3455	return False
	3456	if content_limit is None:
	3457	return False # Content available for everyone
	3458	return age_limit < content_limit
	3459
	3460
	3461	def is_html(first_bytes):
	3462	""" Detect whether a file contains HTML by examining its first bytes. """
	3463
	3464	BOMS = [
	3465	(b'\xef\xbb\xbf', 'utf-8'),
	3466	(b'\x00\x00\xfe\xff', 'utf-32-be'),
	3467	(b'\xff\xfe\x00\x00', 'utf-32-le'),
	3468	(b'\xff\xfe', 'utf-16-le'),
	3469	(b'\xfe\xff', 'utf-16-be'),
	3470	]
	3471	for bom, enc in BOMS:
	3472	if first_bytes.startswith(bom):
	3473	s = first_bytes[len(bom):].decode(enc, 'replace')
	3474	break
	3475	else:
	3476	s = first_bytes.decode('utf-8', 'replace')
	3477
	3478	return re.match(r'^\s*<', s)
	3479
	3480
	3481	def determine_protocol(info_dict):
	3482	protocol = info_dict.get('protocol')
	3483	if protocol is not None:
	3484	return protocol
	3485
	3486	url = sanitize_url(info_dict['url'])
	3487	if url.startswith('rtmp'):
	3488	return 'rtmp'
	3489	elif url.startswith('mms'):
	3490	return 'mms'
	3491	elif url.startswith('rtsp'):
	3492	return 'rtsp'
	3493
	3494	ext = determine_ext(url)
	3495	if ext == 'm3u8':
	3496	return 'm3u8'
	3497	elif ext == 'f4m':
	3498	return 'f4m'
	3499
	3500	return compat_urllib_parse_urlparse(url).scheme
	3501
	3502
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a tab character will be right aligned.

    @param header_row  List of column titles
    @param data        List of rows, each a list of cell values
    @param delim       If truthy, a string repeated to build a separator row
                       under the header
    @param extra_gap   Extra spaces added between columns
    @param hide_empty  Drop columns whose data cells all have zero width
    @returns           The table as a single newline-joined string
    """
    def width(string):
        # Visible width: terminal sequences and the alignment tab do not count
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
        # Widest cell of every column
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep cells whose filterArray entry is truthy; positions beyond
        # the end of filterArray are kept (fillvalue=True)
        return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]

    # With hide_empty, a column width of 0 acts as a "drop this column" flag;
    # otherwise the empty filter keeps every column
    max_lens = get_max_lens(data) if hide_empty else []
    header_row = filter_using_list(header_row, max_lens)
    data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    max_lens = get_max_lens(table)
    extra_gap += 1
    if delim:
        table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
        table[1][-1] = table[1][-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
    # Rows are padded in place; they are the fresh lists built above,
    # not the caller's lists
    for row in table:
        for pos, text in enumerate(map(str, row)):
            if '\t' in text:
                # The tab expands into the padding, pushing the text after it to the right
                row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
            else:
                row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
    ret = '\n'.join(''.join(row).rstrip() for row in table)
    return ret
	3533
	3534
	3535	def _match_one(filter_part, dct, incomplete):
	3536	# TODO: Generalize code with YoutubeDL._build_format_filter
	3537	STRING_OPERATORS = {
	3538	'*=': operator.contains,
	3539	'^=': lambda attr, value: attr.startswith(value),
	3540	'$=': lambda attr, value: attr.endswith(value),
	3541	'~=': lambda attr, value: re.search(value, attr),
	3542	}
	3543	COMPARISON_OPERATORS = {
	3544	**STRING_OPERATORS,
	3545	'<=': operator.le, # "<=" must be defined above "<"
	3546	'<': operator.lt,
	3547	'>=': operator.ge,
	3548	'>': operator.gt,
	3549	'=': operator.eq,
	3550	}
	3551
	3552	if isinstance(incomplete, bool):
	3553	is_incomplete = lambda _: incomplete
	3554	else:
	3555	is_incomplete = lambda k: k in incomplete
	3556
	3557	operator_rex = re.compile(r'''(?x)\s*
	3558	(?P<key>[a-z_]+)
	3559	\s(?P<negation>!\s)?(?P<op>%s)(?P<none_inclusive>\s\?)?\s
	3560	(?:
	3561	(?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)\|
	3562	(?P<strval>.+?)
	3563	)
	3564	\s*$
	3565	''' % '\|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
	3566	m = operator_rex.search(filter_part)
	3567	if m:
	3568	m = m.groupdict()
	3569	unnegated_op = COMPARISON_OPERATORS[m['op']]
	3570	if m['negation']:
	3571	op = lambda attr, value: not unnegated_op(attr, value)
	3572	else:
	3573	op = unnegated_op
	3574	comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
	3575	if m['quote']:
	3576	comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
	3577	actual_value = dct.get(m['key'])
	3578	numeric_comparison = None
	3579	if isinstance(actual_value, compat_numeric_types):
	3580	# If the original field is a string and matching comparisonvalue is
	3581	# a number we should respect the origin of the original field
	3582	# and process comparison value as a string (see
	3583	# https://github.com/ytdl-org/youtube-dl/issues/11082)
	3584	try:
	3585	numeric_comparison = int(comparison_value)
	3586	except ValueError:
	3587	numeric_comparison = parse_filesize(comparison_value)
	3588	if numeric_comparison is None:
	3589	numeric_comparison = parse_filesize(f'{comparison_value}B')
	3590	if numeric_comparison is None:
	3591	numeric_comparison = parse_duration(comparison_value)
	3592	if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
	3593	raise ValueError('Operator %s only supports string values!' % m['op'])
	3594	if actual_value is None:
	3595	return is_incomplete(m['key']) or m['none_inclusive']
	3596	return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
	3597
	3598	UNARY_OPERATORS = {
	3599	'': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
	3600	'!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
	3601	}
	3602	operator_rex = re.compile(r'''(?x)\s*
	3603	(?P<op>%s)\s*(?P<key>[a-z_]+)
	3604	\s*$
	3605	''' % '\|'.join(map(re.escape, UNARY_OPERATORS.keys())))
	3606	m = operator_rex.search(filter_part)
	3607	if m:
	3608	op = UNARY_OPERATORS[m.group('op')]
	3609	actual_value = dct.get(m.group('key'))
	3610	if is_incomplete(m.group('key')) and actual_value is None:
	3611	return True
	3612	return op(actual_value)
	3613
	3614	raise ValueError('Invalid filter part %r' % filter_part)
	3615
	3616
	3617	def match_str(filter_str, dct, incomplete=False):
	3618	""" Filter a dictionary with a simple string syntax.
	3619	@returns Whether the filter passes
	3620	@param incomplete Set of keys that is expected to be missing from dct.
	3621	Can be True/False to indicate all/none of the keys may be missing.
	3622	All conditions on incomplete keys pass if the key is missing
	3623	"""
	3624	return all(
	3625	_match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
	3626	for filter_part in re.split(r'(?<!\\)&', filter_str))
	3627
	3628
	3629	def match_filter_func(filters):
	3630	if not filters:
	3631	return None
	3632	filters = variadic(filters)
	3633
	3634	def _match_func(info_dict, args, *kwargs):
	3635	if any(match_str(f, info_dict, args, *kwargs) for f in filters):
	3636	return None
	3637	else:
	3638	video_title = info_dict.get('title') or info_dict.get('id') or 'video'
	3639	filter_str = ') \| ('.join(map(str.strip, filters))
	3640	return f'{video_title} does not pass filter ({filter_str}), skipping ..'
	3641	return _match_func
	3642
	3643
	3644	def parse_dfxp_time_expr(time_expr):
	3645	if not time_expr:
	3646	return
	3647
	3648	mobj = re.match(r'^(?P<time_offset>\d+(?:\.\d+)?)s?$', time_expr)
	3649	if mobj:
	3650	return float(mobj.group('time_offset'))
	3651
	3652	mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.\|:)\d+)?)$', time_expr)
	3653	if mobj:
	3654	return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
	3655
	3656
	3657	def srt_subtitles_timecode(seconds):
	3658	return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
	3659
	3660
	3661	def ass_subtitles_timecode(seconds):
	3662	time = timetuple_from_msec(seconds * 1000)
	3663	return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
	3664
	3665
	3666	def dfxp2srt(dfxp_data):
	3667	'''
	3668	@param dfxp_data A bytes-like object containing DFXP data
	3669	@returns A unicode object containing converted SRT data
	3670	'''
	3671	LEGACY_NAMESPACES = (
	3672	(b'http://www.w3.org/ns/ttml', [
	3673	b'http://www.w3.org/2004/11/ttaf1',
	3674	b'http://www.w3.org/2006/04/ttaf1',
	3675	b'http://www.w3.org/2006/10/ttaf1',
	3676	]),
	3677	(b'http://www.w3.org/ns/ttml#styling', [
	3678	b'http://www.w3.org/ns/ttml#style',
	3679	]),
	3680	)
	3681
	3682	SUPPORTED_STYLING = [
	3683	'color',
	3684	'fontFamily',
	3685	'fontSize',
	3686	'fontStyle',
	3687	'fontWeight',
	3688	'textDecoration'
	3689	]
	3690
	3691	_x = functools.partial(xpath_with_ns, ns_map={
	3692	'xml': 'http://www.w3.org/XML/1998/namespace',
	3693	'ttml': 'http://www.w3.org/ns/ttml',
	3694	'tts': 'http://www.w3.org/ns/ttml#styling',
	3695	})
	3696
	3697	styles = {}
	3698	default_style = {}
	3699
	3700	class TTMLPElementParser(object):
	3701	_out = ''
	3702	_unclosed_elements = []
	3703	_applied_styles = []
	3704
	3705	def start(self, tag, attrib):
	3706	if tag in (_x('ttml:br'), 'br'):
	3707	self._out += '\n'
	3708	else:
	3709	unclosed_elements = []
	3710	style = {}
	3711	element_style_id = attrib.get('style')
	3712	if default_style:
	3713	style.update(default_style)
	3714	if element_style_id:
	3715	style.update(styles.get(element_style_id, {}))
	3716	for prop in SUPPORTED_STYLING:
	3717	prop_val = attrib.get(_x('tts:' + prop))
	3718	if prop_val:
	3719	style[prop] = prop_val
	3720	if style:
	3721	font = ''
	3722	for k, v in sorted(style.items()):
	3723	if self._applied_styles and self._applied_styles[-1].get(k) == v:
	3724	continue
	3725	if k == 'color':
	3726	font += ' color="%s"' % v
	3727	elif k == 'fontSize':
	3728	font += ' size="%s"' % v
	3729	elif k == 'fontFamily':
	3730	font += ' face="%s"' % v
	3731	elif k == 'fontWeight' and v == 'bold':
	3732	self._out += '<b>'
	3733	unclosed_elements.append('b')
	3734	elif k == 'fontStyle' and v == 'italic':
	3735	self._out += '<i>'
	3736	unclosed_elements.append('i')
	3737	elif k == 'textDecoration' and v == 'underline':
	3738	self._out += '<u>'
	3739	unclosed_elements.append('u')
	3740	if font:
	3741	self._out += '<font' + font + '>'
	3742	unclosed_elements.append('font')
	3743	applied_style = {}
	3744	if self._applied_styles:
	3745	applied_style.update(self._applied_styles[-1])
	3746	applied_style.update(style)
	3747	self._applied_styles.append(applied_style)
	3748	self._unclosed_elements.append(unclosed_elements)
	3749
	3750	def end(self, tag):
	3751	if tag not in (_x('ttml:br'), 'br'):
	3752	unclosed_elements = self._unclosed_elements.pop()
	3753	for element in reversed(unclosed_elements):
	3754	self._out += '</%s>' % element
	3755	if unclosed_elements and self._applied_styles:
	3756	self._applied_styles.pop()
	3757
	3758	def data(self, data):
	3759	self._out += data
	3760
	3761	def close(self):
	3762	return self._out.strip()
	3763
	3764	def parse_node(node):
	3765	target = TTMLPElementParser()
	3766	parser = xml.etree.ElementTree.XMLParser(target=target)
	3767	parser.feed(xml.etree.ElementTree.tostring(node))
	3768	return parser.close()
	3769
	3770	for k, v in LEGACY_NAMESPACES:
	3771	for ns in v:
	3772	dfxp_data = dfxp_data.replace(ns, k)
	3773
	3774	dfxp = compat_etree_fromstring(dfxp_data)
	3775	out = []
	3776	paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
	3777
	3778	if not paras:
	3779	raise ValueError('Invalid dfxp/TTML subtitle')
	3780
	3781	repeat = False
	3782	while True:
	3783	for style in dfxp.findall(_x('.//ttml:style')):
	3784	style_id = style.get('id') or style.get(_x('xml:id'))
	3785	if not style_id:
	3786	continue
	3787	parent_style_id = style.get('style')
	3788	if parent_style_id:
	3789	if parent_style_id not in styles:
	3790	repeat = True
	3791	continue
	3792	styles[style_id] = styles[parent_style_id].copy()
	3793	for prop in SUPPORTED_STYLING:
	3794	prop_val = style.get(_x('tts:' + prop))
	3795	if prop_val:
	3796	styles.setdefault(style_id, {})[prop] = prop_val
	3797	if repeat:
	3798	repeat = False
	3799	else:
	3800	break
	3801
	3802	for p in ('body', 'div'):
	3803	ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
	3804	if ele is None:
	3805	continue
	3806	style = styles.get(ele.get('style'))
	3807	if not style:
	3808	continue
	3809	default_style.update(style)
	3810
	3811	for para, index in zip(paras, itertools.count(1)):
	3812	begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
	3813	end_time = parse_dfxp_time_expr(para.attrib.get('end'))
	3814	dur = parse_dfxp_time_expr(para.attrib.get('dur'))
	3815	if begin_time is None:
	3816	continue
	3817	if not end_time:
	3818	if not dur:
	3819	continue
	3820	end_time = begin_time + dur
	3821	out.append('%d\n%s --> %s\n%s\n\n' % (
	3822	index,
	3823	srt_subtitles_timecode(begin_time),
	3824	srt_subtitles_timecode(end_time),
	3825	parse_node(para)))
	3826
	3827	return ''.join(out)
	3828
	3829
	3830	def cli_option(params, command_option, param):
	3831	param = params.get(param)
	3832	if param:
	3833	param = compat_str(param)
	3834	return [command_option, param] if param is not None else []
	3835
	3836
	3837	def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
	3838	param = params.get(param)
	3839	if param is None:
	3840	return []
	3841	assert isinstance(param, bool)
	3842	if separator:
	3843	return [command_option + separator + (true_value if param else false_value)]
	3844	return [command_option, true_value if param else false_value]
	3845
	3846
	3847	def cli_valueless_option(params, command_option, param, expected_value=True):
	3848	param = params.get(param)
	3849	return [command_option] if param == expected_value else []
	3850
	3851
	3852	def cli_configuration_args(argdict, keys, default=[], use_compat=True):
	3853	if isinstance(argdict, (list, tuple)): # for backward compatibility
	3854	if use_compat:
	3855	return argdict
	3856	else:
	3857	argdict = None
	3858	if argdict is None:
	3859	return default
	3860	assert isinstance(argdict, dict)
	3861
	3862	assert isinstance(keys, (list, tuple))
	3863	for key_list in keys:
	3864	arg_list = list(filter(
	3865	lambda x: x is not None,
	3866	[argdict.get(key.lower()) for key in variadic(key_list)]))
	3867	if arg_list:
	3868	return [arg for args in arg_list for arg in args]
	3869	return default
	3870
	3871
	3872	def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
	3873	main_key, exe = main_key.lower(), exe.lower()
	3874	root_key = exe if main_key == exe else f'{main_key}+{exe}'
	3875	keys = [f'{root_key}{k}' for k in (keys or [''])]
	3876	if root_key in keys:
	3877	if main_key != exe:
	3878	keys.append((main_key, exe))
	3879	keys.append('default')
	3880	else:
	3881	use_compat = False
	3882	return cli_configuration_args(argdict, keys, default, use_compat)
	3883
	3884
	3885	class ISO639Utils(object):
	3886	# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
	3887	_lang_map = {
	3888	'aa': 'aar',
	3889	'ab': 'abk',
	3890	'ae': 'ave',
	3891	'af': 'afr',
	3892	'ak': 'aka',
	3893	'am': 'amh',
	3894	'an': 'arg',
	3895	'ar': 'ara',
	3896	'as': 'asm',
	3897	'av': 'ava',
	3898	'ay': 'aym',
	3899	'az': 'aze',
	3900	'ba': 'bak',
	3901	'be': 'bel',
	3902	'bg': 'bul',
	3903	'bh': 'bih',
	3904	'bi': 'bis',
	3905	'bm': 'bam',
	3906	'bn': 'ben',
	3907	'bo': 'bod',
	3908	'br': 'bre',
	3909	'bs': 'bos',
	3910	'ca': 'cat',
	3911	'ce': 'che',
	3912	'ch': 'cha',
	3913	'co': 'cos',
	3914	'cr': 'cre',
	3915	'cs': 'ces',
	3916	'cu': 'chu',
	3917	'cv': 'chv',
	3918	'cy': 'cym',
	3919	'da': 'dan',
	3920	'de': 'deu',
	3921	'dv': 'div',
	3922	'dz': 'dzo',
	3923	'ee': 'ewe',
	3924	'el': 'ell',
	3925	'en': 'eng',
	3926	'eo': 'epo',
	3927	'es': 'spa',
	3928	'et': 'est',
	3929	'eu': 'eus',
	3930	'fa': 'fas',
	3931	'ff': 'ful',
	3932	'fi': 'fin',
	3933	'fj': 'fij',
	3934	'fo': 'fao',
	3935	'fr': 'fra',
	3936	'fy': 'fry',
	3937	'ga': 'gle',
	3938	'gd': 'gla',
	3939	'gl': 'glg',
	3940	'gn': 'grn',
	3941	'gu': 'guj',
	3942	'gv': 'glv',
	3943	'ha': 'hau',
	3944	'he': 'heb',
	3945	'iw': 'heb', # Replaced by he in 1989 revision
	3946	'hi': 'hin',
	3947	'ho': 'hmo',
	3948	'hr': 'hrv',
	3949	'ht': 'hat',
	3950	'hu': 'hun',
	3951	'hy': 'hye',
	3952	'hz': 'her',
	3953	'ia': 'ina',
	3954	'id': 'ind',
	3955	'in': 'ind', # Replaced by id in 1989 revision
	3956	'ie': 'ile',
	3957	'ig': 'ibo',
	3958	'ii': 'iii',
	3959	'ik': 'ipk',
	3960	'io': 'ido',
	3961	'is': 'isl',
	3962	'it': 'ita',
	3963	'iu': 'iku',
	3964	'ja': 'jpn',
	3965	'jv': 'jav',
	3966	'ka': 'kat',
	3967	'kg': 'kon',
	3968	'ki': 'kik',
	3969	'kj': 'kua',
	3970	'kk': 'kaz',
	3971	'kl': 'kal',
	3972	'km': 'khm',
	3973	'kn': 'kan',
	3974	'ko': 'kor',
	3975	'kr': 'kau',
	3976	'ks': 'kas',
	3977	'ku': 'kur',
	3978	'kv': 'kom',
	3979	'kw': 'cor',
	3980	'ky': 'kir',
	3981	'la': 'lat',
	3982	'lb': 'ltz',
	3983	'lg': 'lug',
	3984	'li': 'lim',
	3985	'ln': 'lin',
	3986	'lo': 'lao',
	3987	'lt': 'lit',
	3988	'lu': 'lub',
	3989	'lv': 'lav',
	3990	'mg': 'mlg',
	3991	'mh': 'mah',
	3992	'mi': 'mri',
	3993	'mk': 'mkd',
	3994	'ml': 'mal',
	3995	'mn': 'mon',
	3996	'mr': 'mar',
	3997	'ms': 'msa',
	3998	'mt': 'mlt',
	3999	'my': 'mya',
	4000	'na': 'nau',
	4001	'nb': 'nob',
	4002	'nd': 'nde',
	4003	'ne': 'nep',
	4004	'ng': 'ndo',
	4005	'nl': 'nld',
	4006	'nn': 'nno',
	4007	'no': 'nor',
	4008	'nr': 'nbl',
	4009	'nv': 'nav',
	4010	'ny': 'nya',
	4011	'oc': 'oci',
	4012	'oj': 'oji',
	4013	'om': 'orm',
	4014	'or': 'ori',
	4015	'os': 'oss',
	4016	'pa': 'pan',
	4017	'pi': 'pli',
	4018	'pl': 'pol',
	4019	'ps': 'pus',
	4020	'pt': 'por',
	4021	'qu': 'que',
	4022	'rm': 'roh',
	4023	'rn': 'run',
	4024	'ro': 'ron',
	4025	'ru': 'rus',
	4026	'rw': 'kin',
	4027	'sa': 'san',
	4028	'sc': 'srd',
	4029	'sd': 'snd',
	4030	'se': 'sme',
	4031	'sg': 'sag',
	4032	'si': 'sin',
	4033	'sk': 'slk',
	4034	'sl': 'slv',
	4035	'sm': 'smo',
	4036	'sn': 'sna',
	4037	'so': 'som',
	4038	'sq': 'sqi',
	4039	'sr': 'srp',
	4040	'ss': 'ssw',
	4041	'st': 'sot',
	4042	'su': 'sun',
	4043	'sv': 'swe',
	4044	'sw': 'swa',
	4045	'ta': 'tam',
	4046	'te': 'tel',
	4047	'tg': 'tgk',
	4048	'th': 'tha',
	4049	'ti': 'tir',
	4050	'tk': 'tuk',
	4051	'tl': 'tgl',
	4052	'tn': 'tsn',
	4053	'to': 'ton',
	4054	'tr': 'tur',
	4055	'ts': 'tso',
	4056	'tt': 'tat',
	4057	'tw': 'twi',
	4058	'ty': 'tah',
	4059	'ug': 'uig',
	4060	'uk': 'ukr',
	4061	'ur': 'urd',
	4062	'uz': 'uzb',
	4063	've': 'ven',
	4064	'vi': 'vie',
	4065	'vo': 'vol',
	4066	'wa': 'wln',
	4067	'wo': 'wol',
	4068	'xh': 'xho',
	4069	'yi': 'yid',
	4070	'ji': 'yid', # Replaced by yi in 1989 revision
	4071	'yo': 'yor',
	4072	'za': 'zha',
	4073	'zh': 'zho',
	4074	'zu': 'zul',
	4075	}
	4076
	4077	@classmethod
	4078	def short2long(cls, code):
	4079	"""Convert language code from ISO 639-1 to ISO 639-2/T"""
	4080	return cls._lang_map.get(code[:2])
	4081
	4082	@classmethod
	4083	def long2short(cls, code):
	4084	"""Convert language code from ISO 639-2/T to ISO 639-1"""
	4085	for short_name, long_name in cls._lang_map.items():
	4086	if long_name == code:
	4087	return short_name
	4088
	4089
	4090	class ISO3166Utils(object):
	4091	# From http://data.okfn.org/data/core/country-list
	4092	_country_map = {
	4093	'AF': 'Afghanistan',
	4094	'AX': 'Åland Islands',
	4095	'AL': 'Albania',
	4096	'DZ': 'Algeria',
	4097	'AS': 'American Samoa',
	4098	'AD': 'Andorra',
	4099	'AO': 'Angola',
	4100	'AI': 'Anguilla',
	4101	'AQ': 'Antarctica',
	4102	'AG': 'Antigua and Barbuda',
	4103	'AR': 'Argentina',
	4104	'AM': 'Armenia',
	4105	'AW': 'Aruba',
	4106	'AU': 'Australia',
	4107	'AT': 'Austria',
	4108	'AZ': 'Azerbaijan',
	4109	'BS': 'Bahamas',
	4110	'BH': 'Bahrain',
	4111	'BD': 'Bangladesh',
	4112	'BB': 'Barbados',
	4113	'BY': 'Belarus',
	4114	'BE': 'Belgium',
	4115	'BZ': 'Belize',
	4116	'BJ': 'Benin',
	4117	'BM': 'Bermuda',
	4118	'BT': 'Bhutan',
	4119	'BO': 'Bolivia, Plurinational State of',
	4120	'BQ': 'Bonaire, Sint Eustatius and Saba',
	4121	'BA': 'Bosnia and Herzegovina',
	4122	'BW': 'Botswana',
	4123	'BV': 'Bouvet Island',
	4124	'BR': 'Brazil',
	4125	'IO': 'British Indian Ocean Territory',
	4126	'BN': 'Brunei Darussalam',
	4127	'BG': 'Bulgaria',
	4128	'BF': 'Burkina Faso',
	4129	'BI': 'Burundi',
	4130	'KH': 'Cambodia',
	4131	'CM': 'Cameroon',
	4132	'CA': 'Canada',
	4133	'CV': 'Cape Verde',
	4134	'KY': 'Cayman Islands',
	4135	'CF': 'Central African Republic',
	4136	'TD': 'Chad',
	4137	'CL': 'Chile',
	4138	'CN': 'China',
	4139	'CX': 'Christmas Island',
	4140	'CC': 'Cocos (Keeling) Islands',
	4141	'CO': 'Colombia',
	4142	'KM': 'Comoros',
	4143	'CG': 'Congo',
	4144	'CD': 'Congo, the Democratic Republic of the',
	4145	'CK': 'Cook Islands',
	4146	'CR': 'Costa Rica',
	4147	'CI': 'Côte d\'Ivoire',
	4148	'HR': 'Croatia',
	4149	'CU': 'Cuba',
	4150	'CW': 'Curaçao',
	4151	'CY': 'Cyprus',
	4152	'CZ': 'Czech Republic',
	4153	'DK': 'Denmark',
	4154	'DJ': 'Djibouti',
	4155	'DM': 'Dominica',
	4156	'DO': 'Dominican Republic',
	4157	'EC': 'Ecuador',
	4158	'EG': 'Egypt',
	4159	'SV': 'El Salvador',
	4160	'GQ': 'Equatorial Guinea',
	4161	'ER': 'Eritrea',
	4162	'EE': 'Estonia',
	4163	'ET': 'Ethiopia',
	4164	'FK': 'Falkland Islands (Malvinas)',
	4165	'FO': 'Faroe Islands',
	4166	'FJ': 'Fiji',
	4167	'FI': 'Finland',
	4168	'FR': 'France',
	4169	'GF': 'French Guiana',
	4170	'PF': 'French Polynesia',
	4171	'TF': 'French Southern Territories',
	4172	'GA': 'Gabon',
	4173	'GM': 'Gambia',
	4174	'GE': 'Georgia',
	4175	'DE': 'Germany',
	4176	'GH': 'Ghana',
	4177	'GI': 'Gibraltar',
	4178	'GR': 'Greece',
	4179	'GL': 'Greenland',
	4180	'GD': 'Grenada',
	4181	'GP': 'Guadeloupe',
	4182	'GU': 'Guam',
	4183	'GT': 'Guatemala',
	4184	'GG': 'Guernsey',
	4185	'GN': 'Guinea',
	4186	'GW': 'Guinea-Bissau',
	4187	'GY': 'Guyana',
	4188	'HT': 'Haiti',
	4189	'HM': 'Heard Island and McDonald Islands',
	4190	'VA': 'Holy See (Vatican City State)',
	4191	'HN': 'Honduras',
	4192	'HK': 'Hong Kong',
	4193	'HU': 'Hungary',
	4194	'IS': 'Iceland',
	4195	'IN': 'India',
	4196	'ID': 'Indonesia',
	4197	'IR': 'Iran, Islamic Republic of',
	4198	'IQ': 'Iraq',
	4199	'IE': 'Ireland',
	4200	'IM': 'Isle of Man',
	4201	'IL': 'Israel',
	4202	'IT': 'Italy',
	4203	'JM': 'Jamaica',
	4204	'JP': 'Japan',
	4205	'JE': 'Jersey',
	4206	'JO': 'Jordan',
	4207	'KZ': 'Kazakhstan',
	4208	'KE': 'Kenya',
	4209	'KI': 'Kiribati',
	4210	'KP': 'Korea, Democratic People\'s Republic of',
	4211	'KR': 'Korea, Republic of',
	4212	'KW': 'Kuwait',
	4213	'KG': 'Kyrgyzstan',
	4214	'LA': 'Lao People\'s Democratic Republic',
	4215	'LV': 'Latvia',
	4216	'LB': 'Lebanon',
	4217	'LS': 'Lesotho',
	4218	'LR': 'Liberia',
	4219	'LY': 'Libya',
	4220	'LI': 'Liechtenstein',
	4221	'LT': 'Lithuania',
	4222	'LU': 'Luxembourg',
	4223	'MO': 'Macao',
	4224	'MK': 'Macedonia, the Former Yugoslav Republic of',
	4225	'MG': 'Madagascar',
	4226	'MW': 'Malawi',
	4227	'MY': 'Malaysia',
	4228	'MV': 'Maldives',
	4229	'ML': 'Mali',
	4230	'MT': 'Malta',
	4231	'MH': 'Marshall Islands',
	4232	'MQ': 'Martinique',
	4233	'MR': 'Mauritania',
	4234	'MU': 'Mauritius',
	4235	'YT': 'Mayotte',
	4236	'MX': 'Mexico',
	4237	'FM': 'Micronesia, Federated States of',
	4238	'MD': 'Moldova, Republic of',
	4239	'MC': 'Monaco',
	4240	'MN': 'Mongolia',
	4241	'ME': 'Montenegro',
	4242	'MS': 'Montserrat',
	4243	'MA': 'Morocco',
	4244	'MZ': 'Mozambique',
	4245	'MM': 'Myanmar',
	4246	'NA': 'Namibia',
	4247	'NR': 'Nauru',
	4248	'NP': 'Nepal',
	4249	'NL': 'Netherlands',
	4250	'NC': 'New Caledonia',
	4251	'NZ': 'New Zealand',
	4252	'NI': 'Nicaragua',
	4253	'NE': 'Niger',
	4254	'NG': 'Nigeria',
	4255	'NU': 'Niue',
	4256	'NF': 'Norfolk Island',
	4257	'MP': 'Northern Mariana Islands',
	4258	'NO': 'Norway',
	4259	'OM': 'Oman',
	4260	'PK': 'Pakistan',
	4261	'PW': 'Palau',
	4262	'PS': 'Palestine, State of',
	4263	'PA': 'Panama',
	4264	'PG': 'Papua New Guinea',
	4265	'PY': 'Paraguay',
	4266	'PE': 'Peru',
	4267	'PH': 'Philippines',
	4268	'PN': 'Pitcairn',
	4269	'PL': 'Poland',
	4270	'PT': 'Portugal',
	4271	'PR': 'Puerto Rico',
	4272	'QA': 'Qatar',
	4273	'RE': 'Réunion',
	4274	'RO': 'Romania',
	4275	'RU': 'Russian Federation',
	4276	'RW': 'Rwanda',
	4277	'BL': 'Saint Barthélemy',
	4278	'SH': 'Saint Helena, Ascension and Tristan da Cunha',
	4279	'KN': 'Saint Kitts and Nevis',
	4280	'LC': 'Saint Lucia',
	4281	'MF': 'Saint Martin (French part)',
	4282	'PM': 'Saint Pierre and Miquelon',
	4283	'VC': 'Saint Vincent and the Grenadines',
	4284	'WS': 'Samoa',
	4285	'SM': 'San Marino',
	4286	'ST': 'Sao Tome and Principe',
	4287	'SA': 'Saudi Arabia',
	4288	'SN': 'Senegal',
	4289	'RS': 'Serbia',
	4290	'SC': 'Seychelles',
	4291	'SL': 'Sierra Leone',
	4292	'SG': 'Singapore',
	4293	'SX': 'Sint Maarten (Dutch part)',
	4294	'SK': 'Slovakia',
	4295	'SI': 'Slovenia',
	4296	'SB': 'Solomon Islands',
	4297	'SO': 'Somalia',
	4298	'ZA': 'South Africa',
	4299	'GS': 'South Georgia and the South Sandwich Islands',
	4300	'SS': 'South Sudan',
	4301	'ES': 'Spain',
	4302	'LK': 'Sri Lanka',
	4303	'SD': 'Sudan',
	4304	'SR': 'Suriname',
	4305	'SJ': 'Svalbard and Jan Mayen',
	4306	'SZ': 'Swaziland',
	4307	'SE': 'Sweden',
	4308	'CH': 'Switzerland',
	4309	'SY': 'Syrian Arab Republic',
	4310	'TW': 'Taiwan, Province of China',
	4311	'TJ': 'Tajikistan',
	4312	'TZ': 'Tanzania, United Republic of',
	4313	'TH': 'Thailand',
	4314	'TL': 'Timor-Leste',
	4315	'TG': 'Togo',
	4316	'TK': 'Tokelau',
	4317	'TO': 'Tonga',
	4318	'TT': 'Trinidad and Tobago',
	4319	'TN': 'Tunisia',
	4320	'TR': 'Turkey',
	4321	'TM': 'Turkmenistan',
	4322	'TC': 'Turks and Caicos Islands',
	4323	'TV': 'Tuvalu',
	4324	'UG': 'Uganda',
	4325	'UA': 'Ukraine',
	4326	'AE': 'United Arab Emirates',
	4327	'GB': 'United Kingdom',
	4328	'US': 'United States',
	4329	'UM': 'United States Minor Outlying Islands',
	4330	'UY': 'Uruguay',
	4331	'UZ': 'Uzbekistan',
	4332	'VU': 'Vanuatu',
	4333	'VE': 'Venezuela, Bolivarian Republic of',
	4334	'VN': 'Viet Nam',
	4335	'VG': 'Virgin Islands, British',
	4336	'VI': 'Virgin Islands, U.S.',
	4337	'WF': 'Wallis and Futuna',
	4338	'EH': 'Western Sahara',
	4339	'YE': 'Yemen',
	4340	'ZM': 'Zambia',
	4341	'ZW': 'Zimbabwe',
	4342	}
	4343
	4344	@classmethod
	4345	def short2full(cls, code):
	4346	"""Convert an ISO 3166-2 country code to the corresponding full name"""
	4347	return cls._country_map.get(code.upper())
	4348
	4349
	4350	class GeoUtils(object):
	4351	# Major IPv4 address blocks per country
	4352	_country_ip_map = {
	4353	'AD': '46.172.224.0/19',
	4354	'AE': '94.200.0.0/13',
	4355	'AF': '149.54.0.0/17',
	4356	'AG': '209.59.64.0/18',
	4357	'AI': '204.14.248.0/21',
	4358	'AL': '46.99.0.0/16',
	4359	'AM': '46.70.0.0/15',
	4360	'AO': '105.168.0.0/13',
	4361	'AP': '182.50.184.0/21',
	4362	'AQ': '23.154.160.0/24',
	4363	'AR': '181.0.0.0/12',
	4364	'AS': '202.70.112.0/20',
	4365	'AT': '77.116.0.0/14',
	4366	'AU': '1.128.0.0/11',
	4367	'AW': '181.41.0.0/18',
	4368	'AX': '185.217.4.0/22',
	4369	'AZ': '5.197.0.0/16',
	4370	'BA': '31.176.128.0/17',
	4371	'BB': '65.48.128.0/17',
	4372	'BD': '114.130.0.0/16',
	4373	'BE': '57.0.0.0/8',
	4374	'BF': '102.178.0.0/15',
	4375	'BG': '95.42.0.0/15',
	4376	'BH': '37.131.0.0/17',
	4377	'BI': '154.117.192.0/18',
	4378	'BJ': '137.255.0.0/16',
	4379	'BL': '185.212.72.0/23',
	4380	'BM': '196.12.64.0/18',
	4381	'BN': '156.31.0.0/16',
	4382	'BO': '161.56.0.0/16',
	4383	'BQ': '161.0.80.0/20',
	4384	'BR': '191.128.0.0/12',
	4385	'BS': '24.51.64.0/18',
	4386	'BT': '119.2.96.0/19',
	4387	'BW': '168.167.0.0/16',
	4388	'BY': '178.120.0.0/13',
	4389	'BZ': '179.42.192.0/18',
	4390	'CA': '99.224.0.0/11',
	4391	'CD': '41.243.0.0/16',
	4392	'CF': '197.242.176.0/21',
	4393	'CG': '160.113.0.0/16',
	4394	'CH': '85.0.0.0/13',
	4395	'CI': '102.136.0.0/14',
	4396	'CK': '202.65.32.0/19',
	4397	'CL': '152.172.0.0/14',
	4398	'CM': '102.244.0.0/14',
	4399	'CN': '36.128.0.0/10',
	4400	'CO': '181.240.0.0/12',
	4401	'CR': '201.192.0.0/12',
	4402	'CU': '152.206.0.0/15',
	4403	'CV': '165.90.96.0/19',
	4404	'CW': '190.88.128.0/17',
	4405	'CY': '31.153.0.0/16',
	4406	'CZ': '88.100.0.0/14',
	4407	'DE': '53.0.0.0/8',
	4408	'DJ': '197.241.0.0/17',
	4409	'DK': '87.48.0.0/12',
	4410	'DM': '192.243.48.0/20',
	4411	'DO': '152.166.0.0/15',
	4412	'DZ': '41.96.0.0/12',
	4413	'EC': '186.68.0.0/15',
	4414	'EE': '90.190.0.0/15',
	4415	'EG': '156.160.0.0/11',
	4416	'ER': '196.200.96.0/20',
	4417	'ES': '88.0.0.0/11',
	4418	'ET': '196.188.0.0/14',
	4419	'EU': '2.16.0.0/13',
	4420	'FI': '91.152.0.0/13',
	4421	'FJ': '144.120.0.0/16',
	4422	'FK': '80.73.208.0/21',
	4423	'FM': '119.252.112.0/20',
	4424	'FO': '88.85.32.0/19',
	4425	'FR': '90.0.0.0/9',
	4426	'GA': '41.158.0.0/15',
	4427	'GB': '25.0.0.0/8',
	4428	'GD': '74.122.88.0/21',
	4429	'GE': '31.146.0.0/16',
	4430	'GF': '161.22.64.0/18',
	4431	'GG': '62.68.160.0/19',
	4432	'GH': '154.160.0.0/12',
	4433	'GI': '95.164.0.0/16',
	4434	'GL': '88.83.0.0/19',
	4435	'GM': '160.182.0.0/15',
	4436	'GN': '197.149.192.0/18',
	4437	'GP': '104.250.0.0/19',
	4438	'GQ': '105.235.224.0/20',
	4439	'GR': '94.64.0.0/13',
	4440	'GT': '168.234.0.0/16',
	4441	'GU': '168.123.0.0/16',
	4442	'GW': '197.214.80.0/20',
	4443	'GY': '181.41.64.0/18',
	4444	'HK': '113.252.0.0/14',
	4445	'HN': '181.210.0.0/16',
	4446	'HR': '93.136.0.0/13',
	4447	'HT': '148.102.128.0/17',
	4448	'HU': '84.0.0.0/14',
	4449	'ID': '39.192.0.0/10',
	4450	'IE': '87.32.0.0/12',
	4451	'IL': '79.176.0.0/13',
	4452	'IM': '5.62.80.0/20',
	4453	'IN': '117.192.0.0/10',
	4454	'IO': '203.83.48.0/21',
	4455	'IQ': '37.236.0.0/14',
	4456	'IR': '2.176.0.0/12',
	4457	'IS': '82.221.0.0/16',
	4458	'IT': '79.0.0.0/10',
	4459	'JE': '87.244.64.0/18',
	4460	'JM': '72.27.0.0/17',
	4461	'JO': '176.29.0.0/16',
	4462	'JP': '133.0.0.0/8',
	4463	'KE': '105.48.0.0/12',
	4464	'KG': '158.181.128.0/17',
	4465	'KH': '36.37.128.0/17',
	4466	'KI': '103.25.140.0/22',
	4467	'KM': '197.255.224.0/20',
	4468	'KN': '198.167.192.0/19',
	4469	'KP': '175.45.176.0/22',
	4470	'KR': '175.192.0.0/10',
	4471	'KW': '37.36.0.0/14',
	4472	'KY': '64.96.0.0/15',
	4473	'KZ': '2.72.0.0/13',
	4474	'LA': '115.84.64.0/18',
	4475	'LB': '178.135.0.0/16',
	4476	'LC': '24.92.144.0/20',
	4477	'LI': '82.117.0.0/19',
	4478	'LK': '112.134.0.0/15',
	4479	'LR': '102.183.0.0/16',
	4480	'LS': '129.232.0.0/17',
	4481	'LT': '78.56.0.0/13',
	4482	'LU': '188.42.0.0/16',
	4483	'LV': '46.109.0.0/16',
	4484	'LY': '41.252.0.0/14',
	4485	'MA': '105.128.0.0/11',
	4486	'MC': '88.209.64.0/18',
	4487	'MD': '37.246.0.0/16',
	4488	'ME': '178.175.0.0/17',
	4489	'MF': '74.112.232.0/21',
	4490	'MG': '154.126.0.0/17',
	4491	'MH': '117.103.88.0/21',
	4492	'MK': '77.28.0.0/15',
	4493	'ML': '154.118.128.0/18',
	4494	'MM': '37.111.0.0/17',
	4495	'MN': '49.0.128.0/17',
	4496	'MO': '60.246.0.0/16',
	4497	'MP': '202.88.64.0/20',
	4498	'MQ': '109.203.224.0/19',
	4499	'MR': '41.188.64.0/18',
	4500	'MS': '208.90.112.0/22',
	4501	'MT': '46.11.0.0/16',
	4502	'MU': '105.16.0.0/12',
	4503	'MV': '27.114.128.0/18',
	4504	'MW': '102.70.0.0/15',
	4505	'MX': '187.192.0.0/11',
	4506	'MY': '175.136.0.0/13',
	4507	'MZ': '197.218.0.0/15',
	4508	'NA': '41.182.0.0/16',
	4509	'NC': '101.101.0.0/18',
	4510	'NE': '197.214.0.0/18',
	4511	'NF': '203.17.240.0/22',
	4512	'NG': '105.112.0.0/12',
	4513	'NI': '186.76.0.0/15',
	4514	'NL': '145.96.0.0/11',
	4515	'NO': '84.208.0.0/13',
	4516	'NP': '36.252.0.0/15',
	4517	'NR': '203.98.224.0/19',
	4518	'NU': '49.156.48.0/22',
	4519	'NZ': '49.224.0.0/14',
	4520	'OM': '5.36.0.0/15',
	4521	'PA': '186.72.0.0/15',
	4522	'PE': '186.160.0.0/14',
	4523	'PF': '123.50.64.0/18',
	4524	'PG': '124.240.192.0/19',
	4525	'PH': '49.144.0.0/13',
	4526	'PK': '39.32.0.0/11',
	4527	'PL': '83.0.0.0/11',
	4528	'PM': '70.36.0.0/20',
	4529	'PR': '66.50.0.0/16',
	4530	'PS': '188.161.0.0/16',
	4531	'PT': '85.240.0.0/13',
	4532	'PW': '202.124.224.0/20',
	4533	'PY': '181.120.0.0/14',
	4534	'QA': '37.210.0.0/15',
	4535	'RE': '102.35.0.0/16',
	4536	'RO': '79.112.0.0/13',
	4537	'RS': '93.86.0.0/15',
	4538	'RU': '5.136.0.0/13',
	4539	'RW': '41.186.0.0/16',
	4540	'SA': '188.48.0.0/13',
	4541	'SB': '202.1.160.0/19',
	4542	'SC': '154.192.0.0/11',
	4543	'SD': '102.120.0.0/13',
	4544	'SE': '78.64.0.0/12',
	4545	'SG': '8.128.0.0/10',
	4546	'SI': '188.196.0.0/14',
	4547	'SK': '78.98.0.0/15',
	4548	'SL': '102.143.0.0/17',
	4549	'SM': '89.186.32.0/19',
	4550	'SN': '41.82.0.0/15',
	4551	'SO': '154.115.192.0/18',
	4552	'SR': '186.179.128.0/17',
	4553	'SS': '105.235.208.0/21',
	4554	'ST': '197.159.160.0/19',
	4555	'SV': '168.243.0.0/16',
	4556	'SX': '190.102.0.0/20',
	4557	'SY': '5.0.0.0/16',
	4558	'SZ': '41.84.224.0/19',
	4559	'TC': '65.255.48.0/20',
	4560	'TD': '154.68.128.0/19',
	4561	'TG': '196.168.0.0/14',
	4562	'TH': '171.96.0.0/13',
	4563	'TJ': '85.9.128.0/18',
	4564	'TK': '27.96.24.0/21',
	4565	'TL': '180.189.160.0/20',
	4566	'TM': '95.85.96.0/19',
	4567	'TN': '197.0.0.0/11',
	4568	'TO': '175.176.144.0/21',
	4569	'TR': '78.160.0.0/11',
	4570	'TT': '186.44.0.0/15',
	4571	'TV': '202.2.96.0/19',
	4572	'TW': '120.96.0.0/11',
	4573	'TZ': '156.156.0.0/14',
	4574	'UA': '37.52.0.0/14',
	4575	'UG': '102.80.0.0/13',
	4576	'US': '6.0.0.0/8',
	4577	'UY': '167.56.0.0/13',
	4578	'UZ': '84.54.64.0/18',
	4579	'VA': '212.77.0.0/19',
	4580	'VC': '207.191.240.0/21',
	4581	'VE': '186.88.0.0/13',
	4582	'VG': '66.81.192.0/20',
	4583	'VI': '146.226.0.0/16',
	4584	'VN': '14.160.0.0/11',
	4585	'VU': '202.80.32.0/20',
	4586	'WF': '117.20.32.0/21',
	4587	'WS': '202.4.32.0/19',
	4588	'YE': '134.35.0.0/16',
	4589	'YT': '41.242.116.0/22',
	4590	'ZA': '41.0.0.0/11',
	4591	'ZM': '102.144.0.0/13',
	4592	'ZW': '102.177.192.0/18',
	4593	}
	4594
	4595	@classmethod
	4596	def random_ipv4(cls, code_or_block):
	4597	if len(code_or_block) == 2:
	4598	block = cls._country_ip_map.get(code_or_block.upper())
	4599	if not block:
	4600	return None
	4601	else:
	4602	block = code_or_block
	4603	addr, preflen = block.split('/')
	4604	addr_min = compat_struct_unpack('!L', socket.inet_aton(addr))[0]
	4605	addr_max = addr_min \| (0xffffffff >> int(preflen))
	4606	return compat_str(socket.inet_ntoa(
	4607	compat_struct_pack('!L', random.randint(addr_min, addr_max))))
	4608
	4609
	4610	class PerRequestProxyHandler(compat_urllib_request.ProxyHandler):
	4611	def __init__(self, proxies=None):
	4612	# Set default handlers
	4613	for type in ('http', 'https'):
	4614	setattr(self, '%s_open' % type,
	4615	lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open:
	4616	meth(r, proxy, type))
	4617	compat_urllib_request.ProxyHandler.__init__(self, proxies)
	4618
	4619	def proxy_open(self, req, proxy, type):
	4620	req_proxy = req.headers.get('Ytdl-request-proxy')
	4621	if req_proxy is not None:
	4622	proxy = req_proxy
	4623	del req.headers['Ytdl-request-proxy']
	4624
	4625	if proxy == '__noproxy__':
	4626	return None # No Proxy
	4627	if compat_urlparse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'):
	4628	req.add_header('Ytdl-socks-proxy', proxy)
	4629	# yt-dlp's http/https handlers do wrapping the socket with socks
	4630	return None
	4631	return compat_urllib_request.ProxyHandler.proxy_open(
	4632	self, req, proxy, type)
	4633
	4634
	4635	# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
	4636	# released into Public Domain
	4637	# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
	4638
	4639	def long_to_bytes(n, blocksize=0):
	4640	"""long_to_bytes(n:long, blocksize:int) : string
	4641	Convert a long integer to a byte string.
	4642
	4643	If optional blocksize is given and greater than zero, pad the front of the
	4644	byte string with binary zeros so that the length is a multiple of
	4645	blocksize.
	4646	"""
	4647	# after much testing, this algorithm was deemed to be the fastest
	4648	s = b''
	4649	n = int(n)
	4650	while n > 0:
	4651	s = compat_struct_pack('>I', n & 0xffffffff) + s
	4652	n = n >> 32
	4653	# strip off leading zeros
	4654	for i in range(len(s)):
	4655	if s[i] != b'\000'[0]:
	4656	break
	4657	else:
	4658	# only happens when n == 0
	4659	s = b'\000'
	4660	i = 0
	4661	s = s[i:]
	4662	# add back some pad bytes. this could be done more efficiently w.r.t. the
	4663	# de-padding being done above, but sigh...
	4664	if blocksize > 0 and len(s) % blocksize:
	4665	s = (blocksize - len(s) % blocksize) * b'\000' + s
	4666	return s
	4667
	4668
	4669	def bytes_to_long(s):
	4670	"""bytes_to_long(string) : long
	4671	Convert a byte string to a long integer.
	4672
	4673	This is (essentially) the inverse of long_to_bytes().
	4674	"""
	4675	acc = 0
	4676	length = len(s)
	4677	if length % 4:
	4678	extra = (4 - length % 4)
	4679	s = b'\000' * extra + s
	4680	length = length + extra
	4681	for i in range(0, length, 4):
	4682	acc = (acc << 32) + compat_struct_unpack('>I', s[i:i + 4])[0]
	4683	return acc
	4684
	4685
	4686	def ohdave_rsa_encrypt(data, exponent, modulus):
	4687	'''
	4688	Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
	4689
	4690	Input:
	4691	data: data to encrypt, bytes-like object
	4692	exponent, modulus: parameter e and N of RSA algorithm, both integer
	4693	Output: hex string of encrypted data
	4694
	4695	Limitation: supports one block encryption only
	4696	'''
	4697
	4698	payload = int(binascii.hexlify(data[::-1]), 16)
	4699	encrypted = pow(payload, exponent, modulus)
	4700	return '%x' % encrypted
	4701
	4702
	4703	def pkcs1pad(data, length):
	4704	"""
	4705	Padding input data with PKCS#1 scheme
	4706
	4707	@param {int[]} data input data
	4708	@param {int} length target length
	4709	@returns {int[]} padded data
	4710	"""
	4711	if len(data) > length - 11:
	4712	raise ValueError('Input data too long for PKCS#1 padding')
	4713
	4714	pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
	4715	return [0, 2] + pseudo_random + [0] + data
	4716
	4717
	4718	def encode_base_n(num, n, table=None):
	4719	FULL_TABLE = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
	4720	if not table:
	4721	table = FULL_TABLE[:n]
	4722
	4723	if n > len(table):
	4724	raise ValueError('base %d exceeds table length %d' % (n, len(table)))
	4725
	4726	if num == 0:
	4727	return table[0]
	4728
	4729	ret = ''
	4730	while num:
	4731	ret = table[num % n] + ret
	4732	num = num // n
	4733	return ret
	4734
	4735
	4736	def decode_packed_codes(code):
	4737	mobj = re.search(PACKED_CODES_RE, code)
	4738	obfuscated_code, base, count, symbols = mobj.groups()
	4739	base = int(base)
	4740	count = int(count)
	4741	symbols = symbols.split('\|')
	4742	symbol_table = {}
	4743
	4744	while count:
	4745	count -= 1
	4746	base_n_count = encode_base_n(count, base)
	4747	symbol_table[base_n_count] = symbols[count] or base_n_count
	4748
	4749	return re.sub(
	4750	r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
	4751	obfuscated_code)
	4752
	4753
	4754	def caesar(s, alphabet, shift):
	4755	if shift == 0:
	4756	return s
	4757	l = len(alphabet)
	4758	return ''.join(
	4759	alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
	4760	for c in s)
	4761
	4762
	4763	def rot47(s):
	4764	return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{\|}~''', 47)
	4765
	4766
	4767	def parse_m3u8_attributes(attrib):
	4768	info = {}
	4769	for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"\|[^",]+)(?:,\|$)', attrib):
	4770	if val.startswith('"'):
	4771	val = val[1:-1]
	4772	info[key] = val
	4773	return info
	4774
	4775
	4776	def urshift(val, n):
	4777	return val >> n if val >= 0 else (val + 0x100000000) >> n
	4778
	4779
# Based on png2str() written by @gdkchan and improved by @yokrysty
# Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706
def decode_png(png_data):
    """Decode PNG image data into raw channel values.

    @param png_data  the bytes of a PNG file
    @returns         a tuple (width, height, pixels); pixels holds one list
                     per image row, each with width * 3 integers in 0..255
    @raises IOError  if the PNG signature or the leading IHDR chunk is
                     missing, or if the file contains no IDAT data

    NOTE(review): bit depth, colour type and interlace method from IHDR are
    never inspected; the fixed stride of 3 bytes per pixel presumably assumes
    8-bit RGB, non-interlaced images - confirm against callers.
    """
    # Reference: https://www.w3.org/TR/PNG/
    header = png_data[8:]

    # The 8-byte signature must be followed by the IHDR chunk
    # (4-byte length, then the 4-byte chunk type)
    if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR':
        raise IOError('Not a valid PNG file.')

    # Big-endian unsigned integer formats, selected by the number of bytes
    int_map = {1: '>B', 2: '>H', 4: '>I'}
    unpack_integer = lambda x: compat_struct_unpack(int_map[len(x)], x)[0]

    chunks = []

    # Walk the chunk stream: length, type, data, CRC - consuming `header`
    # from the front until nothing is left
    while header:
        length = unpack_integer(header[:4])
        header = header[4:]

        chunk_type = header[:4]
        header = header[4:]

        chunk_data = header[:length]
        header = header[length:]

        header = header[4:]  # Skip CRC

        chunks.append({
            'type': chunk_type,
            'length': length,
            'data': chunk_data
        })

    # The first chunk is IHDR (checked above); it starts with width and height
    ihdr = chunks[0]['data']

    width = unpack_integer(ihdr[:4])
    height = unpack_integer(ihdr[4:8])

    idat = b''

    # The compressed image stream is the concatenation of all IDAT chunks
    for chunk in chunks:
        if chunk['type'] == b'IDAT':
            idat += chunk['data']

    if not idat:
        raise IOError('Unable to read PNG data.')

    decompressed_data = bytearray(zlib.decompress(idat))

    # Number of data bytes per row (3 bytes for every pixel)
    stride = width * 3
    pixels = []

    def _get_pixel(idx):
        # Fetch an already reconstructed byte by its flat index
        x = idx % stride
        y = idx // stride
        return pixels[y][x]

    for y in range(height):
        # Every row is preceded by one byte naming its filter type
        basePos = y * (1 + stride)
        filter_type = decompressed_data[basePos]

        current_row = []

        # Appended before it is filled so that _get_pixel can already read
        # the bytes of the current row
        pixels.append(current_row)

        for x in range(stride):
            color = decompressed_data[1 + basePos + x]
            basex = y * stride + x
            left = 0
            up = 0

            # Neighbouring bytes default to 0 outside of the image:
            # `left` is the same channel of the previous pixel (3 bytes back),
            # `up` is the byte at the same position in the previous row
            if x > 2:
                left = _get_pixel(basex - 3)
            if y > 0:
                up = _get_pixel(basex - stride)

            # Filter type 0 (and any other unhandled value) leaves the byte as is
            if filter_type == 1:  # Sub
                color = (color + left) & 0xff
            elif filter_type == 2:  # Up
                color = (color + up) & 0xff
            elif filter_type == 3:  # Average
                color = (color + ((left + up) >> 1)) & 0xff
            elif filter_type == 4:  # Paeth
                a = left
                b = up
                c = 0

                # c is the upper-left neighbour
                if x > 2 and y > 0:
                    c = _get_pixel(basex - stride - 3)

                p = a + b - c

                pa = abs(p - a)
                pb = abs(p - b)
                pc = abs(p - c)

                # Add whichever of a, b, c is closest to p; ties go to a, then b
                if pa <= pb and pa <= pc:
                    color = (color + a) & 0xff
                elif pb <= pc:
                    color = (color + b) & 0xff
                else:
                    color = (color + c) & 0xff

            current_row.append(color)

    return width, height, pixels
	4885
	4886
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file at `path` to `value`.

    The `xattr` python module is used when it can be imported. Otherwise the
    value is written to an NTFS Alternate Data Stream when compat_os_name is
    'nt', or through the `setfattr` / `xattr` command line tools elsewhere.

    @param path   path of the file to tag
    @param key    name of the attribute
    @param value  attribute value as bytes (it is written to a binary file,
                  or decoded as UTF-8 for the command line tools)
    @raises XAttrMetadataError     if writing the attribute fails
    @raises XAttrUnavailableError  if the installed pyxattr is too old, or no
                                   implementation is available at all
    """
    # This mess below finds the best xattr tool for the job
    try:
        # try the pyxattr module...
        import xattr

        # Two different modules are importable as `xattr`; only pyxattr has `set`
        if hasattr(xattr, 'set'):  # pyxattr
            # Unicode arguments are not supported in python-pyxattr until
            # version 0.5.0
            # See https://github.com/ytdl-org/youtube-dl/issues/5498
            pyxattr_required_version = '0.5.0'
            if version_tuple(xattr.__version__) < version_tuple(pyxattr_required_version):
                # TODO: fallback to CLI tools
                raise XAttrUnavailableError(
                    'python-pyxattr is detected but is too old. '
                    'yt-dlp requires %s or above while your version is %s. '
                    'Falling back to other xattr implementations' % (
                        pyxattr_required_version, xattr.__version__))

            setxattr = xattr.set
        else:  # xattr
            setxattr = xattr.setxattr

        try:
            setxattr(path, key, value)
        except EnvironmentError as e:
            raise XAttrMetadataError(e.errno, e.strerror)

    except ImportError:
        # No python module available: use what the platform offers
        if compat_os_name == 'nt':
            # Write xattrs to NTFS Alternate Data Streams:
            # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
            assert ':' not in key
            assert os.path.exists(path)

            # The stream is addressed as "<path>:<key>"
            ads_fn = path + ':' + key
            try:
                with open(ads_fn, 'wb') as f:
                    f.write(value)
            except EnvironmentError as e:
                raise XAttrMetadataError(e.errno, e.strerror)
        else:
            user_has_setfattr = check_executable('setfattr', ['--version'])
            user_has_xattr = check_executable('xattr', ['-h'])

            if user_has_setfattr or user_has_xattr:

                # The command line tools receive the value as a text argument
                value = value.decode('utf-8')
                # setfattr is preferred when both tools are present
                if user_has_setfattr:
                    executable = 'setfattr'
                    opts = ['-n', key, '-v', value]
                elif user_has_xattr:
                    executable = 'xattr'
                    opts = ['-w', key, value]

                cmd = ([encodeFilename(executable, True)]
                       + [encodeArgument(o) for o in opts]
                       + [encodeFilename(path, True)])

                try:
                    p = Popen(
                        cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
                except EnvironmentError as e:
                    raise XAttrMetadataError(e.errno, e.strerror)
                stdout, stderr = p.communicate_or_kill()
                stderr = stderr.decode('utf-8', 'replace')
                # A non-zero exit status is reported together with the tool's stderr
                if p.returncode != 0:
                    raise XAttrMetadataError(p.returncode, stderr)

            else:
                # On Unix, and can't find pyxattr, setfattr, or xattr.
                if sys.platform.startswith('linux'):
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'pyxattr' or 'xattr' "
                        "modules, or the GNU 'attr' package "
                        "(which contains the 'setfattr' tool).")
                else:
                    raise XAttrUnavailableError(
                        "Couldn't find a tool to set the xattrs. "
                        "Install either the python 'xattr' module, "
                        "or the 'xattr' binary.")
	4969
	4970
	4971	def random_birthday(year_field, month_field, day_field):
	4972	start_date = datetime.date(1950, 1, 1)
	4973	end_date = datetime.date(1995, 12, 31)
	4974	offset = random.randint(0, (end_date - start_date).days)
	4975	random_date = start_date + datetime.timedelta(offset)
	4976	return {
	4977	year_field: str(random_date.year),
	4978	month_field: str(random_date.month),
	4979	day_field: str(random_date.day),
	4980	}
	4981
	4982
# Templates for internet shortcut files, which are plain text files.
# Each template is a %-format string with named placeholders (`%(url)s`, and
# `%(filename)s` for the desktop entry). `.lstrip()` removes the newline that
# follows the opening quotes, so each file starts at its first real line.

# `.url` shortcut: an INI-style [InternetShortcut] section
DOT_URL_LINK_TEMPLATE = '''
[InternetShortcut]
URL=%(url)s
'''.lstrip()

# `.webloc` shortcut: an Apple property list (XML) with a single URL key
DOT_WEBLOC_LINK_TEMPLATE = '''
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''.lstrip()

# `.desktop` shortcut: a [Desktop Entry] of type Link
DOT_DESKTOP_LINK_TEMPLATE = '''
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''.lstrip()

# Lookup of the templates above by shortcut kind
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
	5014
	5015
	5016	def iri_to_uri(iri):
	5017	"""
	5018	Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
	5019
	5020	The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding besides those already escaped, leaving the URI intact.
	5021	"""
	5022
	5023	iri_parts = compat_urllib_parse_urlparse(iri)
	5024
	5025	if '[' in iri_parts.netloc:
	5026	raise ValueError('IPv6 URIs are not, yet, supported.')
	5027	# Querying `.netloc`, when there's only one bracket, also raises a ValueError.
	5028
	5029	# The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
	5030
	5031	net_location = ''
	5032	if iri_parts.username:
	5033	net_location += compat_urllib_parse_quote(iri_parts.username, safe=r"!$%&'()*+,~")
	5034	if iri_parts.password is not None:
	5035	net_location += ':' + compat_urllib_parse_quote(iri_parts.password, safe=r"!$%&'()*+,~")
	5036	net_location += '@'
	5037
	5038	net_location += iri_parts.hostname.encode('idna').decode('utf-8') # Punycode for Unicode hostnames.
	5039	# The 'idna' encoding produces ASCII text.
	5040	if iri_parts.port is not None and iri_parts.port != 80:
	5041	net_location += ':' + str(iri_parts.port)
	5042
	5043	return compat_urllib_parse_urlunparse(
	5044	(iri_parts.scheme,
	5045	net_location,
	5046
	5047	compat_urllib_parse_quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@\|~"),
	5048
	5049	# Unsure about the `safe` argument, since this is a legacy way of handling parameters.
	5050	compat_urllib_parse_quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@\|~"),
	5051
	5052	# Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
	5053	compat_urllib_parse_quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{\|}~"),
	5054
	5055	compat_urllib_parse_quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{\|}~")))
	5056
	5057	# Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
	5058
	5059
	5060	def to_high_limit_path(path):
	5061	if sys.platform in ['win32', 'cygwin']:
	5062	# Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
	5063	return r'\\?\ '.rstrip() + os.path.abspath(path)
	5064
	5065	return path
	5066
	5067
	5068	def format_field(obj, field=None, template='%s', ignore=(None, ''), default='', func=None):
	5069	val = traverse_obj(obj, *variadic(field))
	5070	if val in ignore:
	5071	return default
	5072	return template % (func(val) if func else val)
	5073
	5074
	5075	def clean_podcast_url(url):
	5076	return re.sub(r'''(?x)
	5077	(?:
	5078	(?:
	5079	chtbl\.com/track\|
	5080	media\.blubrry\.com\| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
	5081	play\.podtrac\.com
	5082	)/[^/]+\|
	5083	(?:dts\|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}\| # http://analytics.podtrac.com/how-to-measure
	5084	flex\.acast\.com\|
	5085	pd(?:
	5086	cn\.co\| # https://podcorn.com/analytics-prefix/
	5087	st\.fm # https://podsights.com/docs/
	5088	)/e
	5089	)/''', '', url)
	5090
	5091
	5092	_HEX_TABLE = '0123456789abcdef'
	5093
	5094
	5095	def random_uuidv4():
	5096	return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
	5097
	5098
	5099	def make_dir(path, to_screen=None):
	5100	try:
	5101	dn = os.path.dirname(path)
	5102	if dn and not os.path.exists(dn):
	5103	os.makedirs(dn)
	5104	return True
	5105	except (OSError, IOError) as err:
	5106	if callable(to_screen) is not None:
	5107	to_screen('unable to create directory ' + error_to_compat_str(err))
	5108	return False
	5109
	5110
	5111	def get_executable_path():
	5112	from zipimport import zipimporter
	5113	if hasattr(sys, 'frozen'): # Running from PyInstaller
	5114	path = os.path.dirname(sys.executable)
	5115	elif isinstance(globals().get('__loader__'), zipimporter): # Running from ZIP
	5116	path = os.path.join(os.path.dirname(__file__), '../..')
	5117	else:
	5118	path = os.path.join(os.path.dirname(__file__), '..')
	5119	return os.path.abspath(path)
	5120
	5121
	5122	def load_plugins(name, suffix, namespace):
	5123	classes = {}
	5124	try:
	5125	plugins_spec = importlib.util.spec_from_file_location(
	5126	name, os.path.join(get_executable_path(), 'ytdlp_plugins', name, '__init__.py'))
	5127	plugins = importlib.util.module_from_spec(plugins_spec)
	5128	sys.modules[plugins_spec.name] = plugins
	5129	plugins_spec.loader.exec_module(plugins)
	5130	for name in dir(plugins):
	5131	if name in namespace:
	5132	continue
	5133	if not name.endswith(suffix):
	5134	continue
	5135	klass = getattr(plugins, name)
	5136	classes[name] = namespace[name] = klass
	5137	except FileNotFoundError:
	5138	pass
	5139	return classes
	5140
	5141
def traverse_obj(
        obj, *path_list, default=None, expected_type=None, get_all=True,
        casesense=True, is_user_input=False, traverse_string=False):
    ''' Traverse nested list/dict/tuple
    @param path_list        A list of paths which are checked one by one.
                            Each path is a list of keys where each key is a string,
                            a function, a tuple of strings/None or "...".
                            When a function is given, it takes the key as argument and
                            returns whether the key matches or not. When a tuple is given,
                            all the keys given in the tuple are traversed, and
                            "..." traverses all the keys in the object
                            "None" returns the object without traversal
    @param default          Default value to return
    @param expected_type    Only accept final value of this type (Can also be any callable)
    @param get_all          Return all the values obtained from a path or only the first one
    @param casesense        Whether to consider dictionary keys as case sensitive
    @param is_user_input    Whether the keys are generated from user input. If True,
                            strings are converted to int/slice if necessary
    @param traverse_string  Whether to traverse inside strings. If True, any
                            non-compatible object will also be converted into a string
    @returns                The first non-None result among the paths (after applying
                            expected_type), or `default` if no path yields one
    # TODO: Write tests
    '''
    if not casesense:
        # Lower-case every string key of every path up front; dictionary keys
        # are lower-cased on lookup inside _traverse_obj
        _lower = lambda k: (k.lower() if isinstance(k, str) else k)
        path_list = (map(_lower, variadic(path)) for path in path_list)

    def _traverse_obj(obj, path, _current_depth=0):
        # `depth` belongs to the enclosing function and is reset there for each
        # path; it records the deepest level of branching ("...", tuple or
        # callable keys) so that the nested result lists can be flattened
        nonlocal depth
        path = tuple(variadic(path))
        for i, key in enumerate(path):
            if None in (key, obj):
                # A None key stops traversal; a None object cannot be traversed
                return obj
            if isinstance(key, (list, tuple)):
                # Branch over every alternative key, then continue as if "..."
                # had been given so that the rest of the path is applied to each
                obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                key = ...
            if key is ...:
                obj = (obj.values() if isinstance(obj, dict)
                       else obj if isinstance(obj, (list, tuple, LazyList))
                       else str(obj) if traverse_string else [])
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
            elif callable(key):
                # The callable is a predicate on the key (dict) or index (sequence)
                if isinstance(obj, (list, tuple, LazyList)):
                    obj = enumerate(obj)
                elif isinstance(obj, dict):
                    obj = obj.items()
                else:
                    if not traverse_string:
                        return None
                    # NOTE(review): iterating a str yields single characters, which
                    # cannot be unpacked into (k, v) below - this path looks broken
                    obj = str(obj)
                _current_depth += 1
                depth = max(depth, _current_depth)
                return [_traverse_obj(v, path[i + 1:], _current_depth) for k, v in obj if key(k)]
            elif isinstance(obj, dict) and not (is_user_input and key == ':'):
                # Exact lookup first; the case-insensitive scan is only a fallback
                obj = (obj.get(key) if casesense or (key in obj)
                       else next((v for k, v in obj.items() if _lower(k) == key), None))
            else:
                if is_user_input:
                    # User-supplied keys arrive as strings: "3" -> 3, "1:5" -> slice(1, 5)
                    key = (int_or_none(key) if ':' not in key
                           else slice(*map(int_or_none, key.split(':'))))
                    if key == slice(None):
                        # A bare ":" selects everything, i.e. behaves like "..."
                        return _traverse_obj(obj, (..., *path[i + 1:]), _current_depth)
                if not isinstance(key, (int, slice)):
                    return None
                if not isinstance(obj, (list, tuple, LazyList)):
                    if not traverse_string:
                        return None
                    obj = str(obj)
                try:
                    obj = obj[key]
                except IndexError:
                    return None
        return obj

    # Normalize expected_type into a function returning the value or None
    if isinstance(expected_type, type):
        type_test = lambda val: val if isinstance(val, expected_type) else None
    elif expected_type is not None:
        type_test = expected_type
    else:
        type_test = lambda val: val

    for path in path_list:
        depth = 0
        val = _traverse_obj(obj, path)
        if val is not None:
            if depth:
                # Each level of branching added one level of list nesting;
                # flatten all but one level, dropping None branches
                for _ in range(depth - 1):
                    val = itertools.chain.from_iterable(v for v in val if v is not None)
                val = [v for v in map(type_test, val) if v is not None]
                if val:
                    return val if get_all else val[0]
            else:
                val = type_test(val)
                if val is not None:
                    return val
    return default
	5239
	5240
	5241	def traverse_dict(dictn, keys, casesense=True):
	5242	write_string('DeprecationWarning: yt_dlp.utils.traverse_dict is deprecated '
	5243	'and may be removed in a future version. Use yt_dlp.utils.traverse_obj instead')
	5244	return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True)
	5245
	5246
	5247	def get_first(obj, keys, **kwargs):
	5248	return traverse_obj(obj, (..., variadic(keys)), *kwargs, get_all=False)
	5249
	5250
	5251	def variadic(x, allowed_types=(str, bytes, dict)):
	5252	return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,)
	5253
	5254
	5255	def decode_base(value, digits):
	5256	# This will convert given base-x string to scalar (long or int)
	5257	table = {char: index for index, char in enumerate(digits)}
	5258	result = 0
	5259	base = len(digits)
	5260	for chr in value:
	5261	result *= base
	5262	result += table[chr]
	5263	return result
	5264
	5265
	5266	def time_seconds(**kwargs):
	5267	t = datetime.datetime.now(datetime.timezone(datetime.timedelta(**kwargs)))
	5268	return t.timestamp()
	5269
	5270
	5271	# create a JSON Web Signature (jws) with HS256 algorithm
	5272	# the resulting format is in JWS Compact Serialization
	5273	# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
	5274	# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
	5275	def jwt_encode_hs256(payload_data, key, headers={}):
	5276	header_data = {
	5277	'alg': 'HS256',
	5278	'typ': 'JWT',
	5279	}
	5280	if headers:
	5281	header_data.update(headers)
	5282	header_b64 = base64.b64encode(json.dumps(header_data).encode('utf-8'))
	5283	payload_b64 = base64.b64encode(json.dumps(payload_data).encode('utf-8'))
	5284	h = hmac.new(key.encode('utf-8'), header_b64 + b'.' + payload_b64, hashlib.sha256)
	5285	signature_b64 = base64.b64encode(h.digest())
	5286	token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
	5287	return token
	5288
	5289
	5290	# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
	5291	def jwt_decode_hs256(jwt):
	5292	header_b64, payload_b64, signature_b64 = jwt.split('.')
	5293	payload_data = json.loads(base64.urlsafe_b64decode(payload_b64))
	5294	return payload_data
	5295
	5296
	5297	def supports_terminal_sequences(stream):
	5298	if compat_os_name == 'nt':
	5299	from .compat import WINDOWS_VT_MODE # Must be imported locally
	5300	if not WINDOWS_VT_MODE or get_windows_version() < (10, 0, 10586):
	5301	return False
	5302	elif not os.getenv('TERM'):
	5303	return False
	5304	try:
	5305	return stream.isatty()
	5306	except BaseException:
	5307	return False
	5308
	5309
	5310	_terminal_sequences_re = re.compile('\033\\[[^m]+m')
	5311
	5312
	5313	def remove_terminal_sequences(string):
	5314	return _terminal_sequences_re.sub('', string)
	5315
	5316
	5317	def number_of_digits(number):
	5318	return len('%d' % number)
	5319
	5320
	5321	def join_nonempty(*values, delim='-', from_dict=None):
	5322	if from_dict is not None:
	5323	values = map(from_dict.get, values)
	5324	return delim.join(map(str, filter(None, values)))
	5325
	5326
	5327	def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
	5328	"""
	5329	Find the largest format dimensions in terms of video width and, for each thumbnail:
	5330	* Modify the URL: Match the width with the provided regex and replace with the former width
	5331	* Update dimensions
	5332
	5333	This function is useful with video services that scale the provided thumbnails on demand
	5334	"""
	5335	_keys = ('width', 'height')
	5336	max_dimensions = max(
	5337	[tuple(format.get(k) or 0 for k in _keys) for format in formats],
	5338	default=(0, 0))
	5339	if not max_dimensions[0]:
	5340	return thumbnails
	5341	return [
	5342	merge_dicts(
	5343	{'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
	5344	dict(zip(_keys, max_dimensions)), thumbnail)
	5345	for thumbnail in thumbnails
	5346	]
	5347
	5348
	5349	def parse_http_range(range):
	5350	""" Parse value of "Range" or "Content-Range" HTTP header into tuple. """
	5351	if not range:
	5352	return None, None, None
	5353	crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
	5354	if not crg:
	5355	return None, None, None
	5356	return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
	5357
	5358
	5359	class Config:
	5360	own_args = None
	5361	filename = None
	5362	__initialized = False
	5363
	5364	def __init__(self, parser, label=None):
	5365	self._parser, self.label = parser, label
	5366	self._loaded_paths, self.configs = set(), []
	5367
	5368	def init(self, args=None, filename=None):
	5369	assert not self.__initialized
	5370	directory = ''
	5371	if filename:
	5372	location = os.path.realpath(filename)
	5373	directory = os.path.dirname(location)
	5374	if location in self._loaded_paths:
	5375	return False
	5376	self._loaded_paths.add(location)
	5377
	5378	self.__initialized = True
	5379	self.own_args, self.filename = args, filename
	5380	for location in self._parser.parse_args(args)[0].config_locations or []:
	5381	location = os.path.join(directory, expand_path(location))
	5382	if os.path.isdir(location):
	5383	location = os.path.join(location, 'yt-dlp.conf')
	5384	if not os.path.exists(location):
	5385	self._parser.error(f'config location {location} does not exist')
	5386	self.append_config(self.read_file(location), location)
	5387	return True
	5388
	5389	def __str__(self):
	5390	label = join_nonempty(
	5391	self.label, 'config', f'"{self.filename}"' if self.filename else '',
	5392	delim=' ')
	5393	return join_nonempty(
	5394	self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
	5395	*(f'\n{c}'.replace('\n', '\n\| ')[1:] for c in self.configs),
	5396	delim='\n')
	5397
	5398	@staticmethod
	5399	def read_file(filename, default=[]):
	5400	try:
	5401	optionf = open(filename)
	5402	except IOError:
	5403	return default # silently skip if file is not present
	5404	try:
	5405	# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
	5406	contents = optionf.read()
	5407	if sys.version_info < (3,):
	5408	contents = contents.decode(preferredencoding())
	5409	res = compat_shlex_split(contents, comments=True)
	5410	finally:
	5411	optionf.close()
	5412	return res
	5413
	5414	@staticmethod
	5415	def hide_login_info(opts):
	5416	PRIVATE_OPTS = set(['-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'])
	5417	eqre = re.compile('^(?P<key>' + ('\|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
	5418
	5419	def _scrub_eq(o):
	5420	m = eqre.match(o)
	5421	if m:
	5422	return m.group('key') + '=PRIVATE'
	5423	else:
	5424	return o
	5425
	5426	opts = list(map(_scrub_eq, opts))
	5427	for idx, opt in enumerate(opts):
	5428	if opt in PRIVATE_OPTS and idx + 1 < len(opts):
	5429	opts[idx + 1] = 'PRIVATE'
	5430	return opts
	5431
	5432	def append_config(self, *args, label=None):
	5433	config = type(self)(self._parser, label)
	5434	config._loaded_paths = self._loaded_paths
	5435	if config.init(*args):
	5436	self.configs.append(config)
	5437
	5438	@property
	5439	def all_args(self):
	5440	for config in reversed(self.configs):
	5441	yield from config.all_args
	5442	yield from self.own_args or []
	5443
	5444	def parse_args(self):
	5445	return self._parser.parse_args(list(self.all_args))
	5446
	5447
	5448	class WebSocketsWrapper():
	5449	"""Wraps websockets module to use in non-async scopes"""
	5450
	5451	def __init__(self, url, headers=None, connect=True):
	5452	self.loop = asyncio.events.new_event_loop()
	5453	self.conn = compat_websockets.connect(
	5454	url, extra_headers=headers, ping_interval=None,
	5455	close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf'))
	5456	if connect:
	5457	self.__enter__()
	5458	atexit.register(self.__exit__, None, None, None)
	5459
	5460	def __enter__(self):
	5461	if not self.pool:
	5462	self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop)
	5463	return self
	5464
	5465	def send(self, *args):
	5466	self.run_with_loop(self.pool.send(*args), self.loop)
	5467
	5468	def recv(self, *args):
	5469	return self.run_with_loop(self.pool.recv(*args), self.loop)
	5470
	5471	def __exit__(self, type, value, traceback):
	5472	try:
	5473	return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop)
	5474	finally:
	5475	self.loop.close()
	5476	self._cancel_all_tasks(self.loop)
	5477
	5478	# taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications
	5479	# for contributors: If there's any new library using asyncio needs to be run in non-async, move these function out of this class
	5480	@staticmethod
	5481	def run_with_loop(main, loop):
	5482	if not asyncio.coroutines.iscoroutine(main):
	5483	raise ValueError(f'a coroutine was expected, got {main!r}')
	5484
	5485	try:
	5486	return loop.run_until_complete(main)
	5487	finally:
	5488	loop.run_until_complete(loop.shutdown_asyncgens())
	5489	if hasattr(loop, 'shutdown_default_executor'):
	5490	loop.run_until_complete(loop.shutdown_default_executor())
	5491
	5492	@staticmethod
	5493	def _cancel_all_tasks(loop):
	5494	to_cancel = asyncio.tasks.all_tasks(loop)
	5495
	5496	if not to_cancel:
	5497	return
	5498
	5499	for task in to_cancel:
	5500	task.cancel()
	5501
	5502	loop.run_until_complete(
	5503	asyncio.tasks.gather(*to_cancel, loop=loop, return_exceptions=True))
	5504
	5505	for task in to_cancel:
	5506	if task.cancelled():
	5507	continue
	5508	if task.exception() is not None:
	5509	loop.call_exception_handler({
	5510	'message': 'unhandled exception during asyncio.run() shutdown',
	5511	'exception': task.exception(),
	5512	'task': task,
	5513	})
	5514
	5515
# True when compat_websockets is truthy - presumably meaning that the
# websockets package could be imported (see .compat to confirm)
has_websockets = bool(compat_websockets)
	5517
	5518
	5519	def merge_headers(*dicts):
	5520	"""Merge dicts of http headers case insensitively, prioritizing the latter ones"""
	5521	return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
	5522
	5523
	5524	class classproperty:
	5525	def __init__(self, f):
	5526	self.f = f
	5527
	5528	def __get__(self, _, cls):
	5529	return self.f(cls)