import asyncio
import atexit
import base64
import binascii
import calendar
import codecs
import collections
import collections.abc
import contextlib
import datetime
import email.header
import email.utils
import errno
import gzip
import hashlib
import hmac
import html.entities
import html.parser
import http.client
import http.cookiejar
import inspect
import io
import itertools
import json
import locale
import math
import mimetypes
import operator
import os
import platform
import random
import re
import shlex
import socket
import ssl
import struct
import subprocess
import sys
import tempfile
import time
import traceback
import types
import unicodedata
import urllib.error
import urllib.parse
import urllib.request
import xml.etree.ElementTree
import zlib

from .compat import functools  # isort: split
from .compat import (
    compat_etree_fromstring,
    compat_expanduser,
    compat_HTMLParseError,
    compat_os_name,
    compat_shlex_quote,
)
from .dependencies import brotli, certifi, websockets, xattr
from .socks import ProxyType, sockssocket


def register_socks_protocols():
    # "Register" SOCKS protocols
    # In Python < 2.6.5, urlsplit() suffers from bug https://bugs.python.org/issue7904
    # URLs with protocols not in urlparse.uses_netloc are not handled correctly
    for scheme in ('socks', 'socks4', 'socks4a', 'socks5'):
        if scheme not in urllib.parse.uses_netloc:
            urllib.parse.uses_netloc.append(scheme)


# This is not clearly defined otherwise
compiled_regex_type = type(re.compile(''))


def random_user_agent():
    _USER_AGENT_TPL = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/%s Safari/537.36'
    _CHROME_VERSIONS = (
        '90.0.4430.212',
        '90.0.4430.24',
        '90.0.4430.70',
        '90.0.4430.72',
        '90.0.4430.85',
        '90.0.4430.93',
        '91.0.4472.101',
        '91.0.4472.106',
        '91.0.4472.114',
        '91.0.4472.124',
        '91.0.4472.164',
        '91.0.4472.19',
        '91.0.4472.77',
        '92.0.4515.107',
        '92.0.4515.115',
        '92.0.4515.131',
        '92.0.4515.159',
        '92.0.4515.43',
        '93.0.4556.0',
        '93.0.4577.15',
        '93.0.4577.63',
        '93.0.4577.82',
        '94.0.4606.41',
        '94.0.4606.54',
        '94.0.4606.61',
        '94.0.4606.71',
        '94.0.4606.81',
        '94.0.4606.85',
        '95.0.4638.17',
        '95.0.4638.50',
        '95.0.4638.54',
        '95.0.4638.69',
        '95.0.4638.74',
        '96.0.4664.18',
        '96.0.4664.45',
        '96.0.4664.55',
        '96.0.4664.93',
        '97.0.4692.20',
    )
    return _USER_AGENT_TPL % random.choice(_CHROME_VERSIONS)


SUPPORTED_ENCODINGS = [
    'gzip', 'deflate'
]
if brotli:
    SUPPORTED_ENCODINGS.append('br')

std_headers = {
    'User-Agent': random_user_agent(),
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
    'Accept-Language': 'en-us,en;q=0.5',
    'Sec-Fetch-Mode': 'navigate',
}


USER_AGENTS = {
    'Safari': 'Mozilla/5.0 (X11; Linux x86_64; rv:10.0) AppleWebKit/533.20.25 (KHTML, like Gecko) Version/5.0.4 Safari/533.20.27',
}


NO_DEFAULT = object()
IDENTITY = lambda x: x

ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
    # these follow the genitive grammatical case (dopełniacz)
    # some websites might be using nominative, which will require another month list
    # https://en.wikibooks.org/wiki/Polish/Noun_cases
    'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
           'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
}

# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7   # Pacific
}

# needed for sanitizing filenames in restricted mode
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))

DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
])

DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'

NUMBER_RE = r'\d+(?:\.\d+)?'


@functools.cache
def preferredencoding():
    """Get preferred encoding.

    Returns the best encoding scheme for the system, based on
    locale.getpreferredencoding() and some further tweaks.
    """
    try:
        pref = locale.getpreferredencoding()
        'TEST'.encode(pref)
    except Exception:
        pref = 'UTF-8'

    return pref


def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tf = tempfile.NamedTemporaryFile(
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
        suffix='.tmp', delete=False, mode='w', encoding='utf-8')

    try:
        with tf:
            json.dump(obj, tf, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tf.name, 0o666 & ~mask)
        os.rename(tf.name, fn)
    except Exception:
        with contextlib.suppress(OSError):
            os.remove(tf.name)
        raise


def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
    return node.find(expr)

# On python2.6 the xml.etree.ElementTree.Element methods don't support
# the namespace parameter


def xpath_with_ns(path, ns_map):
    components = [c.split(':') for c in path.split('/')]
    replaced = []
    for c in components:
        if len(c) == 1:
            replaced.append(c[0])
        else:
            ns, tag = c
            replaced.append('{%s}%s' % (ns_map[ns], tag))
    return '/'.join(replaced)
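
# Illustrative usage (a hedged sketch, not part of the original module; the namespace URI
# below is a made-up example):
#   >>> xpath_with_ns('ns:media/ns:song', {'ns': 'http://example.com/ns'})
#   '{http://example.com/ns}media/{http://example.com/ns}song'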
306 | ||
307 | ||
308 | def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT): | |
309 | def _find_xpath(xpath): | |
310 | return node.find(xpath) | |
311 | ||
312 | if isinstance(xpath, str): | |
313 | n = _find_xpath(xpath) | |
314 | else: | |
315 | for xp in xpath: | |
316 | n = _find_xpath(xp) | |
317 | if n is not None: | |
318 | break | |
319 | ||
320 | if n is None: | |
321 | if default is not NO_DEFAULT: | |
322 | return default | |
323 | elif fatal: | |
324 | name = xpath if name is None else name | |
325 | raise ExtractorError('Could not find XML element %s' % name) | |
326 | else: | |
327 | return None | |
328 | return n | |
329 | ||
330 | ||
331 | def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT): | |
332 | n = xpath_element(node, xpath, name, fatal=fatal, default=default) | |
333 | if n is None or n == default: | |
334 | return n | |
335 | if n.text is None: | |
336 | if default is not NO_DEFAULT: | |
337 | return default | |
338 | elif fatal: | |
339 | name = xpath if name is None else name | |
340 | raise ExtractorError('Could not find XML element\'s text %s' % name) | |
341 | else: | |
342 | return None | |
343 | return n.text | |
344 | ||
345 | ||
346 | def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT): | |
347 | n = find_xpath_attr(node, xpath, key) | |
348 | if n is None: | |
349 | if default is not NO_DEFAULT: | |
350 | return default | |
351 | elif fatal: | |
352 | name = f'{xpath}[@{key}]' if name is None else name | |
353 | raise ExtractorError('Could not find XML attribute %s' % name) | |
354 | else: | |
355 | return None | |
356 | return n.attrib[key] | |
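
# Illustrative usage of the xpath_* helpers (a hedged sketch, not part of the original module;
# the sample XML document is a made-up example):
#   >>> doc = xml.etree.ElementTree.fromstring('<root><a x="1">hi</a></root>')
#   >>> xpath_text(doc, './a')
#   'hi'
#   >>> xpath_attr(doc, './a', 'x')
#   '1'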
357 | ||
358 | ||
359 | def get_element_by_id(id, html, **kwargs): | |
360 | """Return the content of the tag with the specified ID in the passed HTML document""" | |
361 | return get_element_by_attribute('id', id, html, **kwargs) | |
362 | ||
363 | ||
364 | def get_element_html_by_id(id, html, **kwargs): | |
365 | """Return the html of the tag with the specified ID in the passed HTML document""" | |
366 | return get_element_html_by_attribute('id', id, html, **kwargs) | |
367 | ||
368 | ||
369 | def get_element_by_class(class_name, html): | |
370 | """Return the content of the first tag with the specified class in the passed HTML document""" | |
371 | retval = get_elements_by_class(class_name, html) | |
372 | return retval[0] if retval else None | |
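
# Illustrative usage (a hedged sketch, not part of the original module; the HTML snippet is
# a made-up example):
#   >>> get_element_by_class('foo', '<div class="foo bar">text</div>')
#   'text'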
373 | ||
374 | ||
375 | def get_element_html_by_class(class_name, html): | |
376 | """Return the html of the first tag with the specified class in the passed HTML document""" | |
377 | retval = get_elements_html_by_class(class_name, html) | |
378 | return retval[0] if retval else None | |
379 | ||
380 | ||
381 | def get_element_by_attribute(attribute, value, html, **kwargs): | |
382 | retval = get_elements_by_attribute(attribute, value, html, **kwargs) | |
383 | return retval[0] if retval else None | |
384 | ||
385 | ||
386 | def get_element_html_by_attribute(attribute, value, html, **kargs): | |
387 | retval = get_elements_html_by_attribute(attribute, value, html, **kargs) | |
388 | return retval[0] if retval else None | |
389 | ||
390 | ||
391 | def get_elements_by_class(class_name, html, **kargs): | |
392 | """Return the content of all tags with the specified class in the passed HTML document as a list""" | |
393 | return get_elements_by_attribute( | |
394 | 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name), | |
395 | html, escape_value=False) | |
396 | ||
397 | ||
398 | def get_elements_html_by_class(class_name, html): | |
399 | """Return the html of all tags with the specified class in the passed HTML document as a list""" | |
400 | return get_elements_html_by_attribute( | |
401 | 'class', r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name), | |
402 | html, escape_value=False) | |
403 | ||
404 | ||
405 | def get_elements_by_attribute(*args, **kwargs): | |
406 | """Return the content of the tag with the specified attribute in the passed HTML document""" | |
407 | return [content for content, _ in get_elements_text_and_html_by_attribute(*args, **kwargs)] | |
408 | ||
409 | ||
410 | def get_elements_html_by_attribute(*args, **kwargs): | |
411 | """Return the html of the tag with the specified attribute in the passed HTML document""" | |
412 | return [whole for _, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)] | |
413 | ||
414 | ||
415 | def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True): | |
416 | """ | |
417 | Return the text (content) and the html (whole) of the tag with the specified | |
418 | attribute in the passed HTML document | |
419 | """ | |
420 | if not value: | |
421 | return | |
422 | ||
423 | quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?' | |
424 | ||
425 | value = re.escape(value) if escape_value else value | |
426 | ||
427 | partial_element_re = rf'''(?x) | |
428 | <(?P<tag>{tag}) | |
429 | (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)? | |
430 | \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q) | |
431 | ''' | |
432 | ||
433 | for m in re.finditer(partial_element_re, html): | |
434 | content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():]) | |
435 | ||
436 | yield ( | |
437 | unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)), | |
438 | whole | |
439 | ) | |
440 | ||
441 | ||
442 | class HTMLBreakOnClosingTagParser(html.parser.HTMLParser): | |
443 | """ | |
444 | HTML parser which raises HTMLBreakOnClosingTagException upon reaching the | |
445 | closing tag for the first opening tag it has encountered, and can be used | |
446 | as a context manager | |
447 | """ | |
448 | ||
449 | class HTMLBreakOnClosingTagException(Exception): | |
450 | pass | |
451 | ||
452 | def __init__(self): | |
453 | self.tagstack = collections.deque() | |
454 | html.parser.HTMLParser.__init__(self) | |
455 | ||
456 | def __enter__(self): | |
457 | return self | |
458 | ||
459 | def __exit__(self, *_): | |
460 | self.close() | |
461 | ||
462 | def close(self): | |
463 | # handle_endtag does not return upon raising HTMLBreakOnClosingTagException, | |
464 | # so data remains buffered; we no longer have any interest in it, thus | |
465 | # override this method to discard it | |
466 | pass | |
467 | ||
468 | def handle_starttag(self, tag, _): | |
469 | self.tagstack.append(tag) | |
470 | ||
471 | def handle_endtag(self, tag): | |
472 | if not self.tagstack: | |
473 | raise compat_HTMLParseError('no tags in the stack') | |
474 | while self.tagstack: | |
475 | inner_tag = self.tagstack.pop() | |
476 | if inner_tag == tag: | |
477 | break | |
478 | else: | |
479 | raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found') | |
480 | if not self.tagstack: | |
481 | raise self.HTMLBreakOnClosingTagException() | |
482 | ||
483 | ||
484 | # XXX: This should be far less strict | |
485 | def get_element_text_and_html_by_tag(tag, html): | |
486 | """ | |
487 | For the first element with the specified tag in the passed HTML document | |
    return its content (text) and the whole element (html)
489 | """ | |
490 | def find_or_raise(haystack, needle, exc): | |
491 | try: | |
492 | return haystack.index(needle) | |
493 | except ValueError: | |
494 | raise exc | |
495 | closing_tag = f'</{tag}>' | |
496 | whole_start = find_or_raise( | |
497 | html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found')) | |
498 | content_start = find_or_raise( | |
499 | html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag')) | |
500 | content_start += whole_start + 1 | |
501 | with HTMLBreakOnClosingTagParser() as parser: | |
502 | parser.feed(html[whole_start:content_start]) | |
503 | if not parser.tagstack or parser.tagstack[0] != tag: | |
504 | raise compat_HTMLParseError(f'parser did not match opening {tag} tag') | |
505 | offset = content_start | |
506 | while offset < len(html): | |
507 | next_closing_tag_start = find_or_raise( | |
508 | html[offset:], closing_tag, | |
509 | compat_HTMLParseError(f'closing {tag} tag not found')) | |
510 | next_closing_tag_end = next_closing_tag_start + len(closing_tag) | |
511 | try: | |
512 | parser.feed(html[offset:offset + next_closing_tag_end]) | |
513 | offset += next_closing_tag_end | |
514 | except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException: | |
515 | return html[content_start:offset + next_closing_tag_start], \ | |
516 | html[whole_start:offset + next_closing_tag_end] | |
517 | raise compat_HTMLParseError('unexpected end of html') | |
518 | ||
519 | ||
520 | class HTMLAttributeParser(html.parser.HTMLParser): | |
521 | """Trivial HTML parser to gather the attributes for a single element""" | |
522 | ||
523 | def __init__(self): | |
524 | self.attrs = {} | |
525 | html.parser.HTMLParser.__init__(self) | |
526 | ||
527 | def handle_starttag(self, tag, attrs): | |
528 | self.attrs = dict(attrs) | |
529 | raise compat_HTMLParseError('done') | |
530 | ||
531 | ||
532 | class HTMLListAttrsParser(html.parser.HTMLParser): | |
533 | """HTML parser to gather the attributes for the elements of a list""" | |
534 | ||
535 | def __init__(self): | |
536 | html.parser.HTMLParser.__init__(self) | |
537 | self.items = [] | |
538 | self._level = 0 | |
539 | ||
540 | def handle_starttag(self, tag, attrs): | |
541 | if tag == 'li' and self._level == 0: | |
542 | self.items.append(dict(attrs)) | |
543 | self._level += 1 | |
544 | ||
545 | def handle_endtag(self, tag): | |
546 | self._level -= 1 | |
547 | ||
548 | ||
549 | def extract_attributes(html_element): | |
550 | """Given a string for an HTML element such as | |
551 | <el | |
552 | a="foo" B="bar" c="&98;az" d=boz | |
     empty= noval entity="&amp;"
554 | sq='"' dq="'" | |
555 | > | |
556 | Decode and return a dictionary of attributes. | |
557 | { | |
558 | 'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz', | |
559 | 'empty': '', 'noval': None, 'entity': '&', | |
560 | 'sq': '"', 'dq': '\'' | |
561 | }. | |
562 | """ | |
563 | parser = HTMLAttributeParser() | |
564 | with contextlib.suppress(compat_HTMLParseError): | |
565 | parser.feed(html_element) | |
566 | parser.close() | |
567 | return parser.attrs | |
568 | ||
569 | ||
570 | def parse_list(webpage): | |
571 | """Given a string for an series of HTML <li> elements, | |
    return a dictionary of their attributes"""
    parser = HTMLListAttrsParser()
    parser.feed(webpage)
    parser.close()
    return parser.items


def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    html = re.sub(r'\s+', ' ', html)
    html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
    html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
    # Strip html tags
    html = re.sub('<.*?>', '', html)
    # Replace html entities
    html = unescapeHTML(html)
    return html.strip()
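
# Illustrative usage (a hedged sketch, not part of the original module; the snippet is a
# made-up example):
#   >>> clean_html('<p>Hello<br/>World</p>')
#   'Hello\nWorld'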
593 | ||
594 | ||
595 | class LenientJSONDecoder(json.JSONDecoder): | |
596 | def __init__(self, *args, transform_source=None, ignore_extra=False, **kwargs): | |
597 | self.transform_source, self.ignore_extra = transform_source, ignore_extra | |
598 | super().__init__(*args, **kwargs) | |
599 | ||
600 | def decode(self, s): | |
601 | if self.transform_source: | |
602 | s = self.transform_source(s) | |
603 | try: | |
604 | if self.ignore_extra: | |
605 | return self.raw_decode(s.lstrip())[0] | |
606 | return super().decode(s) | |
607 | except json.JSONDecodeError as e: | |
608 | if e.pos is not None: | |
609 | raise type(e)(f'{e.msg} in {s[e.pos-10:e.pos+10]!r}', s, e.pos) | |
610 | raise | |
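
# Illustrative usage (a hedged sketch, not part of the original module; the input string is
# a made-up example):
#   >>> json.loads('{"a": 1} trailing garbage', cls=LenientJSONDecoder, ignore_extra=True)
#   {'a': 1}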
611 | ||
612 | ||
613 | def sanitize_open(filename, open_mode): | |
614 | """Try to open the given filename, and slightly tweak it if this fails. | |
615 | ||
616 | Attempts to open the given filename. If this fails, it tries to change | |
617 | the filename slightly, step by step, until it's either able to open it | |
618 | or it fails and raises a final exception, like the standard open() | |
619 | function. | |
620 | ||
621 | It returns the tuple (stream, definitive_file_name). | |
622 | """ | |
623 | if filename == '-': | |
624 | if sys.platform == 'win32': | |
625 | import msvcrt | |
626 | ||
627 | # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout | |
628 | with contextlib.suppress(io.UnsupportedOperation): | |
629 | msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY) | |
630 | return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename) | |
631 | ||
632 | for attempt in range(2): | |
633 | try: | |
634 | try: | |
635 | if sys.platform == 'win32': | |
636 | # FIXME: An exclusive lock also locks the file from being read. | |
637 | # Since windows locks are mandatory, don't lock the file on windows (for now). | |
638 | # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124 | |
639 | raise LockingUnsupportedError() | |
640 | stream = locked_file(filename, open_mode, block=False).__enter__() | |
641 | except OSError: | |
642 | stream = open(filename, open_mode) | |
643 | return stream, filename | |
644 | except OSError as err: | |
645 | if attempt or err.errno in (errno.EACCES,): | |
646 | raise | |
647 | old_filename, filename = filename, sanitize_path(filename) | |
648 | if old_filename == filename: | |
649 | raise | |
650 | ||
651 | ||
652 | def timeconvert(timestr): | |
653 | """Convert RFC 2822 defined time string into system timestamp""" | |
654 | timestamp = None | |
655 | timetuple = email.utils.parsedate_tz(timestr) | |
656 | if timetuple is not None: | |
657 | timestamp = email.utils.mktime_tz(timetuple) | |
658 | return timestamp | |
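
# Illustrative usage (a hedged sketch, not part of the original module; the date below is a
# made-up example):
#   >>> timeconvert('Wed, 14 Jun 2017 07:00:00 +0000')
#   1497423600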
659 | ||
660 | ||
661 | def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT): | |
662 | """Sanitizes a string so it could be used as part of a filename. | |
663 | @param restricted Use a stricter subset of allowed characters | |
664 | @param is_id Whether this is an ID that should be kept unchanged if possible. | |
665 | If unset, yt-dlp's new sanitization rules are in effect | |
666 | """ | |
667 | if s == '': | |
668 | return '' | |
669 | ||
670 | def replace_insane(char): | |
671 | if restricted and char in ACCENT_CHARS: | |
672 | return ACCENT_CHARS[char] | |
673 | elif not restricted and char == '\n': | |
674 | return '\0 ' | |
675 | elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\': | |
676 | # Replace with their full-width unicode counterparts | |
677 | return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0)) | |
678 | elif char == '?' or ord(char) < 32 or ord(char) == 127: | |
679 | return '' | |
680 | elif char == '"': | |
681 | return '' if restricted else '\'' | |
682 | elif char == ':': | |
683 | return '\0_\0-' if restricted else '\0 \0-' | |
684 | elif char in '\\/|*<>': | |
685 | return '\0_' | |
686 | if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127): | |
687 | return '\0_' | |
688 | return char | |
689 | ||
690 | # Replace look-alike Unicode glyphs | |
691 | if restricted and (is_id is NO_DEFAULT or not is_id): | |
692 | s = unicodedata.normalize('NFKC', s) | |
693 | s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps | |
694 | result = ''.join(map(replace_insane, s)) | |
695 | if is_id is NO_DEFAULT: | |
696 | result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars | |
697 | STRIP_RE = r'(?:\0.|[ _-])*' | |
698 | result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end | |
699 | result = result.replace('\0', '') or '_' | |
700 | ||
701 | if not is_id: | |
702 | while '__' in result: | |
703 | result = result.replace('__', '_') | |
704 | result = result.strip('_') | |
705 | # Common case of "Foreign band name - English song title" | |
706 | if restricted and result.startswith('-_'): | |
707 | result = result[2:] | |
708 | if result.startswith('-'): | |
709 | result = '_' + result[len('-'):] | |
710 | result = result.lstrip('.') | |
711 | if not result: | |
712 | result = '_' | |
713 | return result | |
714 | ||
715 | ||
716 | def sanitize_path(s, force=False): | |
717 | """Sanitizes and normalizes path on Windows""" | |
718 | if sys.platform == 'win32': | |
719 | force = False | |
720 | drive_or_unc, _ = os.path.splitdrive(s) | |
721 | elif force: | |
722 | drive_or_unc = '' | |
723 | else: | |
724 | return s | |
725 | ||
726 | norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep) | |
727 | if drive_or_unc: | |
728 | norm_path.pop(0) | |
729 | sanitized_path = [ | |
730 | path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part) | |
731 | for path_part in norm_path] | |
732 | if drive_or_unc: | |
733 | sanitized_path.insert(0, drive_or_unc + os.path.sep) | |
734 | elif force and s and s[0] == os.path.sep: | |
735 | sanitized_path.insert(0, os.path.sep) | |
736 | return os.path.join(*sanitized_path) | |
737 | ||
738 | ||
739 | def sanitize_url(url, *, scheme='http'): | |
740 | # Prepend protocol-less URLs with `http:` scheme in order to mitigate | |
741 | # the number of unwanted failures due to missing protocol | |
742 | if url is None: | |
743 | return | |
744 | elif url.startswith('//'): | |
745 | return f'{scheme}:{url}' | |
746 | # Fix some common typos seen so far | |
747 | COMMON_TYPOS = ( | |
748 | # https://github.com/ytdl-org/youtube-dl/issues/15649 | |
749 | (r'^httpss://', r'https://'), | |
750 | # https://bx1.be/lives/direct-tv/ | |
751 | (r'^rmtp([es]?)://', r'rtmp\1://'), | |
752 | ) | |
753 | for mistake, fixup in COMMON_TYPOS: | |
754 | if re.match(mistake, url): | |
755 | return re.sub(mistake, fixup, url) | |
756 | return url | |
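
# Illustrative usage (a hedged sketch, not part of the original module; the URLs below are
# made-up examples):
#   >>> sanitize_url('//cdn.example.com/video.mp4')
#   'http://cdn.example.com/video.mp4'
#   >>> sanitize_url('httpss://example.com/')
#   'https://example.com/'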
757 | ||
758 | ||
759 | def extract_basic_auth(url): | |
760 | parts = urllib.parse.urlsplit(url) | |
761 | if parts.username is None: | |
762 | return url, None | |
763 | url = urllib.parse.urlunsplit(parts._replace(netloc=( | |
764 | parts.hostname if parts.port is None | |
765 | else '%s:%d' % (parts.hostname, parts.port)))) | |
766 | auth_payload = base64.b64encode( | |
767 | ('%s:%s' % (parts.username, parts.password or '')).encode()) | |
768 | return url, f'Basic {auth_payload.decode()}' | |
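
# Illustrative usage (a hedged sketch, not part of the original module; credentials and host
# are made-up examples):
#   >>> extract_basic_auth('http://user:pass@example.com/path')
#   ('http://example.com/path', 'Basic dXNlcjpwYXNz')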
769 | ||
770 | ||
771 | def sanitized_Request(url, *args, **kwargs): | |
772 | url, auth_header = extract_basic_auth(escape_url(sanitize_url(url))) | |
773 | if auth_header is not None: | |
774 | headers = args[1] if len(args) >= 2 else kwargs.setdefault('headers', {}) | |
775 | headers['Authorization'] = auth_header | |
776 | return urllib.request.Request(url, *args, **kwargs) | |
777 | ||
778 | ||
779 | def expand_path(s): | |
780 | """Expand shell variables and ~""" | |
781 | return os.path.expandvars(compat_expanduser(s)) | |
782 | ||
783 | ||
784 | def orderedSet(iterable, *, lazy=False): | |
785 | """Remove all duplicates from the input iterable""" | |
786 | def _iter(): | |
787 | seen = [] # Do not use set since the items can be unhashable | |
788 | for x in iterable: | |
789 | if x not in seen: | |
790 | seen.append(x) | |
791 | yield x | |
792 | ||
793 | return _iter() if lazy else list(_iter()) | |
794 | ||
795 | ||
796 | def _htmlentity_transform(entity_with_semicolon): | |
797 | """Transforms an HTML entity to a character.""" | |
798 | entity = entity_with_semicolon[:-1] | |
799 | ||
800 | # Known non-numeric HTML entity | |
801 | if entity in html.entities.name2codepoint: | |
802 | return chr(html.entities.name2codepoint[entity]) | |
803 | ||
804 | # TODO: HTML5 allows entities without a semicolon. | |
    # E.g. '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in html.entities.html5:
        return html.entities.html5[entity_with_semicolon]

    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = '0%s' % numstr
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        with contextlib.suppress(ValueError):
            return chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return '&%s;' % entity


def unescapeHTML(s):
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)


def escapeHTML(text):
    return (
        text
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )


def process_communicate_or_kill(p, *args, **kwargs):
    deprecation_warning(f'"{__name__}.process_communicate_or_kill" is deprecated and may be removed '
                        f'in a future version. Use "{__name__}.Popen.communicate_or_kill" instead')
    return Popen.communicate_or_kill(p, *args, **kwargs)


class Popen(subprocess.Popen):
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    @staticmethod
    def _fix_pyinstaller_ld_path(env):
        """Restore LD_LIBRARY_PATH when using PyInstaller
        Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
             https://github.com/yt-dlp/yt-dlp/issues/4573
        """
        if not hasattr(sys, '_MEIPASS'):
            return

        def _fix(key):
            orig = env.get(f'{key}_ORIG')
            if orig is None:
                env.pop(key, None)
            else:
                env[key] = orig

        _fix('LD_LIBRARY_PATH')  # Linux
        _fix('DYLD_LIBRARY_PATH')  # macOS

    def __init__(self, *args, env=None, text=False, **kwargs):
        if env is None:
            env = os.environ.copy()
        self._fix_pyinstaller_ld_path(env)

        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')
        super().__init__(*args, env=env, **kwargs, startupinfo=self._startupinfo)

    def communicate_or_kill(self, *args, **kwargs):
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, timeout=None, **kwargs):
        with cls(*args, **kwargs) as proc:
            default = '' if proc.text_mode else b''
            stdout, stderr = proc.communicate_or_kill(timeout=timeout)
            return stdout or default, stderr or default, proc.returncode


def get_subprocess_encoding():
    if sys.platform == 'win32' and sys.getwindowsversion()[0] >= 5:
        # For subprocess calls, encode with locale encoding
        # Refer to http://stackoverflow.com/a/9951851/35070
        encoding = preferredencoding()
    else:
        encoding = sys.getfilesystemencoding()
    if encoding is None:
        encoding = 'utf-8'
    return encoding


def encodeFilename(s, for_subprocess=False):
    assert isinstance(s, str)
    return s


def decodeFilename(b, for_subprocess=False):
    return b


def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    return s if isinstance(s, str) else s.decode('ascii')


def decodeArgument(b):
    return b


def decodeOption(optval):
    if optval is None:
        return optval
    if isinstance(optval, bytes):
        optval = optval.decode(preferredencoding())

    assert isinstance(optval, str)
    return optval


_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    secs, msec = divmod(msec, 1000)
    mins, secs = divmod(secs, 60)
    hrs, mins = divmod(mins, 60)
    return _timetuple(hrs, mins, secs, msec)


def formatSeconds(secs, delim=':', msec=False):
    time = timetuple_from_msec(secs * 1000)
    if time.hours:
        ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
    elif time.minutes:
        ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
    else:
        ret = '%d' % time.seconds
    return '%s.%03d' % (ret, time.milliseconds) if msec else ret
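
# Illustrative usage (a hedged sketch, not part of the original module; the duration is a
# made-up example):
#   >>> formatSeconds(3661.5, msec=True)
#   '1:01:01.500'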
969 | ||
970 | ||
971 | def _ssl_load_windows_store_certs(ssl_context, storename): | |
972 | # Code adapted from _load_windows_store_certs in https://github.com/python/cpython/blob/main/Lib/ssl.py | |
973 | try: | |
974 | certs = [cert for cert, encoding, trust in ssl.enum_certificates(storename) | |
975 | if encoding == 'x509_asn' and ( | |
976 | trust is True or ssl.Purpose.SERVER_AUTH.oid in trust)] | |
977 | except PermissionError: | |
978 | return | |
979 | for cert in certs: | |
980 | with contextlib.suppress(ssl.SSLError): | |
981 | ssl_context.load_verify_locations(cadata=cert) | |
982 | ||
983 | ||
984 | def make_HTTPS_handler(params, **kwargs): | |
985 | opts_check_certificate = not params.get('nocheckcertificate') | |
986 | context = ssl.SSLContext(ssl.PROTOCOL_TLS_CLIENT) | |
987 | context.check_hostname = opts_check_certificate | |
988 | if params.get('legacyserverconnect'): | |
989 | context.options |= 4 # SSL_OP_LEGACY_SERVER_CONNECT | |
990 | # Allow use of weaker ciphers in Python 3.10+. See https://bugs.python.org/issue43998 | |
991 | context.set_ciphers('DEFAULT') | |
992 | elif ( | |
993 | sys.version_info < (3, 10) | |
994 | and ssl.OPENSSL_VERSION_INFO >= (1, 1, 1) | |
995 | and not ssl.OPENSSL_VERSION.startswith('LibreSSL') | |
996 | ): | |
997 | # Backport the default SSL ciphers and minimum TLS version settings from Python 3.10 [1]. | |
998 | # This is to ensure consistent behavior across Python versions, and help avoid fingerprinting | |
999 | # in some situations [2][3]. | |
1000 | # Python 3.10 only supports OpenSSL 1.1.1+ [4]. Because this change is likely | |
1001 | # untested on older versions, we only apply this to OpenSSL 1.1.1+ to be safe. | |
1002 | # LibreSSL is excluded until further investigation due to cipher support issues [5][6]. | |
1003 | # 1. https://github.com/python/cpython/commit/e983252b516edb15d4338b0a47631b59ef1e2536 | |
1004 | # 2. https://github.com/yt-dlp/yt-dlp/issues/4627 | |
1005 | # 3. https://github.com/yt-dlp/yt-dlp/pull/5294 | |
1006 | # 4. https://peps.python.org/pep-0644/ | |
1007 | # 5. https://peps.python.org/pep-0644/#libressl-support | |
1008 | # 6. https://github.com/yt-dlp/yt-dlp/commit/5b9f253fa0aee996cf1ed30185d4b502e00609c4#commitcomment-89054368 | |
1009 | context.set_ciphers('@SECLEVEL=2:ECDH+AESGCM:ECDH+CHACHA20:ECDH+AES:DHE+AES:!aNULL:!eNULL:!aDSS:!SHA1:!AESCCM') | |
1010 | context.minimum_version = ssl.TLSVersion.TLSv1_2 | |
1011 | ||
1012 | context.verify_mode = ssl.CERT_REQUIRED if opts_check_certificate else ssl.CERT_NONE | |
1013 | if opts_check_certificate: | |
1014 | if has_certifi and 'no-certifi' not in params.get('compat_opts', []): | |
1015 | context.load_verify_locations(cafile=certifi.where()) | |
1016 | else: | |
1017 | try: | |
1018 | context.load_default_certs() | |
1019 | # Work around the issue in load_default_certs when there are bad certificates. See: | |
1020 | # https://github.com/yt-dlp/yt-dlp/issues/1060, | |
1021 | # https://bugs.python.org/issue35665, https://bugs.python.org/issue45312 | |
1022 | except ssl.SSLError: | |
1023 | # enum_certificates is not present in mingw python. See https://github.com/yt-dlp/yt-dlp/issues/1151 | |
1024 | if sys.platform == 'win32' and hasattr(ssl, 'enum_certificates'): | |
1025 | for storename in ('CA', 'ROOT'): | |
1026 | _ssl_load_windows_store_certs(context, storename) | |
1027 | context.set_default_verify_paths() | |
1028 | ||
1029 | client_certfile = params.get('client_certificate') | |
1030 | if client_certfile: | |
1031 | try: | |
1032 | context.load_cert_chain( | |
1033 | client_certfile, keyfile=params.get('client_certificate_key'), | |
1034 | password=params.get('client_certificate_password')) | |
1035 | except ssl.SSLError: | |
1036 | raise YoutubeDLError('Unable to load client certificate') | |
1037 | ||
1038 | # Some servers may reject requests if ALPN extension is not sent. See: | |
1039 | # https://github.com/python/cpython/issues/85140 | |
1040 | # https://github.com/yt-dlp/yt-dlp/issues/3878 | |
1041 | with contextlib.suppress(NotImplementedError): | |
1042 | context.set_alpn_protocols(['http/1.1']) | |
1043 | ||
1044 | return YoutubeDLHTTPSHandler(params, context=context, **kwargs) | |
1045 | ||
1046 | ||
1047 | def bug_reports_message(before=';'): | |
1048 | from .update import REPOSITORY | |
1049 | ||
1050 | msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , ' | |
1051 | 'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U') | |
1052 | ||
1053 | before = before.rstrip() | |
1054 | if not before or before.endswith(('.', '!', '?')): | |
1055 | msg = msg[0].title() + msg[1:] | |
1056 | ||
1057 | return (before + ' ' if before else '') + msg | |
1058 | ||
1059 | ||
1060 | class YoutubeDLError(Exception): | |
1061 | """Base exception for YoutubeDL errors.""" | |
1062 | msg = None | |
1063 | ||
1064 | def __init__(self, msg=None): | |
1065 | if msg is not None: | |
1066 | self.msg = msg | |
1067 | elif self.msg is None: | |
1068 | self.msg = type(self).__name__ | |
1069 | super().__init__(self.msg) | |
1070 | ||
1071 | ||
1072 | network_exceptions = [urllib.error.URLError, http.client.HTTPException, socket.error] | |
1073 | if hasattr(ssl, 'CertificateError'): | |
1074 | network_exceptions.append(ssl.CertificateError) | |
1075 | network_exceptions = tuple(network_exceptions) | |
1076 | ||
1077 | ||
1078 | class ExtractorError(YoutubeDLError): | |
1079 | """Error during info extraction.""" | |
1080 | ||
1081 | def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None): | |
1082 | """ tb, if given, is the original traceback (so that it can be printed out). | |
1083 | If expected is set, this is a normal error message and most likely not a bug in yt-dlp. | |
1084 | """ | |
1085 | if sys.exc_info()[0] in network_exceptions: | |
1086 | expected = True | |
1087 | ||
1088 | self.orig_msg = str(msg) | |
1089 | self.traceback = tb | |
1090 | self.expected = expected | |
1091 | self.cause = cause | |
1092 | self.video_id = video_id | |
1093 | self.ie = ie | |
1094 | self.exc_info = sys.exc_info() # preserve original exception | |
1095 | if isinstance(self.exc_info[1], ExtractorError): | |
1096 | self.exc_info = self.exc_info[1].exc_info | |
1097 | super().__init__(self.__msg) | |
1098 | ||
1099 | @property | |
1100 | def __msg(self): | |
1101 | return ''.join(( | |
1102 | format_field(self.ie, None, '[%s] '), | |
1103 | format_field(self.video_id, None, '%s: '), | |
1104 | self.orig_msg, | |
1105 | format_field(self.cause, None, ' (caused by %r)'), | |
1106 | '' if self.expected else bug_reports_message())) | |
1107 | ||
1108 | def format_traceback(self): | |
1109 | return join_nonempty( | |
1110 | self.traceback and ''.join(traceback.format_tb(self.traceback)), | |
1111 | self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]), | |
1112 | delim='\n') or None | |
1113 | ||
1114 | def __setattr__(self, name, value): | |
1115 | super().__setattr__(name, value) | |
1116 | if getattr(self, 'msg', None) and name not in ('msg', 'args'): | |
1117 | self.msg = self.__msg or type(self).__name__ | |
1118 | self.args = (self.msg, ) # Cannot be property | |
1119 | ||
1120 | ||
1121 | class UnsupportedError(ExtractorError): | |
1122 | def __init__(self, url): | |
1123 | super().__init__( | |
1124 | 'Unsupported URL: %s' % url, expected=True) | |
1125 | self.url = url | |
1126 | ||
1127 | ||
1128 | class RegexNotFoundError(ExtractorError): | |
1129 | """Error when a regex didn't match""" | |
1130 | pass | |
1131 | ||
1132 | ||
1133 | class GeoRestrictedError(ExtractorError): | |
1134 | """Geographic restriction Error exception. | |
1135 | ||
1136 | This exception may be thrown when a video is not available from your | |
1137 | geographic location due to geographic restrictions imposed by a website. | |
1138 | """ | |
1139 | ||
1140 | def __init__(self, msg, countries=None, **kwargs): | |
1141 | kwargs['expected'] = True | |
1142 | super().__init__(msg, **kwargs) | |
1143 | self.countries = countries | |
1144 | ||
1145 | ||
1146 | class UserNotLive(ExtractorError): | |
1147 | """Error when a channel/user is not live""" | |
1148 | ||
1149 | def __init__(self, msg=None, **kwargs): | |
1150 | kwargs['expected'] = True | |
1151 | super().__init__(msg or 'The channel is not currently live', **kwargs) | |
1152 | ||
1153 | ||
1154 | class DownloadError(YoutubeDLError): | |
1155 | """Download Error exception. | |
1156 | ||
1157 | This exception may be thrown by FileDownloader objects if they are not | |
1158 | configured to continue on errors. They will contain the appropriate | |
1159 | error message. | |
1160 | """ | |
1161 | ||
1162 | def __init__(self, msg, exc_info=None): | |
1163 | """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """ | |
1164 | super().__init__(msg) | |
1165 | self.exc_info = exc_info | |
1166 | ||
1167 | ||
1168 | class EntryNotInPlaylist(YoutubeDLError): | |
1169 | """Entry not in playlist exception. | |
1170 | ||
1171 | This exception will be thrown by YoutubeDL when a requested entry | |
1172 | is not found in the playlist info_dict | |
1173 | """ | |
1174 | msg = 'Entry not found in info' | |
1175 | ||
1176 | ||
1177 | class SameFileError(YoutubeDLError): | |
1178 | """Same File exception. | |
1179 | ||
1180 | This exception will be thrown by FileDownloader objects if they detect | |
1181 | multiple files would have to be downloaded to the same file on disk. | |
1182 | """ | |
1183 | msg = 'Fixed output name but more than one file to download' | |
1184 | ||
1185 | def __init__(self, filename=None): | |
1186 | if filename is not None: | |
1187 | self.msg += f': {filename}' | |
1188 | super().__init__(self.msg) | |
1189 | ||
1190 | ||
1191 | class PostProcessingError(YoutubeDLError): | |
1192 | """Post Processing exception. | |
1193 | ||
1194 | This exception may be raised by PostProcessor's .run() method to | |
1195 | indicate an error in the postprocessing task. | |
1196 | """ | |
1197 | ||
1198 | ||
1199 | class DownloadCancelled(YoutubeDLError): | |
1200 | """ Exception raised when the download queue should be interrupted """ | |
1201 | msg = 'The download was cancelled' | |
1202 | ||
1203 | ||
1204 | class ExistingVideoReached(DownloadCancelled): | |
1205 | """ --break-on-existing triggered """ | |
1206 | msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing' | |
1207 | ||
1208 | ||
1209 | class RejectedVideoReached(DownloadCancelled): | |
1210 | """ --break-on-reject triggered """ | |
1211 | msg = 'Encountered a video that did not match filter, stopping due to --break-on-reject' | |
1212 | ||
1213 | ||
1214 | class MaxDownloadsReached(DownloadCancelled): | |
1215 | """ --max-downloads limit has been reached. """ | |
1216 | msg = 'Maximum number of downloads reached, stopping due to --max-downloads' | |
1217 | ||
1218 | ||
1219 | class ReExtractInfo(YoutubeDLError): | |
1220 | """ Video info needs to be re-extracted. """ | |
1221 | ||
1222 | def __init__(self, msg, expected=False): | |
1223 | super().__init__(msg) | |
1224 | self.expected = expected | |
1225 | ||
1226 | ||
1227 | class ThrottledDownload(ReExtractInfo): | |
1228 | """ Download speed below --throttled-rate. """ | |
1229 | msg = 'The download speed is below throttle limit' | |
1230 | ||
1231 | def __init__(self): | |
1232 | super().__init__(self.msg, expected=False) | |
1233 | ||
1234 | ||
1235 | class UnavailableVideoError(YoutubeDLError): | |
1236 | """Unavailable Format exception. | |
1237 | ||
1238 | This exception will be thrown when a video is requested | |
1239 | in a format that is not available for that video. | |
1240 | """ | |
1241 | msg = 'Unable to download video' | |
1242 | ||
1243 | def __init__(self, err=None): | |
1244 | if err is not None: | |
1245 | self.msg += f': {err}' | |
1246 | super().__init__(self.msg) | |
1247 | ||
1248 | ||
1249 | class ContentTooShortError(YoutubeDLError): | |
1250 | """Content Too Short exception. | |
1251 | ||
1252 | This exception may be raised by FileDownloader objects when a file they | |
1253 | download is too small for what the server announced first, indicating | |
1254 | the connection was probably interrupted. | |
1255 | """ | |
1256 | ||
1257 | def __init__(self, downloaded, expected): | |
1258 | super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes') | |
1259 | # Both in bytes | |
1260 | self.downloaded = downloaded | |
1261 | self.expected = expected | |
1262 | ||
1263 | ||
1264 | class XAttrMetadataError(YoutubeDLError): | |
1265 | def __init__(self, code=None, msg='Unknown error'): | |
1266 | super().__init__(msg) | |
1267 | self.code = code | |
1268 | self.msg = msg | |
1269 | ||
1270 | # Parsing code and msg | |
1271 | if (self.code in (errno.ENOSPC, errno.EDQUOT) | |
1272 | or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg): | |
1273 | self.reason = 'NO_SPACE' | |
1274 | elif self.code == errno.E2BIG or 'Argument list too long' in self.msg: | |
1275 | self.reason = 'VALUE_TOO_LONG' | |
1276 | else: | |
1277 | self.reason = 'NOT_SUPPORTED' | |
1278 | ||
1279 | ||
1280 | class XAttrUnavailableError(YoutubeDLError): | |
1281 | pass | |
1282 | ||
1283 | ||
1284 | def _create_http_connection(ydl_handler, http_class, is_https, *args, **kwargs): | |
1285 | hc = http_class(*args, **kwargs) | |
1286 | source_address = ydl_handler._params.get('source_address') | |
1287 | ||
1288 | if source_address is not None: | |
1289 | # This is to workaround _create_connection() from socket where it will try all | |
1290 | # address data from getaddrinfo() including IPv6. This filters the result from | |
1291 | # getaddrinfo() based on the source_address value. | |
1292 | # This is based on the cpython socket.create_connection() function. | |
1293 | # https://github.com/python/cpython/blob/master/Lib/socket.py#L691 | |
1294 | def _create_connection(address, timeout=socket._GLOBAL_DEFAULT_TIMEOUT, source_address=None): | |
1295 | host, port = address | |
1296 | err = None | |
1297 | addrs = socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM) | |
1298 | af = socket.AF_INET if '.' in source_address[0] else socket.AF_INET6 | |
1299 | ip_addrs = [addr for addr in addrs if addr[0] == af] | |
1300 | if addrs and not ip_addrs: | |
1301 | ip_version = 'v4' if af == socket.AF_INET else 'v6' | |
1302 | raise OSError( | |
1303 | "No remote IP%s addresses available for connect, can't use '%s' as source address" | |
1304 | % (ip_version, source_address[0])) | |
1305 | for res in ip_addrs: | |
1306 | af, socktype, proto, canonname, sa = res | |
1307 | sock = None | |
1308 | try: | |
1309 | sock = socket.socket(af, socktype, proto) | |
1310 | if timeout is not socket._GLOBAL_DEFAULT_TIMEOUT: | |
1311 | sock.settimeout(timeout) | |
1312 | sock.bind(source_address) | |
1313 | sock.connect(sa) | |
1314 | err = None # Explicitly break reference cycle | |
1315 | return sock | |
1316 | except OSError as _: | |
1317 | err = _ | |
1318 | if sock is not None: | |
1319 | sock.close() | |
1320 | if err is not None: | |
1321 | raise err | |
1322 | else: | |
1323 | raise OSError('getaddrinfo returns an empty list') | |
1324 | if hasattr(hc, '_create_connection'): | |
1325 | hc._create_connection = _create_connection | |
1326 | hc.source_address = (source_address, 0) | |
1327 | ||
1328 | return hc | |
1329 | ||
1330 | ||
1331 | def handle_youtubedl_headers(headers): | |
1332 | filtered_headers = headers | |
1333 | ||
1334 | if 'Youtubedl-no-compression' in filtered_headers: | |
1335 | filtered_headers = {k: v for k, v in filtered_headers.items() if k.lower() != 'accept-encoding'} | |
1336 | del filtered_headers['Youtubedl-no-compression'] | |
1337 | ||
1338 | return filtered_headers | |
1339 | ||
1340 | ||
1341 | class YoutubeDLHandler(urllib.request.HTTPHandler): | |
1342 | """Handler for HTTP requests and responses. | |
1343 | ||
1344 | This class, when installed with an OpenerDirector, automatically adds | |
1345 | the standard headers to every HTTP request and handles gzipped and | |
1346 | deflated responses from web servers. If compression is to be avoided in | |
1347 | a particular request, the original request in the program code only has | |
1348 | to include the HTTP header "Youtubedl-no-compression", which will be | |
1349 | removed before making the real request. | |
1350 | ||
1351 | Part of this code was copied from: | |
1352 | ||
1353 | http://techknack.net/python-urllib2-handlers/ | |
1354 | ||
1355 | Andrew Rowls, the author of that code, agreed to release it to the | |
1356 | public domain. | |
1357 | """ | |
1358 | ||
1359 | def __init__(self, params, *args, **kwargs): | |
1360 | urllib.request.HTTPHandler.__init__(self, *args, **kwargs) | |
1361 | self._params = params | |
1362 | ||
1363 | def http_open(self, req): | |
1364 | conn_class = http.client.HTTPConnection | |
1365 | ||
1366 | socks_proxy = req.headers.get('Ytdl-socks-proxy') | |
1367 | if socks_proxy: | |
1368 | conn_class = make_socks_conn_class(conn_class, socks_proxy) | |
1369 | del req.headers['Ytdl-socks-proxy'] | |
1370 | ||
1371 | return self.do_open(functools.partial( | |
1372 | _create_http_connection, self, conn_class, False), | |
1373 | req) | |
1374 | ||
1375 | @staticmethod | |
1376 | def deflate(data): | |
1377 | if not data: | |
1378 | return data | |
1379 | try: | |
1380 | return zlib.decompress(data, -zlib.MAX_WBITS) | |
1381 | except zlib.error: | |
1382 | return zlib.decompress(data) | |
1383 | ||
1384 | @staticmethod | |
1385 | def brotli(data): | |
1386 | if not data: | |
1387 | return data | |
1388 | return brotli.decompress(data) | |
1389 | ||
1390 | def http_request(self, req): | |
1391 | # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not | |
1392 | # always respected by websites, some tend to give out URLs with non percent-encoded | |
1393 | # non-ASCII characters (see telemb.py, ard.py [#3412]) | |
1394 | # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991) | |
1395 | # To work around aforementioned issue we will replace request's original URL with | |
1396 | # percent-encoded one | |
1397 | # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09) | |
1398 | # the code of this workaround has been moved here from YoutubeDL.urlopen() | |
1399 | url = req.get_full_url() | |
1400 | url_escaped = escape_url(url) | |
1401 | ||
1402 | # Substitute URL if any change after escaping | |
1403 | if url != url_escaped: | |
1404 | req = update_Request(req, url=url_escaped) | |
1405 | ||
1406 | for h, v in self._params.get('http_headers', std_headers).items(): | |
1407 | # Capitalize is needed because of Python bug 2275: http://bugs.python.org/issue2275 | |
1408 | # The dict keys are capitalized because of this bug by urllib | |
1409 | if h.capitalize() not in req.headers: | |
1410 | req.add_header(h, v) | |
1411 | ||
1412 | if 'Accept-encoding' not in req.headers: | |
1413 | req.add_header('Accept-encoding', ', '.join(SUPPORTED_ENCODINGS)) | |
1414 | ||
1415 | req.headers = handle_youtubedl_headers(req.headers) | |
1416 | ||
1417 | return super().do_request_(req) | |
1418 | ||
1419 | def http_response(self, req, resp): | |
1420 | old_resp = resp | |
1421 | # gzip | |
1422 | if resp.headers.get('Content-encoding', '') == 'gzip': | |
1423 | content = resp.read() | |
1424 | gz = gzip.GzipFile(fileobj=io.BytesIO(content), mode='rb') | |
1425 | try: | |
1426 | uncompressed = io.BytesIO(gz.read()) | |
1427 | except OSError as original_ioerror: | |
                # There may be junk at the end of the file
1429 | # See http://stackoverflow.com/q/4928560/35070 for details | |
1430 | for i in range(1, 1024): | |
1431 | try: | |
1432 | gz = gzip.GzipFile(fileobj=io.BytesIO(content[:-i]), mode='rb') | |
1433 | uncompressed = io.BytesIO(gz.read()) | |
1434 | except OSError: | |
1435 | continue | |
1436 | break | |
1437 | else: | |
1438 | raise original_ioerror | |
1439 | resp = urllib.request.addinfourl(uncompressed, old_resp.headers, old_resp.url, old_resp.code) | |
1440 | resp.msg = old_resp.msg | |
1441 | del resp.headers['Content-encoding'] | |
1442 | # deflate | |
1443 | if resp.headers.get('Content-encoding', '') == 'deflate': | |
1444 | gz = io.BytesIO(self.deflate(resp.read())) | |
1445 | resp = urllib.request.addinfourl(gz, old_resp.headers, old_resp.url, old_resp.code) | |
1446 | resp.msg = old_resp.msg | |
1447 | del resp.headers['Content-encoding'] | |
1448 | # brotli | |
1449 | if resp.headers.get('Content-encoding', '') == 'br': | |
1450 | resp = urllib.request.addinfourl( | |
1451 | io.BytesIO(self.brotli(resp.read())), old_resp.headers, old_resp.url, old_resp.code) | |
1452 | resp.msg = old_resp.msg | |
1453 | del resp.headers['Content-encoding'] | |
1454 | # Percent-encode redirect URL of Location HTTP header to satisfy RFC 3986 (see | |
1455 | # https://github.com/ytdl-org/youtube-dl/issues/6457). | |
1456 | if 300 <= resp.code < 400: | |
1457 | location = resp.headers.get('Location') | |
1458 | if location: | |
1459 | # As of RFC 2616 default charset is iso-8859-1 that is respected by python 3 | |
1460 | location = location.encode('iso-8859-1').decode() | |
1461 | location_escaped = escape_url(location) | |
1462 | if location != location_escaped: | |
1463 | del resp.headers['Location'] | |
1464 | resp.headers['Location'] = location_escaped | |
1465 | return resp | |
1466 | ||
1467 | https_request = http_request | |
1468 | https_response = http_response | |
1469 | ||
1470 | ||
1471 | def make_socks_conn_class(base_class, socks_proxy): | |
1472 | assert issubclass(base_class, ( | |
1473 | http.client.HTTPConnection, http.client.HTTPSConnection)) | |
1474 | ||
1475 | url_components = urllib.parse.urlparse(socks_proxy) | |
1476 | if url_components.scheme.lower() == 'socks5': | |
1477 | socks_type = ProxyType.SOCKS5 | |
1478 | elif url_components.scheme.lower() in ('socks', 'socks4'): | |
1479 | socks_type = ProxyType.SOCKS4 | |
1480 | elif url_components.scheme.lower() == 'socks4a': | |
1481 | socks_type = ProxyType.SOCKS4A | |
1482 | ||
1483 | def unquote_if_non_empty(s): | |
1484 | if not s: | |
1485 | return s | |
1486 | return urllib.parse.unquote_plus(s) | |
1487 | ||
1488 | proxy_args = ( | |
1489 | socks_type, | |
1490 | url_components.hostname, url_components.port or 1080, | |
1491 | True, # Remote DNS | |
1492 | unquote_if_non_empty(url_components.username), | |
1493 | unquote_if_non_empty(url_components.password), | |
1494 | ) | |
1495 | ||
1496 | class SocksConnection(base_class): | |
1497 | def connect(self): | |
1498 | self.sock = sockssocket() | |
1499 | self.sock.setproxy(*proxy_args) | |
1500 | if isinstance(self.timeout, (int, float)): | |
1501 | self.sock.settimeout(self.timeout) | |
1502 | self.sock.connect((self.host, self.port)) | |
1503 | ||
1504 | if isinstance(self, http.client.HTTPSConnection): | |
1505 | if hasattr(self, '_context'): # Python > 2.6 | |
1506 | self.sock = self._context.wrap_socket( | |
1507 | self.sock, server_hostname=self.host) | |
1508 | else: | |
1509 | self.sock = ssl.wrap_socket(self.sock) | |
1510 | ||
1511 | return SocksConnection | |
1512 | ||
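# Usage sketch (proxy address is illustrative): wrap the stdlib connection class so it
# tunnels through a SOCKS proxy; the returned class is a drop-in replacement for the
# original http.client connection class.
#   SocksHTTPSConnection = make_socks_conn_class(
#       http.client.HTTPSConnection, 'socks5://127.0.0.1:1080')
#   conn = SocksHTTPSConnection('example.com', 443, timeout=20)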
1513 | ||
1514 | class YoutubeDLHTTPSHandler(urllib.request.HTTPSHandler): | |
1515 | def __init__(self, params, https_conn_class=None, *args, **kwargs): | |
1516 | urllib.request.HTTPSHandler.__init__(self, *args, **kwargs) | |
1517 | self._https_conn_class = https_conn_class or http.client.HTTPSConnection | |
1518 | self._params = params | |
1519 | ||
1520 | def https_open(self, req): | |
1521 | kwargs = {} | |
1522 | conn_class = self._https_conn_class | |
1523 | ||
1524 | if hasattr(self, '_context'): # python > 2.6 | |
1525 | kwargs['context'] = self._context | |
1526 | if hasattr(self, '_check_hostname'): # python 3.x | |
1527 | kwargs['check_hostname'] = self._check_hostname | |
1528 | ||
1529 | socks_proxy = req.headers.get('Ytdl-socks-proxy') | |
1530 | if socks_proxy: | |
1531 | conn_class = make_socks_conn_class(conn_class, socks_proxy) | |
1532 | del req.headers['Ytdl-socks-proxy'] | |
1533 | ||
1534 | try: | |
1535 | return self.do_open( | |
1536 | functools.partial(_create_http_connection, self, conn_class, True), req, **kwargs) | |
1537 | except urllib.error.URLError as e: | |
1538 | if (isinstance(e.reason, ssl.SSLError) | |
1539 | and getattr(e.reason, 'reason', None) == 'SSLV3_ALERT_HANDSHAKE_FAILURE'): | |
1540 | raise YoutubeDLError('SSLV3_ALERT_HANDSHAKE_FAILURE: Try using --legacy-server-connect') | |
1541 | raise | |
1542 | ||
1543 | ||
1544 | def is_path_like(f): | |
1545 | return isinstance(f, (str, bytes, os.PathLike)) | |
1546 | ||
1547 | ||
1548 | class YoutubeDLCookieJar(http.cookiejar.MozillaCookieJar): | |
1549 | """ | |
1550 | See [1] for cookie file format. | |
1551 | ||
1552 | 1. https://curl.haxx.se/docs/http-cookies.html | |
1553 | """ | |
1554 | _HTTPONLY_PREFIX = '#HttpOnly_' | |
1555 | _ENTRY_LEN = 7 | |
1556 | _HEADER = '''# Netscape HTTP Cookie File | |
1557 | # This file is generated by yt-dlp. Do not edit. | |
1558 | ||
1559 | ''' | |
1560 | _CookieFileEntry = collections.namedtuple( | |
1561 | 'CookieFileEntry', | |
1562 | ('domain_name', 'include_subdomains', 'path', 'https_only', 'expires_at', 'name', 'value')) | |
1563 | ||
1564 | def __init__(self, filename=None, *args, **kwargs): | |
1565 | super().__init__(None, *args, **kwargs) | |
1566 | if is_path_like(filename): | |
1567 | filename = os.fspath(filename) | |
1568 | self.filename = filename | |
1569 | ||
1570 | @staticmethod | |
1571 | def _true_or_false(cndn): | |
1572 | return 'TRUE' if cndn else 'FALSE' | |
1573 | ||
1574 | @contextlib.contextmanager | |
1575 | def open(self, file, *, write=False): | |
1576 | if is_path_like(file): | |
1577 | with open(file, 'w' if write else 'r', encoding='utf-8') as f: | |
1578 | yield f | |
1579 | else: | |
1580 | if write: | |
1581 | file.truncate(0) | |
1582 | yield file | |
1583 | ||
1584 | def _really_save(self, f, ignore_discard=False, ignore_expires=False): | |
1585 | now = time.time() | |
1586 | for cookie in self: | |
1587 | if (not ignore_discard and cookie.discard | |
1588 | or not ignore_expires and cookie.is_expired(now)): | |
1589 | continue | |
1590 | name, value = cookie.name, cookie.value | |
1591 | if value is None: | |
1592 | # cookies.txt regards 'Set-Cookie: foo' as a cookie | |
1593 | # with no name, whereas http.cookiejar regards it as a | |
1594 | # cookie with no value. | |
1595 | name, value = '', name | |
1596 | f.write('%s\n' % '\t'.join(( | |
1597 | cookie.domain, | |
1598 | self._true_or_false(cookie.domain.startswith('.')), | |
1599 | cookie.path, | |
1600 | self._true_or_false(cookie.secure), | |
1601 | str_or_none(cookie.expires, default=''), | |
1602 | name, value | |
1603 | ))) | |
1604 | ||
1605 | def save(self, filename=None, *args, **kwargs): | |
1606 | """ | |
1607 | Save cookies to a file. | |
1608 | Code is taken from CPython 3.6 | |
1609 | https://github.com/python/cpython/blob/8d999cbf4adea053be6dbb612b9844635c4dfb8e/Lib/http/cookiejar.py#L2091-L2117 """ | |
1610 | ||
1611 | if filename is None: | |
1612 | if self.filename is not None: | |
1613 | filename = self.filename | |
1614 | else: | |
1615 | raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) | |
1616 | ||
1617 | # Store session cookies with `expires` set to 0 instead of an empty string | |
1618 | for cookie in self: | |
1619 | if cookie.expires is None: | |
1620 | cookie.expires = 0 | |
1621 | ||
1622 | with self.open(filename, write=True) as f: | |
1623 | f.write(self._HEADER) | |
1624 | self._really_save(f, *args, **kwargs) | |
1625 | ||
1626 | def load(self, filename=None, ignore_discard=False, ignore_expires=False): | |
1627 | """Load cookies from a file.""" | |
1628 | if filename is None: | |
1629 | if self.filename is not None: | |
1630 | filename = self.filename | |
1631 | else: | |
1632 | raise ValueError(http.cookiejar.MISSING_FILENAME_TEXT) | |
1633 | ||
1634 | def prepare_line(line): | |
1635 | if line.startswith(self._HTTPONLY_PREFIX): | |
1636 | line = line[len(self._HTTPONLY_PREFIX):] | |
1637 | # comments and empty lines are fine | |
1638 | if line.startswith('#') or not line.strip(): | |
1639 | return line | |
1640 | cookie_list = line.split('\t') | |
1641 | if len(cookie_list) != self._ENTRY_LEN: | |
1642 | raise http.cookiejar.LoadError('invalid length %d' % len(cookie_list)) | |
1643 | cookie = self._CookieFileEntry(*cookie_list) | |
1644 | if cookie.expires_at and not cookie.expires_at.isdigit(): | |
1645 | raise http.cookiejar.LoadError('invalid expires at %s' % cookie.expires_at) | |
1646 | return line | |
1647 | ||
1648 | cf = io.StringIO() | |
1649 | with self.open(filename) as f: | |
1650 | for line in f: | |
1651 | try: | |
1652 | cf.write(prepare_line(line)) | |
1653 | except http.cookiejar.LoadError as e: | |
1654 | if f'{line.strip()} '[0] in '[{"': | |
1655 | raise http.cookiejar.LoadError( | |
1656 | 'Cookies file must be Netscape formatted, not JSON. See ' | |
1657 | 'https://github.com/yt-dlp/yt-dlp/wiki/FAQ#how-do-i-pass-cookies-to-yt-dlp') | |
1658 | write_string(f'WARNING: skipping cookie file entry due to {e}: {line!r}\n') | |
1659 | continue | |
1660 | cf.seek(0) | |
1661 | self._really_load(cf, filename, ignore_discard, ignore_expires) | |
1662 | # Session cookies are denoted by either the `expires` field set to | |
1663 | # an empty string or 0. MozillaCookieJar only recognizes the former | |
1664 | # (see [1]), so we need to force the latter to be recognized as session | |
1665 | # cookies on our own. | |
1666 | # Session cookies may be important for cookie-based authentication: | |
1667 | # usually, when the user does not check the 'Remember me' box while | |
1668 | # logging in on a site, some important cookies are stored as session | |
1669 | # cookies, so failing to recognize them results in a failed login. | |
1670 | # 1. https://bugs.python.org/issue17164 | |
1671 | for cookie in self: | |
1672 | # Treat `expires=0` cookies as session cookies | |
1673 | if cookie.expires == 0: | |
1674 | cookie.expires = None | |
1675 | cookie.discard = True | |
1676 | ||
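# Usage sketch (file name is illustrative): the jar reads and writes Netscape-format
# cookies.txt files, keeping #HttpOnly_ entries and treating `expires=0` as session cookies.
#   jar = YoutubeDLCookieJar('cookies.txt')
#   jar.load(ignore_discard=True, ignore_expires=True)
#   jar.save()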
1677 | ||
1678 | class YoutubeDLCookieProcessor(urllib.request.HTTPCookieProcessor): | |
1679 | def __init__(self, cookiejar=None): | |
1680 | urllib.request.HTTPCookieProcessor.__init__(self, cookiejar) | |
1681 | ||
1682 | def http_response(self, request, response): | |
1683 | return urllib.request.HTTPCookieProcessor.http_response(self, request, response) | |
1684 | ||
1685 | https_request = urllib.request.HTTPCookieProcessor.http_request | |
1686 | https_response = http_response | |
1687 | ||
1688 | ||
1689 | class YoutubeDLRedirectHandler(urllib.request.HTTPRedirectHandler): | |
1690 | """YoutubeDL redirect handler | |
1691 | ||
1692 | The code is based on HTTPRedirectHandler implementation from CPython [1]. | |
1693 | ||
1694 | This redirect handler solves two issues: | |
1695 | - ensures redirect URL is always unicode under python 2 | |
1696 | - introduces support for the HTTP response status code | |
1697 | 308 Permanent Redirect [2] (standardized in RFC 7538) used by some sites [3] | |
1698 | ||
1699 | 1. https://github.com/python/cpython/blob/master/Lib/urllib/request.py | |
1700 | 2. https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/308 | |
1701 | 3. https://github.com/ytdl-org/youtube-dl/issues/28768 | |
1702 | """ | |
1703 | ||
1704 | http_error_301 = http_error_303 = http_error_307 = http_error_308 = urllib.request.HTTPRedirectHandler.http_error_302 | |
1705 | ||
1706 | def redirect_request(self, req, fp, code, msg, headers, newurl): | |
1707 | """Return a Request or None in response to a redirect. | |
1708 | ||
1709 | This is called by the http_error_30x methods when a | |
1710 | redirection response is received. If a redirection should | |
1711 | take place, return a new Request to allow http_error_30x to | |
1712 | perform the redirect. Otherwise, raise HTTPError if no-one | |
1713 | else should try to handle this url. Return None if you can't | |
1714 | but another Handler might. | |
1715 | """ | |
1716 | m = req.get_method() | |
1717 | if (not (code in (301, 302, 303, 307, 308) and m in ("GET", "HEAD") | |
1718 | or code in (301, 302, 303) and m == "POST")): | |
1719 | raise urllib.error.HTTPError(req.full_url, code, msg, headers, fp) | |
1720 | # Strictly (according to RFC 2616), 301 or 302 in response to | |
1721 | # a POST MUST NOT cause a redirection without confirmation | |
1722 | # from the user (of urllib.request, in this case). In practice, | |
1723 | # essentially all clients do redirect in this case, so we do | |
1724 | # the same. | |
1725 | ||
1726 | # Be conciliant with URIs containing a space. This is mainly | |
1727 | # redundant with the more complete encoding done in http_error_302(), | |
1728 | # but it is kept for compatibility with other callers. | |
1729 | newurl = newurl.replace(' ', '%20') | |
1730 | ||
1731 | CONTENT_HEADERS = ("content-length", "content-type") | |
1733 | newheaders = {k: v for k, v in req.headers.items() if k.lower() not in CONTENT_HEADERS} | |
1734 | ||
1735 | # A 303 must either use GET or HEAD for subsequent request | |
1736 | # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.4 | |
1737 | if code == 303 and m != 'HEAD': | |
1738 | m = 'GET' | |
1739 | # 301 and 302 redirects are commonly turned into a GET from a POST | |
1740 | # for subsequent requests by browsers, so we'll do the same. | |
1741 | # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.2 | |
1742 | # https://datatracker.ietf.org/doc/html/rfc7231#section-6.4.3 | |
1743 | if code in (301, 302) and m == 'POST': | |
1744 | m = 'GET' | |
1745 | ||
1746 | return urllib.request.Request( | |
1747 | newurl, headers=newheaders, origin_req_host=req.origin_req_host, | |
1748 | unverifiable=True, method=m) | |
1749 | ||
1750 | ||
1751 | def extract_timezone(date_str): | |
1752 | m = re.search( | |
1753 | r'''(?x) | |
1754 | ^.{8,}? # >=8 char non-TZ prefix, if present | |
1755 | (?P<tz>Z| # just the UTC Z, or | |
1756 | (?:(?<=.\b\d{4}|\b\d{2}:\d\d)| # preceded by 4 digits or hh:mm or | |
1757 | (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits | |
1758 | [ ]? # optional space | |
1759 | (?P<sign>\+|-) # +/- | |
1760 | (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm | |
1761 | $) | |
1762 | ''', date_str) | |
1763 | if not m: | |
1764 | m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str) | |
1765 | timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip()) | |
1766 | if timezone is not None: | |
1767 | date_str = date_str[:-len(m.group('tz'))] | |
1768 | timezone = datetime.timedelta(hours=timezone or 0) | |
1769 | else: | |
1770 | date_str = date_str[:-len(m.group('tz'))] | |
1771 | if not m.group('sign'): | |
1772 | timezone = datetime.timedelta() | |
1773 | else: | |
1774 | sign = 1 if m.group('sign') == '+' else -1 | |
1775 | timezone = datetime.timedelta( | |
1776 | hours=sign * int(m.group('hours')), | |
1777 | minutes=sign * int(m.group('minutes'))) | |
1778 | return timezone, date_str | |
1779 | ||
1780 | ||
1781 | def parse_iso8601(date_str, delimiter='T', timezone=None): | |
1782 | """ Return a UNIX timestamp from the given date """ | |
1783 | ||
1784 | if date_str is None: | |
1785 | return None | |
1786 | ||
1787 | date_str = re.sub(r'\.[0-9]+', '', date_str) | |
1788 | ||
1789 | if timezone is None: | |
1790 | timezone, date_str = extract_timezone(date_str) | |
1791 | ||
1792 | with contextlib.suppress(ValueError): | |
1793 | date_format = f'%Y-%m-%d{delimiter}%H:%M:%S' | |
1794 | dt = datetime.datetime.strptime(date_str, date_format) - timezone | |
1795 | return calendar.timegm(dt.timetuple()) | |
1796 | ||
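# For example, a timestamp with an explicit UTC offset resolves to the corresponding
# UNIX time (fractional seconds are stripped first):
#   parse_iso8601('2014-12-04T12:05:01+01:00')  # -> 1417691101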
1797 | ||
1798 | def date_formats(day_first=True): | |
1799 | return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST | |
1800 | ||
1801 | ||
1802 | def unified_strdate(date_str, day_first=True): | |
1803 | """Return a string with the date in the format YYYYMMDD""" | |
1804 | ||
1805 | if date_str is None: | |
1806 | return None | |
1807 | upload_date = None | |
1808 | # Replace commas | |
1809 | date_str = date_str.replace(',', ' ') | |
1810 | # Remove AM/PM + timezone | |
1811 | date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) | |
1812 | _, date_str = extract_timezone(date_str) | |
1813 | ||
1814 | for expression in date_formats(day_first): | |
1815 | with contextlib.suppress(ValueError): | |
1816 | upload_date = datetime.datetime.strptime(date_str, expression).strftime('%Y%m%d') | |
1817 | if upload_date is None: | |
1818 | timetuple = email.utils.parsedate_tz(date_str) | |
1819 | if timetuple: | |
1820 | with contextlib.suppress(ValueError): | |
1821 | upload_date = datetime.datetime(*timetuple[:6]).strftime('%Y%m%d') | |
1822 | if upload_date is not None: | |
1823 | return str(upload_date) | |
1824 | ||
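# For example, assuming the DATE_FORMATS_* tables include the usual textual patterns:
#   unified_strdate('Dec 14, 2012')  # -> '20121214'
#   unified_strdate('8/7/2009')      # -> '20090708' (day_first=True by default)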
1825 | ||
1826 | def unified_timestamp(date_str, day_first=True): | |
1827 | if date_str is None: | |
1828 | return None | |
1829 | ||
1830 | date_str = re.sub(r'\s+', ' ', re.sub( | |
1831 | r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str)) | |
1832 | ||
1833 | pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0 | |
1834 | timezone, date_str = extract_timezone(date_str) | |
1835 | ||
1836 | # Remove AM/PM + timezone | |
1837 | date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str) | |
1838 | ||
1839 | # Remove unrecognized timezones from ISO 8601 alike timestamps | |
1840 | m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str) | |
1841 | if m: | |
1842 | date_str = date_str[:-len(m.group('tz'))] | |
1843 | ||
1844 | # Python only supports microseconds, so remove nanoseconds | |
1845 | m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str) | |
1846 | if m: | |
1847 | date_str = m.group(1) | |
1848 | ||
1849 | for expression in date_formats(day_first): | |
1850 | with contextlib.suppress(ValueError): | |
1851 | dt = datetime.datetime.strptime(date_str, expression) - timezone + datetime.timedelta(hours=pm_delta) | |
1852 | return calendar.timegm(dt.timetuple()) | |
1853 | ||
1854 | timetuple = email.utils.parsedate_tz(date_str) | |
1855 | if timetuple: | |
1856 | return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds() | |
1857 | ||
1858 | ||
1859 | def determine_ext(url, default_ext='unknown_video'): | |
1860 | if url is None or '.' not in url: | |
1861 | return default_ext | |
1862 | guess = url.partition('?')[0].rpartition('.')[2] | |
1863 | if re.match(r'^[A-Za-z0-9]+$', guess): | |
1864 | return guess | |
1865 | # Try to extract the ext from URLs like http://example.com/foo/bar.mp4/?download | |
1866 | elif guess.rstrip('/') in KNOWN_EXTENSIONS: | |
1867 | return guess.rstrip('/') | |
1868 | else: | |
1869 | return default_ext | |
1870 | ||
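# For example:
#   determine_ext('http://example.com/video.mp4?dl=1')        # -> 'mp4'
#   determine_ext('http://example.com/video.mp4/?download')   # -> 'mp4' (if 'mp4' is in KNOWN_EXTENSIONS)
#   determine_ext('http://example.com/page')                  # -> 'unknown_video'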
1871 | ||
1872 | def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None): | |
1873 | return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext) | |
1874 | ||
1875 | ||
1876 | def datetime_from_str(date_str, precision='auto', format='%Y%m%d'): | |
1877 | R""" | |
1878 | Return a datetime object from a string. | |
1879 | Supported format: | |
1880 | (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)? | |
1881 | ||
1882 | @param format strftime format of DATE | |
1883 | @param precision Round the datetime object: auto|microsecond|second|minute|hour|day | |
1884 | auto: round to the unit provided in date_str (if applicable). | |
1885 | """ | |
1886 | auto_precision = False | |
1887 | if precision == 'auto': | |
1888 | auto_precision = True | |
1889 | precision = 'microsecond' | |
1890 | today = datetime_round(datetime.datetime.utcnow(), precision) | |
1891 | if date_str in ('now', 'today'): | |
1892 | return today | |
1893 | if date_str == 'yesterday': | |
1894 | return today - datetime.timedelta(days=1) | |
1895 | match = re.match( | |
1896 | r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?', | |
1897 | date_str) | |
1898 | if match is not None: | |
1899 | start_time = datetime_from_str(match.group('start'), precision, format) | |
1900 | time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1) | |
1901 | unit = match.group('unit') | |
1902 | if unit == 'month' or unit == 'year': | |
1903 | new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time) | |
1904 | unit = 'day' | |
1905 | else: | |
1906 | if unit == 'week': | |
1907 | unit = 'day' | |
1908 | time *= 7 | |
1909 | delta = datetime.timedelta(**{unit + 's': time}) | |
1910 | new_date = start_time + delta | |
1911 | if auto_precision: | |
1912 | return datetime_round(new_date, unit) | |
1913 | return new_date | |
1914 | ||
1915 | return datetime_round(datetime.datetime.strptime(date_str, format), precision) | |
1916 | ||
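# For example:
#   datetime_from_str('now-1day')   # 24 hours ago, rounded to the nearest day
#   datetime_from_str('20200101-3weeks', precision='day', format='%Y%m%d')
#                                   # -> datetime.datetime(2019, 12, 11, 0, 0)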
1917 | ||
1918 | def date_from_str(date_str, format='%Y%m%d', strict=False): | |
1919 | R""" | |
1920 | Return a date object from a string using datetime_from_str | |
1921 | ||
1922 | @param strict Restrict allowed patterns to "YYYYMMDD" and | |
1923 | (now|today|yesterday)(-\d+(day|week|month|year)s?)? | |
1924 | """ | |
1925 | if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str): | |
1926 | raise ValueError(f'Invalid date format "{date_str}"') | |
1927 | return datetime_from_str(date_str, precision='microsecond', format=format).date() | |
1928 | ||
1929 | ||
1930 | def datetime_add_months(dt, months): | |
1931 | """Increment/Decrement a datetime object by months.""" | |
1932 | month = dt.month + months - 1 | |
1933 | year = dt.year + month // 12 | |
1934 | month = month % 12 + 1 | |
1935 | day = min(dt.day, calendar.monthrange(year, month)[1]) | |
1936 | return dt.replace(year, month, day) | |
1937 | ||
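# For example, the day of month is clamped when the target month is shorter:
#   datetime_add_months(datetime.datetime(2020, 1, 31), 1)  # -> datetime.datetime(2020, 2, 29, 0, 0)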
1938 | ||
1939 | def datetime_round(dt, precision='day'): | |
1940 | """ | |
1941 | Round a datetime object's time to a specific precision | |
1942 | """ | |
1943 | if precision == 'microsecond': | |
1944 | return dt | |
1945 | ||
1946 | unit_seconds = { | |
1947 | 'day': 86400, | |
1948 | 'hour': 3600, | |
1949 | 'minute': 60, | |
1950 | 'second': 1, | |
1951 | } | |
1952 | roundto = lambda x, n: ((x + n / 2) // n) * n | |
1953 | timestamp = calendar.timegm(dt.timetuple()) | |
1954 | return datetime.datetime.utcfromtimestamp(roundto(timestamp, unit_seconds[precision])) | |
1955 | ||
1956 | ||
1957 | def hyphenate_date(date_str): | |
1958 | """ | |
1959 | Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format""" | |
1960 | match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str) | |
1961 | if match is not None: | |
1962 | return '-'.join(match.groups()) | |
1963 | else: | |
1964 | return date_str | |
1965 | ||
1966 | ||
1967 | class DateRange: | |
1968 | """Represents a time interval between two dates""" | |
1969 | ||
1970 | def __init__(self, start=None, end=None): | |
1971 | """start and end must be strings in the format accepted by date""" | |
1972 | if start is not None: | |
1973 | self.start = date_from_str(start, strict=True) | |
1974 | else: | |
1975 | self.start = datetime.datetime.min.date() | |
1976 | if end is not None: | |
1977 | self.end = date_from_str(end, strict=True) | |
1978 | else: | |
1979 | self.end = datetime.datetime.max.date() | |
1980 | if self.start > self.end: | |
1981 | raise ValueError('Date range: "%s"; the start date must be before the end date' % self) | |
1982 | ||
1983 | @classmethod | |
1984 | def day(cls, day): | |
1985 | """Returns a range that only contains the given day""" | |
1986 | return cls(day, day) | |
1987 | ||
1988 | def __contains__(self, date): | |
1989 | """Check if the date is in the range""" | |
1990 | if not isinstance(date, datetime.date): | |
1991 | date = date_from_str(date) | |
1992 | return self.start <= date <= self.end | |
1993 | ||
1994 | def __str__(self): | |
1995 | return f'{self.start.isoformat()} - {self.end.isoformat()}' | |
1996 | ||
1997 | def __eq__(self, other): | |
1998 | return (isinstance(other, DateRange) | |
1999 | and self.start == other.start and self.end == other.end) | |
2000 | ||
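# For example:
#   '20200101' in DateRange('20191231', '20200102')                 # -> True
#   DateRange.day('20200101') == DateRange('20200101', '20200101')  # -> True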
2001 | ||
2002 | def platform_name(): | |
2003 | """ Returns the platform name as a str """ | |
2004 | deprecation_warning(f'"{__name__}.platform_name" is deprecated, use "platform.platform" instead') | |
2005 | return platform.platform() | |
2006 | ||
2007 | ||
2008 | @functools.cache | |
2009 | def system_identifier(): | |
2010 | python_implementation = platform.python_implementation() | |
2011 | if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'): | |
2012 | python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3] | |
2013 | libc_ver = [] | |
2014 | with contextlib.suppress(OSError): # We may not have access to the executable | |
2015 | libc_ver = platform.libc_ver() | |
2016 | ||
2017 | return 'Python %s (%s %s %s) - %s (%s%s)' % ( | |
2018 | platform.python_version(), | |
2019 | python_implementation, | |
2020 | platform.machine(), | |
2021 | platform.architecture()[0], | |
2022 | platform.platform(), | |
2023 | ssl.OPENSSL_VERSION, | |
2024 | format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'), | |
2025 | ) | |
2026 | ||
2027 | ||
2028 | @functools.cache | |
2029 | def get_windows_version(): | |
2030 | ''' Get the Windows version. Returns () if not running on Windows ''' | |
2031 | if compat_os_name == 'nt': | |
2032 | return version_tuple(platform.win32_ver()[1]) | |
2033 | else: | |
2034 | return () | |
2035 | ||
2036 | ||
2037 | def write_string(s, out=None, encoding=None): | |
2038 | assert isinstance(s, str) | |
2039 | out = out or sys.stderr | |
2040 | ||
2041 | if compat_os_name == 'nt' and supports_terminal_sequences(out): | |
2042 | s = re.sub(r'([\r\n]+)', r' \1', s) | |
2043 | ||
2044 | enc, buffer = None, out | |
2045 | if 'b' in getattr(out, 'mode', ''): | |
2046 | enc = encoding or preferredencoding() | |
2047 | elif hasattr(out, 'buffer'): | |
2048 | buffer = out.buffer | |
2049 | enc = encoding or getattr(out, 'encoding', None) or preferredencoding() | |
2050 | ||
2051 | buffer.write(s.encode(enc, 'ignore') if enc else s) | |
2052 | out.flush() | |
2053 | ||
2054 | ||
2055 | def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs): | |
2056 | from . import _IN_CLI | |
2057 | if _IN_CLI: | |
2058 | if msg in deprecation_warning._cache: | |
2059 | return | |
2060 | deprecation_warning._cache.add(msg) | |
2061 | if printer: | |
2062 | return printer(f'{msg}{bug_reports_message()}', **kwargs) | |
2063 | return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs) | |
2064 | else: | |
2065 | import warnings | |
2066 | warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3) | |
2067 | ||
2068 | ||
2069 | deprecation_warning._cache = set() | |
2070 | ||
2071 | ||
2072 | def bytes_to_intlist(bs): | |
2073 | if not bs: | |
2074 | return [] | |
2075 | if isinstance(bs[0], int): # Python 3 | |
2076 | return list(bs) | |
2077 | else: | |
2078 | return [ord(c) for c in bs] | |
2079 | ||
2080 | ||
2081 | def intlist_to_bytes(xs): | |
2082 | if not xs: | |
2083 | return b'' | |
2084 | return struct.pack('%dB' % len(xs), *xs) | |
2085 | ||
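# These two helpers are inverses of each other; for example:
#   bytes_to_intlist(b'foo')           # -> [102, 111, 111]
#   intlist_to_bytes([102, 111, 111])  # -> b'foo'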
2086 | ||
2087 | class LockingUnsupportedError(OSError): | |
2088 | msg = 'File locking is not supported' | |
2089 | ||
2090 | def __init__(self): | |
2091 | super().__init__(self.msg) | |
2092 | ||
2093 | ||
2094 | # Cross-platform file locking | |
2095 | if sys.platform == 'win32': | |
2096 | import ctypes | |
2097 | import ctypes.wintypes | |
2098 | import msvcrt | |
2099 | ||
2100 | class OVERLAPPED(ctypes.Structure): | |
2101 | _fields_ = [ | |
2102 | ('Internal', ctypes.wintypes.LPVOID), | |
2103 | ('InternalHigh', ctypes.wintypes.LPVOID), | |
2104 | ('Offset', ctypes.wintypes.DWORD), | |
2105 | ('OffsetHigh', ctypes.wintypes.DWORD), | |
2106 | ('hEvent', ctypes.wintypes.HANDLE), | |
2107 | ] | |
2108 | ||
2109 | kernel32 = ctypes.WinDLL('kernel32') | |
2110 | LockFileEx = kernel32.LockFileEx | |
2111 | LockFileEx.argtypes = [ | |
2112 | ctypes.wintypes.HANDLE, # hFile | |
2113 | ctypes.wintypes.DWORD, # dwFlags | |
2114 | ctypes.wintypes.DWORD, # dwReserved | |
2115 | ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow | |
2116 | ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh | |
2117 | ctypes.POINTER(OVERLAPPED) # Overlapped | |
2118 | ] | |
2119 | LockFileEx.restype = ctypes.wintypes.BOOL | |
2120 | UnlockFileEx = kernel32.UnlockFileEx | |
2121 | UnlockFileEx.argtypes = [ | |
2122 | ctypes.wintypes.HANDLE, # hFile | |
2123 | ctypes.wintypes.DWORD, # dwReserved | |
2124 | ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow | |
2125 | ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh | |
2126 | ctypes.POINTER(OVERLAPPED) # Overlapped | |
2127 | ] | |
2128 | UnlockFileEx.restype = ctypes.wintypes.BOOL | |
2129 | whole_low = 0xffffffff | |
2130 | whole_high = 0x7fffffff | |
2131 | ||
2132 | def _lock_file(f, exclusive, block): | |
2133 | overlapped = OVERLAPPED() | |
2134 | overlapped.Offset = 0 | |
2135 | overlapped.OffsetHigh = 0 | |
2136 | overlapped.hEvent = 0 | |
2137 | f._lock_file_overlapped_p = ctypes.pointer(overlapped) | |
2138 | ||
2139 | if not LockFileEx(msvcrt.get_osfhandle(f.fileno()), | |
2140 | (0x2 if exclusive else 0x0) | (0x0 if block else 0x1), | |
2141 | 0, whole_low, whole_high, f._lock_file_overlapped_p): | |
2142 | # NB: the no-argument form of "ctypes.FormatError" does not work on PyPy | |
2143 | raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}') | |
2144 | ||
2145 | def _unlock_file(f): | |
2146 | assert f._lock_file_overlapped_p | |
2147 | handle = msvcrt.get_osfhandle(f.fileno()) | |
2148 | if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p): | |
2149 | raise OSError('Unlocking file failed: %r' % ctypes.FormatError()) | |
2150 | ||
2151 | else: | |
2152 | try: | |
2153 | import fcntl | |
2154 | ||
2155 | def _lock_file(f, exclusive, block): | |
2156 | flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH | |
2157 | if not block: | |
2158 | flags |= fcntl.LOCK_NB | |
2159 | try: | |
2160 | fcntl.flock(f, flags) | |
2161 | except BlockingIOError: | |
2162 | raise | |
2163 | except OSError: # AOSP does not have flock() | |
2164 | fcntl.lockf(f, flags) | |
2165 | ||
2166 | def _unlock_file(f): | |
2167 | try: | |
2168 | fcntl.flock(f, fcntl.LOCK_UN) | |
2169 | except OSError: | |
2170 | fcntl.lockf(f, fcntl.LOCK_UN) | |
2171 | ||
2172 | except ImportError: | |
2173 | ||
2174 | def _lock_file(f, exclusive, block): | |
2175 | raise LockingUnsupportedError() | |
2176 | ||
2177 | def _unlock_file(f): | |
2178 | raise LockingUnsupportedError() | |
2179 | ||
2180 | ||
2181 | class locked_file: | |
2182 | locked = False | |
2183 | ||
2184 | def __init__(self, filename, mode, block=True, encoding=None): | |
2185 | if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}: | |
2186 | raise NotImplementedError(mode) | |
2187 | self.mode, self.block = mode, block | |
2188 | ||
2189 | writable = any(f in mode for f in 'wax+') | |
2190 | readable = any(f in mode for f in 'r+') | |
2191 | flags = functools.reduce(operator.ior, ( | |
2192 | getattr(os, 'O_CLOEXEC', 0), # UNIX only | |
2193 | getattr(os, 'O_BINARY', 0), # Windows only | |
2194 | getattr(os, 'O_NOINHERIT', 0), # Windows only | |
2195 | os.O_CREAT if writable else 0, # O_TRUNC only after locking | |
2196 | os.O_APPEND if 'a' in mode else 0, | |
2197 | os.O_EXCL if 'x' in mode else 0, | |
2198 | os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY, | |
2199 | )) | |
2200 | ||
2201 | self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding) | |
2202 | ||
2203 | def __enter__(self): | |
2204 | exclusive = 'r' not in self.mode | |
2205 | try: | |
2206 | _lock_file(self.f, exclusive, self.block) | |
2207 | self.locked = True | |
2208 | except OSError: | |
2209 | self.f.close() | |
2210 | raise | |
2211 | if 'w' in self.mode: | |
2212 | try: | |
2213 | self.f.truncate() | |
2214 | except OSError as e: | |
2215 | if e.errno not in ( | |
2216 | errno.ESPIPE, # Illegal seek - expected for FIFO | |
2217 | errno.EINVAL, # Invalid argument - expected for /dev/null | |
2218 | ): | |
2219 | raise | |
2220 | return self | |
2221 | ||
2222 | def unlock(self): | |
2223 | if not self.locked: | |
2224 | return | |
2225 | try: | |
2226 | _unlock_file(self.f) | |
2227 | finally: | |
2228 | self.locked = False | |
2229 | ||
2230 | def __exit__(self, *_): | |
2231 | try: | |
2232 | self.unlock() | |
2233 | finally: | |
2234 | self.f.close() | |
2235 | ||
2236 | open = __enter__ | |
2237 | close = __exit__ | |
2238 | ||
2239 | def __getattr__(self, attr): | |
2240 | return getattr(self.f, attr) | |
2241 | ||
2242 | def __iter__(self): | |
2243 | return iter(self.f) | |
2244 | ||
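# Usage sketch (file name is illustrative): the lock is acquired in __enter__, and files
# opened with 'w' are truncated only after the lock is held, so concurrent writers do not
# clobber each other.
#   with locked_file('archive.txt', 'a', block=True, encoding='utf-8') as f:
#       f.write('downloaded-id\n')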
2245 | ||
2246 | @functools.cache | |
2247 | def get_filesystem_encoding(): | |
2248 | encoding = sys.getfilesystemencoding() | |
2249 | return encoding if encoding is not None else 'utf-8' | |
2250 | ||
2251 | ||
2252 | def shell_quote(args): | |
2253 | quoted_args = [] | |
2254 | encoding = get_filesystem_encoding() | |
2255 | for a in args: | |
2256 | if isinstance(a, bytes): | |
2257 | # We may get a filename encoded with 'encodeFilename' | |
2258 | a = a.decode(encoding) | |
2259 | quoted_args.append(compat_shlex_quote(a)) | |
2260 | return ' '.join(quoted_args) | |
2261 | ||
2262 | ||
2263 | def smuggle_url(url, data): | |
2264 | """ Pass additional data in a URL for internal use. """ | |
2265 | ||
2266 | url, idata = unsmuggle_url(url, {}) | |
2267 | data.update(idata) | |
2268 | sdata = urllib.parse.urlencode( | |
2269 | {'__youtubedl_smuggle': json.dumps(data)}) | |
2270 | return url + '#' + sdata | |
2271 | ||
2272 | ||
2273 | def unsmuggle_url(smug_url, default=None): | |
2274 | if '#__youtubedl_smuggle' not in smug_url: | |
2275 | return smug_url, default | |
2276 | url, _, sdata = smug_url.rpartition('#') | |
2277 | jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0] | |
2278 | data = json.loads(jsond) | |
2279 | return url, data | |
2280 | ||
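# The two functions round-trip; for example:
#   url = smuggle_url('http://example.com/video', {'referrer': 'http://example.com/'})
#   unsmuggle_url(url)  # -> ('http://example.com/video', {'referrer': 'http://example.com/'})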
2281 | ||
2282 | def format_decimal_suffix(num, fmt='%d%s', *, factor=1000): | |
2283 | """ Formats numbers with decimal sufixes like K, M, etc """ | |
2284 | num, factor = float_or_none(num), float(factor) | |
2285 | if num is None or num < 0: | |
2286 | return None | |
2287 | POSSIBLE_SUFFIXES = 'kMGTPEZY' | |
2288 | exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES)) | |
2289 | suffix = ['', *POSSIBLE_SUFFIXES][exponent] | |
2290 | if factor == 1024: | |
2291 | suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i') | |
2292 | converted = num / (factor ** exponent) | |
2293 | return fmt % (converted, suffix) | |
2294 | ||
2295 | ||
2296 | def format_bytes(bytes): | |
2297 | return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A' | |
2298 | ||
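# For example:
#   format_decimal_suffix(1500, '%.1f%s')  # -> '1.5k'
#   format_bytes(1024 ** 2)                # -> '1.00MiB'
#   format_bytes(None)                     # -> 'N/A'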
2299 | ||
2300 | def lookup_unit_table(unit_table, s, strict=False): | |
2301 | num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]') | |
2302 | units_re = '|'.join(re.escape(u) for u in unit_table) | |
2303 | m = (re.fullmatch if strict else re.match)( | |
2304 | rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s) | |
2305 | if not m: | |
2306 | return None | |
2307 | ||
2308 | num = float(m.group('num').replace(',', '.')) | |
2309 | mult = unit_table[m.group('unit')] | |
2310 | return round(num * mult) | |
2311 | ||
2312 | ||
2313 | def parse_bytes(s): | |
2314 | """Parse a string indicating a byte quantity into an integer""" | |
2315 | return lookup_unit_table( | |
2316 | {u: 1024**i for i, u in enumerate(['', *'KMGTPEZY'])}, | |
2317 | s.upper(), strict=True) | |
2318 | ||
2319 | ||
2320 | def parse_filesize(s): | |
2321 | if s is None: | |
2322 | return None | |
2323 | ||
2324 | # The lower-case forms are of course incorrect and unofficial, | |
2325 | # but we support those too | |
2326 | _UNIT_TABLE = { | |
2327 | 'B': 1, | |
2328 | 'b': 1, | |
2329 | 'bytes': 1, | |
2330 | 'KiB': 1024, | |
2331 | 'KB': 1000, | |
2332 | 'kB': 1024, | |
2333 | 'Kb': 1000, | |
2334 | 'kb': 1000, | |
2335 | 'kilobytes': 1000, | |
2336 | 'kibibytes': 1024, | |
2337 | 'MiB': 1024 ** 2, | |
2338 | 'MB': 1000 ** 2, | |
2339 | 'mB': 1024 ** 2, | |
2340 | 'Mb': 1000 ** 2, | |
2341 | 'mb': 1000 ** 2, | |
2342 | 'megabytes': 1000 ** 2, | |
2343 | 'mebibytes': 1024 ** 2, | |
2344 | 'GiB': 1024 ** 3, | |
2345 | 'GB': 1000 ** 3, | |
2346 | 'gB': 1024 ** 3, | |
2347 | 'Gb': 1000 ** 3, | |
2348 | 'gb': 1000 ** 3, | |
2349 | 'gigabytes': 1000 ** 3, | |
2350 | 'gibibytes': 1024 ** 3, | |
2351 | 'TiB': 1024 ** 4, | |
2352 | 'TB': 1000 ** 4, | |
2353 | 'tB': 1024 ** 4, | |
2354 | 'Tb': 1000 ** 4, | |
2355 | 'tb': 1000 ** 4, | |
2356 | 'terabytes': 1000 ** 4, | |
2357 | 'tebibytes': 1024 ** 4, | |
2358 | 'PiB': 1024 ** 5, | |
2359 | 'PB': 1000 ** 5, | |
2360 | 'pB': 1024 ** 5, | |
2361 | 'Pb': 1000 ** 5, | |
2362 | 'pb': 1000 ** 5, | |
2363 | 'petabytes': 1000 ** 5, | |
2364 | 'pebibytes': 1024 ** 5, | |
2365 | 'EiB': 1024 ** 6, | |
2366 | 'EB': 1000 ** 6, | |
2367 | 'eB': 1024 ** 6, | |
2368 | 'Eb': 1000 ** 6, | |
2369 | 'eb': 1000 ** 6, | |
2370 | 'exabytes': 1000 ** 6, | |
2371 | 'exbibytes': 1024 ** 6, | |
2372 | 'ZiB': 1024 ** 7, | |
2373 | 'ZB': 1000 ** 7, | |
2374 | 'zB': 1024 ** 7, | |
2375 | 'Zb': 1000 ** 7, | |
2376 | 'zb': 1000 ** 7, | |
2377 | 'zettabytes': 1000 ** 7, | |
2378 | 'zebibytes': 1024 ** 7, | |
2379 | 'YiB': 1024 ** 8, | |
2380 | 'YB': 1000 ** 8, | |
2381 | 'yB': 1024 ** 8, | |
2382 | 'Yb': 1000 ** 8, | |
2383 | 'yb': 1000 ** 8, | |
2384 | 'yottabytes': 1000 ** 8, | |
2385 | 'yobibytes': 1024 ** 8, | |
2386 | } | |
2387 | ||
2388 | return lookup_unit_table(_UNIT_TABLE, s) | |
2389 | ||
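# For example:
#   parse_filesize('5 GiB')  # -> 5368709120
#   parse_filesize('1.2Tb')  # -> 1200000000000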
2390 | ||
2391 | def parse_count(s): | |
2392 | if s is None: | |
2393 | return None | |
2394 | ||
2395 | s = re.sub(r'^[^\d]+\s', '', s).strip() | |
2396 | ||
2397 | if re.match(r'^[\d,.]+$', s): | |
2398 | return str_to_int(s) | |
2399 | ||
2400 | _UNIT_TABLE = { | |
2401 | 'k': 1000, | |
2402 | 'K': 1000, | |
2403 | 'm': 1000 ** 2, | |
2404 | 'M': 1000 ** 2, | |
2405 | 'kk': 1000 ** 2, | |
2406 | 'KK': 1000 ** 2, | |
2407 | 'b': 1000 ** 3, | |
2408 | 'B': 1000 ** 3, | |
2409 | } | |
2410 | ||
2411 | ret = lookup_unit_table(_UNIT_TABLE, s) | |
2412 | if ret is not None: | |
2413 | return ret | |
2414 | ||
2415 | mobj = re.match(r'([\d,.]+)(?:$|\s)', s) | |
2416 | if mobj: | |
2417 | return str_to_int(mobj.group(1)) | |
2418 | ||
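# For example:
#   parse_count('1.1M')         # -> 1100000
#   parse_count('1,234 views')  # -> 1234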
2419 | ||
2420 | def parse_resolution(s, *, lenient=False): | |
2421 | if s is None: | |
2422 | return {} | |
2423 | ||
2424 | if lenient: | |
2425 | mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s) | |
2426 | else: | |
2427 | mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s) | |
2428 | if mobj: | |
2429 | return { | |
2430 | 'width': int(mobj.group('w')), | |
2431 | 'height': int(mobj.group('h')), | |
2432 | } | |
2433 | ||
2434 | mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s) | |
2435 | if mobj: | |
2436 | return {'height': int(mobj.group(1))} | |
2437 | ||
2438 | mobj = re.search(r'\b([48])[kK]\b', s) | |
2439 | if mobj: | |
2440 | return {'height': int(mobj.group(1)) * 540} | |
2441 | ||
2442 | return {} | |
2443 | ||
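# For example:
#   parse_resolution('1920x1080')  # -> {'width': 1920, 'height': 1080}
#   parse_resolution('720p')       # -> {'height': 720}
#   parse_resolution('4K')         # -> {'height': 2160}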
2444 | ||
2445 | def parse_bitrate(s): | |
2446 | if not isinstance(s, str): | |
2447 | return | |
2448 | mobj = re.search(r'\b(\d+)\s*kbps', s) | |
2449 | if mobj: | |
2450 | return int(mobj.group(1)) | |
2451 | ||
2452 | ||
2453 | def month_by_name(name, lang='en'): | |
2454 | """ Return the number of a month by (locale-independently) English name """ | |
2455 | ||
2456 | month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en']) | |
2457 | ||
2458 | try: | |
2459 | return month_names.index(name) + 1 | |
2460 | except ValueError: | |
2461 | return None | |
2462 | ||
2463 | ||
2464 | def month_by_abbreviation(abbrev): | |
2465 | """ Return the number of a month by (locale-independently) English | |
2466 | abbreviations """ | |
2467 | ||
2468 | try: | |
2469 | return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1 | |
2470 | except ValueError: | |
2471 | return None | |
2472 | ||
2473 | ||
2474 | def fix_xml_ampersands(xml_str): | |
2475 | """Replace all the '&' by '&' in XML""" | |
2476 | return re.sub( | |
2477 | r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)', | |
2478 | '&', | |
2479 | xml_str) | |
2480 | ||
2481 | ||
2482 | def setproctitle(title): | |
2483 | assert isinstance(title, str) | |
2484 | ||
2485 | # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541 | |
2486 | try: | |
2487 | import ctypes | |
2488 | except ImportError: | |
2489 | return | |
2490 | ||
2491 | try: | |
2492 | libc = ctypes.cdll.LoadLibrary('libc.so.6') | |
2493 | except OSError: | |
2494 | return | |
2495 | except TypeError: | |
2496 | # LoadLibrary in Windows Python 2.7.13 only expects | |
2497 | # a bytestring, but since unicode_literals turns | |
2498 | # every string into a unicode string, it fails. | |
2499 | return | |
2500 | title_bytes = title.encode() | |
2501 | buf = ctypes.create_string_buffer(len(title_bytes)) | |
2502 | buf.value = title_bytes | |
2503 | try: | |
2504 | libc.prctl(15, buf, 0, 0, 0) | |
2505 | except AttributeError: | |
2506 | return # Strange libc, just skip this | |
2507 | ||
2508 | ||
2509 | def remove_start(s, start): | |
2510 | return s[len(start):] if s is not None and s.startswith(start) else s | |
2511 | ||
2512 | ||
2513 | def remove_end(s, end): | |
2514 | return s[:-len(end)] if s is not None and s.endswith(end) else s | |
2515 | ||
2516 | ||
2517 | def remove_quotes(s): | |
2518 | if s is None or len(s) < 2: | |
2519 | return s | |
2520 | for quote in ('"', "'", ): | |
2521 | if s[0] == quote and s[-1] == quote: | |
2522 | return s[1:-1] | |
2523 | return s | |
2524 | ||
2525 | ||
2526 | def get_domain(url): | |
2527 | """ | |
2528 | This implementation is inconsistent, but is kept for compatibility. | |
2529 | Use this only for "webpage_url_domain" | |
2530 | """ | |
2531 | return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None | |
2532 | ||
2533 | ||
2534 | def url_basename(url): | |
2535 | path = urllib.parse.urlparse(url).path | |
2536 | return path.strip('/').split('/')[-1] | |
2537 | ||
2538 | ||
2539 | def base_url(url): | |
2540 | return re.match(r'https?://[^?#]+/', url).group() | |
2541 | ||
2542 | ||
2543 | def urljoin(base, path): | |
2544 | if isinstance(path, bytes): | |
2545 | path = path.decode() | |
2546 | if not isinstance(path, str) or not path: | |
2547 | return None | |
2548 | if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path): | |
2549 | return path | |
2550 | if isinstance(base, bytes): | |
2551 | base = base.decode() | |
2552 | if not isinstance(base, str) or not re.match( | |
2553 | r'^(?:https?:)?//', base): | |
2554 | return None | |
2555 | return urllib.parse.urljoin(base, path) | |
2556 | ||
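# For example:
#   url_basename('https://example.com/dir/file.mp4?x=1')  # -> 'file.mp4'
#   base_url('https://example.com/dir/file.mp4?x=1')      # -> 'https://example.com/dir/'
#   urljoin('https://example.com/a/b', 'c/d')              # -> 'https://example.com/a/c/d'
#   urljoin('not a url', 'c/d')                            # -> None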
2557 | ||
2558 | class HEADRequest(urllib.request.Request): | |
2559 | def get_method(self): | |
2560 | return 'HEAD' | |
2561 | ||
2562 | ||
2563 | class PUTRequest(urllib.request.Request): | |
2564 | def get_method(self): | |
2565 | return 'PUT' | |
2566 | ||
2567 | ||
2568 | def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1): | |
2569 | if get_attr and v is not None: | |
2570 | v = getattr(v, get_attr, None) | |
2571 | try: | |
2572 | return int(v) * invscale // scale | |
2573 | except (ValueError, TypeError, OverflowError): | |
2574 | return default | |
2575 | ||
2576 | ||
2577 | def str_or_none(v, default=None): | |
2578 | return default if v is None else str(v) | |
2579 | ||
2580 | ||
2581 | def str_to_int(int_str): | |
2582 | """ A more relaxed version of int_or_none """ | |
2583 | if isinstance(int_str, int): | |
2584 | return int_str | |
2585 | elif isinstance(int_str, str): | |
2586 | int_str = re.sub(r'[,\.\+]', '', int_str) | |
2587 | return int_or_none(int_str) | |
2588 | ||
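# For example:
#   int_or_none('42')              # -> 42
#   int_or_none('n/a', default=0)  # -> 0
#   str_to_int('1,234,567')        # -> 1234567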
2589 | ||
2590 | def float_or_none(v, scale=1, invscale=1, default=None): | |
2591 | if v is None: | |
2592 | return default | |
2593 | try: | |
2594 | return float(v) * invscale / scale | |
2595 | except (ValueError, TypeError): | |
2596 | return default | |
2597 | ||
2598 | ||
2599 | def bool_or_none(v, default=None): | |
2600 | return v if isinstance(v, bool) else default | |
2601 | ||
2602 | ||
2603 | def strip_or_none(v, default=None): | |
2604 | return v.strip() if isinstance(v, str) else default | |
2605 | ||
2606 | ||
2607 | def url_or_none(url): | |
2608 | if not url or not isinstance(url, str): | |
2609 | return None | |
2610 | url = url.strip() | |
2611 | return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None | |
2612 | ||
2613 | ||
2614 | def request_to_url(req): | |
2615 | if isinstance(req, urllib.request.Request): | |
2616 | return req.get_full_url() | |
2617 | else: | |
2618 | return req | |
2619 | ||
2620 | ||
2621 | def strftime_or_none(timestamp, date_format, default=None): | |
2622 | datetime_object = None | |
2623 | try: | |
2624 | if isinstance(timestamp, (int, float)): # unix timestamp | |
2625 | # Using naive datetime here can break timestamp() in Windows | |
2626 | # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414 | |
2627 | datetime_object = datetime.datetime.fromtimestamp(timestamp, datetime.timezone.utc) | |
2628 | elif isinstance(timestamp, str): # assume YYYYMMDD | |
2629 | datetime_object = datetime.datetime.strptime(timestamp, '%Y%m%d') | |
2630 | date_format = re.sub( # Support %s on windows | |
2631 | r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format) | |
2632 | return datetime_object.strftime(date_format) | |
2633 | except (ValueError, TypeError, AttributeError): | |
2634 | return default | |
2635 | ||
2636 | ||
2637 | def parse_duration(s): | |
2638 | if not isinstance(s, str): | |
2639 | return None | |
2640 | s = s.strip() | |
2641 | if not s: | |
2642 | return None | |
2643 | ||
2644 | days, hours, mins, secs, ms = [None] * 5 | |
2645 | m = re.match(r'''(?x) | |
2646 | (?P<before_secs> | |
2647 | (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)? | |
2648 | (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+)) | |
2649 | (?P<ms>[.:][0-9]+)?Z?$ | |
2650 | ''', s) | |
2651 | if m: | |
2652 | days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms') | |
2653 | else: | |
2654 | m = re.match( | |
2655 | r'''(?ix)(?:P? | |
2656 | (?: | |
2657 | [0-9]+\s*y(?:ears?)?,?\s* | |
2658 | )? | |
2659 | (?: | |
2660 | [0-9]+\s*m(?:onths?)?,?\s* | |
2661 | )? | |
2662 | (?: | |
2663 | [0-9]+\s*w(?:eeks?)?,?\s* | |
2664 | )? | |
2665 | (?: | |
2666 | (?P<days>[0-9]+)\s*d(?:ays?)?,?\s* | |
2667 | )? | |
2668 | T)? | |
2669 | (?: | |
2670 | (?P<hours>[0-9]+)\s*h(?:ours?)?,?\s* | |
2671 | )? | |
2672 | (?: | |
2673 | (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s* | |
2674 | )? | |
2675 | (?: | |
2676 | (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s* | |
2677 | )?Z?$''', s) | |
2678 | if m: | |
2679 | days, hours, mins, secs, ms = m.groups() | |
2680 | else: | |
2681 | m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s) | |
2682 | if m: | |
2683 | hours, mins = m.groups() | |
2684 | else: | |
2685 | return None | |
2686 | ||
2687 | if ms: | |
2688 | ms = ms.replace(':', '.') | |
2689 | return sum(float(part or 0) * mult for part, mult in ( | |
2690 | (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1))) | |
2691 | ||
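# For example:
#   parse_duration('1:23:45')      # -> 5025
#   parse_duration('01:02:03.05')  # -> 3723.05
#   parse_duration('3 min')        # -> 180
#   parse_duration('PT1H30M')      # -> 5400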
2692 | ||
2693 | def prepend_extension(filename, ext, expected_real_ext=None): | |
2694 | name, real_ext = os.path.splitext(filename) | |
2695 | return ( | |
2696 | f'{name}.{ext}{real_ext}' | |
2697 | if not expected_real_ext or real_ext[1:] == expected_real_ext | |
2698 | else f'{filename}.{ext}') | |
2699 | ||
2700 | ||
2701 | def replace_extension(filename, ext, expected_real_ext=None): | |
2702 | name, real_ext = os.path.splitext(filename) | |
2703 | return '{}.{}'.format( | |
2704 | name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename, | |
2705 | ext) | |
2706 | ||
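# For example:
#   prepend_extension('video.mp4', 'temp')  # -> 'video.temp.mp4'
#   replace_extension('video.mp4', 'webm')  # -> 'video.webm'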
2707 | ||
2708 | def check_executable(exe, args=[]): | |
2709 | """ Checks if the given binary is installed somewhere in PATH, and returns its name. | |
2710 | args can be a list of arguments for a short output (like -version) """ | |
2711 | try: | |
2712 | Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE) | |
2713 | except OSError: | |
2714 | return False | |
2715 | return exe | |
2716 | ||
2717 | ||
2718 | def _get_exe_version_output(exe, args): | |
2719 | try: | |
2720 | # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers | |
2721 | # SIGTTOU if yt-dlp is run in the background. | |
2722 | # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656 | |
2723 | stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True, | |
2724 | stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) | |
2725 | if ret: | |
2726 | return None | |
2727 | except OSError: | |
2728 | return False | |
2729 | return stdout | |
2730 | ||
2731 | ||
2732 | def detect_exe_version(output, version_re=None, unrecognized='present'): | |
2733 | assert isinstance(output, str) | |
2734 | if version_re is None: | |
2735 | version_re = r'version\s+([-0-9._a-zA-Z]+)' | |
2736 | m = re.search(version_re, output) | |
2737 | if m: | |
2738 | return m.group(1) | |
2739 | else: | |
2740 | return unrecognized | |
2741 | ||
2742 | ||
2743 | def get_exe_version(exe, args=['--version'], | |
2744 | version_re=None, unrecognized=('present', 'broken')): | |
2745 | """ Returns the version of the specified executable, | |
2746 | or False if the executable is not present """ | |
2747 | unrecognized = variadic(unrecognized) | |
2748 | assert len(unrecognized) in (1, 2) | |
2749 | out = _get_exe_version_output(exe, args) | |
2750 | if out is None: | |
2751 | return unrecognized[-1] | |
2752 | return out and detect_exe_version(out, version_re, unrecognized[0]) | |
2753 | ||
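# For example, fed the first line of typical ffmpeg output:
#   detect_exe_version('ffmpeg version 4.4.1 Copyright (c) 2000-2021')  # -> '4.4.1'
# get_exe_version() returns False when the binary is missing, and falls back to
# 'present'/'broken' when the output cannot be parsed or the command fails.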
2754 | ||
2755 | def frange(start=0, stop=None, step=1): | |
2756 | """Float range""" | |
2757 | if stop is None: | |
2758 | start, stop = 0, start | |
2759 | sign = [-1, 1][step > 0] if step else 0 | |
2760 | while sign * start < sign * stop: | |
2761 | yield start | |
2762 | start += step | |
2763 | ||
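# For example:
#   list(frange(0, 1, 0.25))  # -> [0, 0.25, 0.5, 0.75]
#   list(frange(3))           # -> [0, 1, 2]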
2764 | ||
2765 | class LazyList(collections.abc.Sequence): | |
2766 | """Lazy immutable list from an iterable | |
2767 | Note that slices of a LazyList are lists and not LazyList""" | |
2768 | ||
2769 | class IndexError(IndexError): | |
2770 | pass | |
2771 | ||
2772 | def __init__(self, iterable, *, reverse=False, _cache=None): | |
2773 | self._iterable = iter(iterable) | |
2774 | self._cache = [] if _cache is None else _cache | |
2775 | self._reversed = reverse | |
2776 | ||
2777 | def __iter__(self): | |
2778 | if self._reversed: | |
2779 | # We need to consume the entire iterable to iterate in reverse | |
2780 | yield from self.exhaust() | |
2781 | return | |
2782 | yield from self._cache | |
2783 | for item in self._iterable: | |
2784 | self._cache.append(item) | |
2785 | yield item | |
2786 | ||
2787 | def _exhaust(self): | |
2788 | self._cache.extend(self._iterable) | |
2789 | self._iterable = [] # Discard the emptied iterable to make it pickle-able | |
2790 | return self._cache | |
2791 | ||
2792 | def exhaust(self): | |
2793 | """Evaluate the entire iterable""" | |
2794 | return self._exhaust()[::-1 if self._reversed else 1] | |
2795 | ||
2796 | @staticmethod | |
2797 | def _reverse_index(x): | |
2798 | return None if x is None else ~x | |
2799 | ||
2800 | def __getitem__(self, idx): | |
2801 | if isinstance(idx, slice): | |
2802 | if self._reversed: | |
2803 | idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1)) | |
2804 | start, stop, step = idx.start, idx.stop, idx.step or 1 | |
2805 | elif isinstance(idx, int): | |
2806 | if self._reversed: | |
2807 | idx = self._reverse_index(idx) | |
2808 | start, stop, step = idx, idx, 0 | |
2809 | else: | |
2810 | raise TypeError('indices must be integers or slices') | |
2811 | if ((start or 0) < 0 or (stop or 0) < 0 | |
2812 | or (start is None and step < 0) | |
2813 | or (stop is None and step > 0)): | |
2814 | # We need to consume the entire iterable to be able to slice from the end | |
2815 | # Obviously, never use this with infinite iterables | |
2816 | self._exhaust() | |
2817 | try: | |
2818 | return self._cache[idx] | |
2819 | except IndexError as e: | |
2820 | raise self.IndexError(e) from e | |
2821 | n = max(start or 0, stop or 0) - len(self._cache) + 1 | |
2822 | if n > 0: | |
2823 | self._cache.extend(itertools.islice(self._iterable, n)) | |
2824 | try: | |
2825 | return self._cache[idx] | |
2826 | except IndexError as e: | |
2827 | raise self.IndexError(e) from e | |
2828 | ||
2829 | def __bool__(self): | |
2830 | try: | |
2831 | self[-1] if self._reversed else self[0] | |
2832 | except self.IndexError: | |
2833 | return False | |
2834 | return True | |
2835 | ||
2836 | def __len__(self): | |
2837 | self._exhaust() | |
2838 | return len(self._cache) | |
2839 | ||
2840 | def __reversed__(self): | |
2841 | return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache) | |
2842 | ||
2843 | def __copy__(self): | |
2844 | return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache) | |
2845 | ||
2846 | def __repr__(self): | |
2847 | # repr and str should mimic a list. So we exhaust the iterable | |
2848 | return repr(self.exhaust()) | |
2849 | ||
2850 | def __str__(self): | |
2851 | return repr(self.exhaust()) | |
2852 | ||
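# Usage sketch: items are pulled from the underlying iterable only as far as needed.
#   lazy = LazyList(itertools.count())
#   lazy[:3]  # -> [0, 1, 2] (only the first few items are consumed)
#   lazy[5]   # -> 5
# Negative indices, len() and reversed() exhaust the iterable, so avoid them with
# infinite generators.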
2853 | ||
2854 | class PagedList: | |
2855 | ||
2856 | class IndexError(IndexError): | |
2857 | pass | |
2858 | ||
2859 | def __len__(self): | |
2860 | # This is only useful for tests | |
2861 | return len(self.getslice()) | |
2862 | ||
2863 | def __init__(self, pagefunc, pagesize, use_cache=True): | |
2864 | self._pagefunc = pagefunc | |
2865 | self._pagesize = pagesize | |
2866 | self._pagecount = float('inf') | |
2867 | self._use_cache = use_cache | |
2868 | self._cache = {} | |
2869 | ||
2870 | def getpage(self, pagenum): | |
2871 | page_results = self._cache.get(pagenum) | |
2872 | if page_results is None: | |
2873 | page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum)) | |
2874 | if self._use_cache: | |
2875 | self._cache[pagenum] = page_results | |
2876 | return page_results | |
2877 | ||
2878 | def getslice(self, start=0, end=None): | |
2879 | return list(self._getslice(start, end)) | |
2880 | ||
2881 | def _getslice(self, start, end): | |
2882 | raise NotImplementedError('This method must be implemented by subclasses') | |
2883 | ||
2884 | def __getitem__(self, idx): | |
2885 | assert self._use_cache, 'Indexing PagedList requires cache' | |
2886 | if not isinstance(idx, int) or idx < 0: | |
2887 | raise TypeError('indices must be non-negative integers') | |
2888 | entries = self.getslice(idx, idx + 1) | |
2889 | if not entries: | |
2890 | raise self.IndexError() | |
2891 | return entries[0] | |
2892 | ||
2893 | ||
2894 | class OnDemandPagedList(PagedList): | |
2895 | """Download pages until a page with less than maximum results""" | |
2896 | ||
2897 | def _getslice(self, start, end): | |
2898 | for pagenum in itertools.count(start // self._pagesize): | |
2899 | firstid = pagenum * self._pagesize | |
2900 | nextfirstid = pagenum * self._pagesize + self._pagesize | |
2901 | if start >= nextfirstid: | |
2902 | continue | |
2903 | ||
2904 | startv = ( | |
2905 | start % self._pagesize | |
2906 | if firstid <= start < nextfirstid | |
2907 | else 0) | |
2908 | endv = ( | |
2909 | ((end - 1) % self._pagesize) + 1 | |
2910 | if (end is not None and firstid <= end <= nextfirstid) | |
2911 | else None) | |
2912 | ||
2913 | try: | |
2914 | page_results = self.getpage(pagenum) | |
2915 | except Exception: | |
2916 | self._pagecount = pagenum - 1 | |
2917 | raise | |
2918 | if startv != 0 or endv is not None: | |
2919 | page_results = page_results[startv:endv] | |
2920 | yield from page_results | |
2921 | ||
2922 | # A little optimization - if the current page is not "full", i.e. does | |
2923 | # not contain page_size videos, then we can assume that this page | |
2924 | # is the last one - there are no more ids on further pages - | |
2925 | # i.e. no need to query again. | |
2926 | if len(page_results) + startv < self._pagesize: | |
2927 | break | |
2928 | ||
2929 | # If we got the whole page, but the next page is not interesting, | |
2930 | # break out early as well | |
2931 | if end == nextfirstid: | |
2932 | break | |
2933 | ||
2934 | ||
2935 | class InAdvancePagedList(PagedList): | |
2936 | """PagedList with total number of pages known in advance""" | |
2937 | ||
2938 | def __init__(self, pagefunc, pagecount, pagesize): | |
2939 | PagedList.__init__(self, pagefunc, pagesize, True) | |
2940 | self._pagecount = pagecount | |
2941 | ||
2942 | def _getslice(self, start, end): | |
2943 | start_page = start // self._pagesize | |
2944 | end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1) | |
2945 | skip_elems = start - start_page * self._pagesize | |
2946 | only_more = None if end is None else end - start | |
2947 | for pagenum in range(start_page, end_page): | |
2948 | page_results = self.getpage(pagenum) | |
2949 | if skip_elems: | |
2950 | page_results = page_results[skip_elems:] | |
2951 | skip_elems = None | |
2952 | if only_more is not None: | |
2953 | if len(page_results) < only_more: | |
2954 | only_more -= len(page_results) | |
2955 | else: | |
2956 | yield from page_results[:only_more] | |
2957 | break | |
2958 | yield from page_results | |
2959 | ||
2960 | ||
2961 | class PlaylistEntries: | |
2962 | MissingEntry = object() | |
2963 | is_exhausted = False | |
2964 | ||
2965 | def __init__(self, ydl, info_dict): | |
2966 | self.ydl = ydl | |
2967 | ||
2968 | # _entries must be assigned now since infodict can change during iteration | |
2969 | entries = info_dict.get('entries') | |
2970 | if entries is None: | |
2971 | raise EntryNotInPlaylist('There are no entries') | |
2972 | elif isinstance(entries, list): | |
2973 | self.is_exhausted = True | |
2974 | ||
2975 | requested_entries = info_dict.get('requested_entries') | |
2976 | self.is_incomplete = requested_entries is not None | |
2977 | if self.is_incomplete: | |
2978 | assert self.is_exhausted | |
2979 | self._entries = [self.MissingEntry] * max(requested_entries or [0]) | |
2980 | for i, entry in zip(requested_entries, entries): | |
2981 | self._entries[i - 1] = entry | |
2982 | elif isinstance(entries, (list, PagedList, LazyList)): | |
2983 | self._entries = entries | |
2984 | else: | |
2985 | self._entries = LazyList(entries) | |
2986 | ||
2987 | PLAYLIST_ITEMS_RE = re.compile(r'''(?x) | |
2988 | (?P<start>[+-]?\d+)? | |
2989 | (?P<range>[:-] | |
2990 | (?P<end>[+-]?\d+|inf(?:inite)?)? | |
2991 | (?::(?P<step>[+-]?\d+))? | |
2992 | )?''') | |
2993 | ||
2994 | @classmethod | |
2995 | def parse_playlist_items(cls, string): | |
2996 | for segment in string.split(','): | |
2997 | if not segment: | |
2998 | raise ValueError('There are two or more consecutive commas') | |
2999 | mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment) | |
3000 | if not mobj: | |
3001 | raise ValueError(f'{segment!r} is not a valid specification') | |
3002 | start, end, step, has_range = mobj.group('start', 'end', 'step', 'range') | |
3003 | if int_or_none(step) == 0: | |
3004 | raise ValueError(f'Step in {segment!r} cannot be zero') | |
3005 | yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start) | |
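| # Example of the accepted syntax (illustrative): | |
| #   list(PlaylistEntries.parse_playlist_items('1,3:5,10::2')) | |
| #   -> [1, slice(3, 5.0, None), slice(10, None, 2)] | |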
3006 | ||
3007 | def get_requested_items(self): | |
3008 | playlist_items = self.ydl.params.get('playlist_items') | |
3009 | playlist_start = self.ydl.params.get('playliststart', 1) | |
3010 | playlist_end = self.ydl.params.get('playlistend') | |
3011 | # For backwards compatibility, interpret -1 as whole list | |
3012 | if playlist_end in (-1, None): | |
3013 | playlist_end = '' | |
3014 | if not playlist_items: | |
3015 | playlist_items = f'{playlist_start}:{playlist_end}' | |
3016 | elif playlist_start != 1 or playlist_end: | |
3017 | self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True) | |
3018 | ||
3019 | for index in self.parse_playlist_items(playlist_items): | |
3020 | for i, entry in self[index]: | |
3021 | yield i, entry | |
3022 | if not entry: | |
3023 | continue | |
3024 | try: | |
3025 | # TODO: Add auto-generated fields | |
3026 | self.ydl._match_entry(entry, incomplete=True, silent=True) | |
3027 | except (ExistingVideoReached, RejectedVideoReached): | |
3028 | return | |
3029 | ||
3030 | def get_full_count(self): | |
3031 | if self.is_exhausted and not self.is_incomplete: | |
3032 | return len(self) | |
3033 | elif isinstance(self._entries, InAdvancePagedList): | |
3034 | if self._entries._pagesize == 1: | |
3035 | return self._entries._pagecount | |
3036 | ||
3037 | @functools.cached_property | |
3038 | def _getter(self): | |
3039 | if isinstance(self._entries, list): | |
3040 | def get_entry(i): | |
3041 | try: | |
3042 | entry = self._entries[i] | |
3043 | except IndexError: | |
3044 | entry = self.MissingEntry | |
3045 | if not self.is_incomplete: | |
3046 | raise self.IndexError() | |
3047 | if entry is self.MissingEntry: | |
3048 | raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found') | |
3049 | return entry | |
3050 | else: | |
3051 | def get_entry(i): | |
3052 | try: | |
3053 | return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i) | |
3054 | except (LazyList.IndexError, PagedList.IndexError): | |
3055 | raise self.IndexError() | |
3056 | return get_entry | |
3057 | ||
3058 | def __getitem__(self, idx): | |
3059 | if isinstance(idx, int): | |
3060 | idx = slice(idx, idx) | |
3061 | ||
3062 | # NB: PlaylistEntries[1:10] iterates 0-based positions 0, 1, ..., 9 (i.e. playlist entries 1-10) | |
3063 | step = 1 if idx.step is None else idx.step | |
3064 | if idx.start is None: | |
3065 | start = 0 if step > 0 else len(self) - 1 | |
3066 | else: | |
3067 | start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start | |
3068 | ||
3069 | # NB: Do not call len(self) when idx == [:] | |
3070 | if idx.stop is None: | |
3071 | stop = 0 if step < 0 else float('inf') | |
3072 | else: | |
3073 | stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop | |
3074 | stop += [-1, 1][step > 0] | |
3075 | ||
3076 | for i in frange(start, stop, step): | |
3077 | if i < 0: | |
3078 | continue | |
3079 | try: | |
3080 | entry = self._getter(i) | |
3081 | except self.IndexError: | |
3082 | self.is_exhausted = True | |
3083 | if step > 0: | |
3084 | break | |
3085 | continue | |
3086 | yield i + 1, entry | |
3087 | ||
3088 | def __len__(self): | |
3089 | return len(tuple(self[:])) | |
3090 | ||
3091 | class IndexError(IndexError): | |
3092 | pass | |
3093 | ||
3094 | ||
3095 | def uppercase_escape(s): | |
3096 | unicode_escape = codecs.getdecoder('unicode_escape') | |
3097 | return re.sub( | |
3098 | r'\\U[0-9a-fA-F]{8}', | |
3099 | lambda m: unicode_escape(m.group(0))[0], | |
3100 | s) | |
3101 | ||
3102 | ||
3103 | def lowercase_escape(s): | |
3104 | unicode_escape = codecs.getdecoder('unicode_escape') | |
3105 | return re.sub( | |
3106 | r'\\u[0-9a-fA-F]{4}', | |
3107 | lambda m: unicode_escape(m.group(0))[0], | |
3108 | s) | |
3109 | ||
3110 | ||
3111 | def escape_rfc3986(s): | |
3112 | """Escape non-ASCII characters as suggested by RFC 3986""" | |
3113 | return urllib.parse.quote(s, b"%/;:@&=+$,!~*'()?#[]") | |
3114 | ||
3115 | ||
3116 | def escape_url(url): | |
3117 | """Escape URL as suggested by RFC 3986""" | |
3118 | url_parsed = urllib.parse.urlparse(url) | |
3119 | return url_parsed._replace( | |
3120 | netloc=url_parsed.netloc.encode('idna').decode('ascii'), | |
3121 | path=escape_rfc3986(url_parsed.path), | |
3122 | params=escape_rfc3986(url_parsed.params), | |
3123 | query=escape_rfc3986(url_parsed.query), | |
3124 | fragment=escape_rfc3986(url_parsed.fragment) | |
3125 | ).geturl() | |
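| # Example (illustrative): non-ASCII path/query characters are percent-encoded | |
| # and the netloc is IDNA-encoded: | |
| #   escape_url('https://example.com/über?q=ä') | |
| #   -> 'https://example.com/%C3%BCber?q=%C3%A4' | |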
3126 | ||
3127 | ||
3128 | def parse_qs(url, **kwargs): | |
3129 | return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs) | |
3130 | ||
3131 | ||
3132 | def read_batch_urls(batch_fd): | |
3133 | def fixup(url): | |
3134 | if not isinstance(url, str): | |
3135 | url = url.decode('utf-8', 'replace') | |
3136 | BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff') | |
3137 | for bom in BOM_UTF8: | |
3138 | if url.startswith(bom): | |
3139 | url = url[len(bom):] | |
3140 | url = url.lstrip() | |
3141 | if not url or url.startswith(('#', ';', ']')): | |
3142 | return False | |
3143 | # "#" cannot be stripped out since it is part of the URI | |
3144 | # However, it can be safely stripped out if it follows a whitespace | |
3145 | return re.split(r'\s#', url, 1)[0].rstrip() | |
3146 | ||
3147 | with contextlib.closing(batch_fd) as fd: | |
3148 | return [url for url in map(fixup, fd) if url] | |
3149 | ||
3150 | ||
3151 | def urlencode_postdata(*args, **kargs): | |
3152 | return urllib.parse.urlencode(*args, **kargs).encode('ascii') | |
3153 | ||
3154 | ||
3155 | def update_url_query(url, query): | |
3156 | if not query: | |
3157 | return url | |
3158 | parsed_url = urllib.parse.urlparse(url) | |
3159 | qs = urllib.parse.parse_qs(parsed_url.query) | |
3160 | qs.update(query) | |
3161 | return urllib.parse.urlunparse(parsed_url._replace( | |
3162 | query=urllib.parse.urlencode(qs, True))) | |
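| # Example (illustrative): | |
| #   update_url_query('https://example.com/path?a=1', {'b': 'x y'}) | |
| #   -> 'https://example.com/path?a=1&b=x+y' | |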
3163 | ||
3164 | ||
3165 | def update_Request(req, url=None, data=None, headers=None, query=None): | |
3166 | req_headers = req.headers.copy() | |
3167 | req_headers.update(headers or {}) | |
3168 | req_data = data or req.data | |
3169 | req_url = update_url_query(url or req.get_full_url(), query) | |
3170 | req_get_method = req.get_method() | |
3171 | if req_get_method == 'HEAD': | |
3172 | req_type = HEADRequest | |
3173 | elif req_get_method == 'PUT': | |
3174 | req_type = PUTRequest | |
3175 | else: | |
3176 | req_type = urllib.request.Request | |
3177 | new_req = req_type( | |
3178 | req_url, data=req_data, headers=req_headers, | |
3179 | origin_req_host=req.origin_req_host, unverifiable=req.unverifiable) | |
3180 | if hasattr(req, 'timeout'): | |
3181 | new_req.timeout = req.timeout | |
3182 | return new_req | |
3183 | ||
3184 | ||
3185 | def _multipart_encode_impl(data, boundary): | |
3186 | content_type = 'multipart/form-data; boundary=%s' % boundary | |
3187 | ||
3188 | out = b'' | |
3189 | for k, v in data.items(): | |
3190 | out += b'--' + boundary.encode('ascii') + b'\r\n' | |
3191 | if isinstance(k, str): | |
3192 | k = k.encode() | |
3193 | if isinstance(v, str): | |
3194 | v = v.encode() | |
3195 | # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578 | |
3196 | # suggests sending UTF-8 directly. Firefox sends UTF-8, too | |
3197 | content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n' | |
3198 | if boundary.encode('ascii') in content: | |
3199 | raise ValueError('Boundary overlaps with data') | |
3200 | out += content | |
3201 | ||
3202 | out += b'--' + boundary.encode('ascii') + b'--\r\n' | |
3203 | ||
3204 | return out, content_type | |
3205 | ||
3206 | ||
3207 | def multipart_encode(data, boundary=None): | |
3208 | ''' | |
3209 | Encode a dict to RFC 7578-compliant form-data | |
3210 | ||
3211 | data: | |
3212 | A dict where keys and values can be either Unicode or bytes-like | |
3213 | objects. | |
3214 | boundary: | |
3215 | If specified, it must be a Unicode object and is used as the boundary. | |
3216 | Otherwise a random boundary is generated. | |
3217 | ||
3218 | Reference: https://tools.ietf.org/html/rfc7578 | |
3219 | ''' | |
3220 | has_specified_boundary = boundary is not None | |
3221 | ||
3222 | while True: | |
3223 | if boundary is None: | |
3224 | boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff)) | |
3225 | ||
3226 | try: | |
3227 | out, content_type = _multipart_encode_impl(data, boundary) | |
3228 | break | |
3229 | except ValueError: | |
3230 | if has_specified_boundary: | |
3231 | raise | |
3232 | boundary = None | |
3233 | ||
3234 | return out, content_type | |
3235 | ||
3236 | ||
3237 | def variadic(x, allowed_types=(str, bytes, dict)): | |
3238 | return x if isinstance(x, collections.abc.Iterable) and not isinstance(x, allowed_types) else (x,) | |
3239 | ||
3240 | ||
3241 | def dict_get(d, key_or_keys, default=None, skip_false_values=True): | |
3242 | for val in map(d.get, variadic(key_or_keys)): | |
3243 | if val is not None and (val or not skip_false_values): | |
3244 | return val | |
3245 | return default | |
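| # Example (illustrative): falsy-but-present values are skipped by default: | |
| #   dict_get({'a': '', 'b': 'x'}, ('a', 'b'))                           -> 'x' | |
| #   dict_get({'a': '', 'b': 'x'}, ('a', 'b'), skip_false_values=False)  -> '' | |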
3246 | ||
3247 | ||
3248 | def try_call(*funcs, expected_type=None, args=[], kwargs={}): | |
3249 | for f in funcs: | |
3250 | try: | |
3251 | val = f(*args, **kwargs) | |
3252 | except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError): | |
3253 | pass | |
3254 | else: | |
3255 | if expected_type is None or isinstance(val, expected_type): | |
3256 | return val | |
3257 | ||
3258 | ||
3259 | def try_get(src, getter, expected_type=None): | |
3260 | return try_call(*variadic(getter), args=(src,), expected_type=expected_type) | |
3261 | ||
3262 | ||
3263 | def filter_dict(dct, cndn=lambda _, v: v is not None): | |
3264 | return {k: v for k, v in dct.items() if cndn(k, v)} | |
3265 | ||
3266 | ||
3267 | def merge_dicts(*dicts): | |
3268 | merged = {} | |
3269 | for a_dict in dicts: | |
3270 | for k, v in a_dict.items(): | |
3271 | if (v is not None and k not in merged | |
3272 | or isinstance(v, str) and merged[k] == ''): | |
3273 | merged[k] = v | |
3274 | return merged | |
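| # Example (illustrative): the first non-None value wins, but an empty string | |
| # can still be overridden by a later non-empty one: | |
| #   merge_dicts({'a': 1, 'b': ''}, {'a': 2, 'b': 'x', 'c': None})  -> {'a': 1, 'b': 'x'} | |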
3275 | ||
3276 | ||
3277 | def encode_compat_str(string, encoding=preferredencoding(), errors='strict'): | |
3278 | return string if isinstance(string, str) else str(string, encoding, errors) | |
3279 | ||
3280 | ||
3281 | US_RATINGS = { | |
3282 | 'G': 0, | |
3283 | 'PG': 10, | |
3284 | 'PG-13': 13, | |
3285 | 'R': 16, | |
3286 | 'NC': 18, | |
3287 | } | |
3288 | ||
3289 | ||
3290 | TV_PARENTAL_GUIDELINES = { | |
3291 | 'TV-Y': 0, | |
3292 | 'TV-Y7': 7, | |
3293 | 'TV-G': 0, | |
3294 | 'TV-PG': 0, | |
3295 | 'TV-14': 14, | |
3296 | 'TV-MA': 17, | |
3297 | } | |
3298 | ||
3299 | ||
3300 | def parse_age_limit(s): | |
3301 | # isinstance(False, int) is True. So type() must be used instead | |
3302 | if type(s) is int: # noqa: E721 | |
3303 | return s if 0 <= s <= 21 else None | |
3304 | elif not isinstance(s, str): | |
3305 | return None | |
3306 | m = re.match(r'^(?P<age>\d{1,2})\+?$', s) | |
3307 | if m: | |
3308 | return int(m.group('age')) | |
3309 | s = s.upper() | |
3310 | if s in US_RATINGS: | |
3311 | return US_RATINGS[s] | |
3312 | m = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s) | |
3313 | if m: | |
3314 | return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)] | |
3315 | return None | |
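| # Examples (illustrative): | |
| #   parse_age_limit('18+')    -> 18 | |
| #   parse_age_limit('PG-13')  -> 13 | |
| #   parse_age_limit('TV-MA')  -> 17 | |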
3316 | ||
3317 | ||
3318 | def strip_jsonp(code): | |
3319 | return re.sub( | |
3320 | r'''(?sx)^ | |
3321 | (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*) | |
3322 | (?:\s*&&\s*(?P=func_name))? | |
3323 | \s*\(\s*(?P<callback_data>.*)\);? | |
3324 | \s*?(?://[^\n]*)*$''', | |
3325 | r'\g<callback_data>', code) | |
3326 | ||
3327 | ||
3328 | def js_to_json(code, vars={}, *, strict=False): | |
3329 | # vars is a dict of var, val pairs to substitute | |
3330 | STRING_QUOTES = '\'"' | |
3331 | STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES) | |
3332 | COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n' | |
3333 | SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*' | |
3334 | INTEGER_TABLE = ( | |
3335 | (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16), | |
3336 | (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8), | |
3337 | ) | |
3338 | ||
3339 | def process_escape(match): | |
3340 | JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu' | |
3341 | escape = match.group(1) or match.group(2) | |
3342 | ||
3343 | return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES | |
3344 | else R'\u00' if escape == 'x' | |
3345 | else '' if escape == '\n' | |
3346 | else escape) | |
3347 | ||
3348 | def fix_kv(m): | |
3349 | v = m.group(0) | |
3350 | if v in ('true', 'false', 'null'): | |
3351 | return v | |
3352 | elif v in ('undefined', 'void 0'): | |
3353 | return 'null' | |
3354 | elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',': | |
3355 | return '' | |
3356 | ||
3357 | if v[0] in STRING_QUOTES: | |
3358 | escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v[1:-1]) | |
3359 | return f'"{escaped}"' | |
3360 | ||
3361 | for regex, base in INTEGER_TABLE: | |
3362 | im = re.match(regex, v) | |
3363 | if im: | |
3364 | i = int(im.group(1), base) | |
3365 | return f'"{i}":' if v.endswith(':') else str(i) | |
3366 | ||
3367 | if v in vars: | |
3368 | try: | |
3369 | if not strict: | |
3370 | json.loads(vars[v]) | |
3371 | except json.JSONDecodeError: | |
3372 | return json.dumps(vars[v]) | |
3373 | else: | |
3374 | return vars[v] | |
3375 | ||
3376 | if not strict: | |
3377 | return f'"{v}"' | |
3378 | ||
3379 | raise ValueError(f'Unknown value: {v}') | |
3380 | ||
3381 | def create_map(mobj): | |
3382 | return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars)))) | |
3383 | ||
3384 | code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code) | |
3385 | if not strict: | |
3386 | code = re.sub(r'new Date\((".+")\)', r'\g<1>', code) | |
3387 | code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code) | |
3388 | code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code) | |
3389 | code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code) | |
3390 | ||
3391 | return re.sub(rf'''(?sx) | |
3392 | {STRING_RE}| | |
3393 | {COMMENT_RE}|,(?={SKIP_RE}[\]}}])| | |
3394 | void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*| | |
3395 | \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?| | |
3396 | [0-9]+(?={SKIP_RE}:)| | |
3397 | !+ | |
3398 | ''', fix_kv, code) | |
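| # Example (illustrative, non-strict mode): unquoted keys are quoted, hex | |
| # literals become decimal, undefined becomes null and trailing commas are dropped: | |
| #   js_to_json("{a: 'b', c: 0x1A, d: undefined,}") | |
| #   -> '{"a": "b", "c": 26, "d": null}' | |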
3399 | ||
3400 | ||
3401 | def qualities(quality_ids): | |
3402 | """ Get a numeric quality value out of a list of possible values """ | |
3403 | def q(qid): | |
3404 | try: | |
3405 | return quality_ids.index(qid) | |
3406 | except ValueError: | |
3407 | return -1 | |
3408 | return q | |
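| # Example (illustrative): | |
| #   q = qualities(['240p', '480p', '720p']) | |
| #   q('480p')   -> 1 | |
| #   q('1080p')  -> -1   # unknown values sort lowest | |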
3409 | ||
3410 | ||
3411 | POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist') | |
3412 | ||
3413 | ||
3414 | DEFAULT_OUTTMPL = { | |
3415 | 'default': '%(title)s [%(id)s].%(ext)s', | |
3416 | 'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s', | |
3417 | } | |
3418 | OUTTMPL_TYPES = { | |
3419 | 'chapter': None, | |
3420 | 'subtitle': None, | |
3421 | 'thumbnail': None, | |
3422 | 'description': 'description', | |
3423 | 'annotation': 'annotations.xml', | |
3424 | 'infojson': 'info.json', | |
3425 | 'link': None, | |
3426 | 'pl_video': None, | |
3427 | 'pl_thumbnail': None, | |
3428 | 'pl_description': 'description', | |
3429 | 'pl_infojson': 'info.json', | |
3430 | } | |
3431 | ||
3432 | # As of [1] format syntax is: | |
3433 | # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type | |
3434 | # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting | |
3435 | STR_FORMAT_RE_TMPL = r'''(?x) | |
3436 | (?<!%)(?P<prefix>(?:%%)*) | |
3437 | % | |
3438 | (?P<has_key>\((?P<key>{0})\))? | |
3439 | (?P<format> | |
3440 | (?P<conversion>[#0\-+ ]+)? | |
3441 | (?P<min_width>\d+)? | |
3442 | (?P<precision>\.\d+)? | |
3443 | (?P<len_mod>[hlL])? # unused in python | |
3444 | {1} # conversion type | |
3445 | ) | |
3446 | ''' | |
3447 | ||
3448 | ||
3449 | STR_FORMAT_TYPES = 'diouxXeEfFgGcrs' | |
3450 | ||
3451 | ||
3452 | def limit_length(s, length): | |
3453 | """ Add ellipses to overly long strings """ | |
3454 | if s is None: | |
3455 | return None | |
3456 | ELLIPSES = '...' | |
3457 | if len(s) > length: | |
3458 | return s[:length - len(ELLIPSES)] + ELLIPSES | |
3459 | return s | |
3460 | ||
3461 | ||
3462 | def version_tuple(v): | |
3463 | return tuple(int(e) for e in re.split(r'[-.]', v)) | |
3464 | ||
3465 | ||
3466 | def is_outdated_version(version, limit, assume_new=True): | |
3467 | if not version: | |
3468 | return not assume_new | |
3469 | try: | |
3470 | return version_tuple(version) < version_tuple(limit) | |
3471 | except ValueError: | |
3472 | return not assume_new | |
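| # Examples (illustrative version strings): | |
| #   version_tuple('2022.10.04')                      -> (2022, 10, 4) | |
| #   is_outdated_version('2022.10.04', '2023.01.06')  -> True | |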
3473 | ||
3474 | ||
3475 | def ytdl_is_updateable(): | |
3476 | """ Returns if yt-dlp can be updated with -U """ | |
3477 | ||
3478 | from .update import is_non_updateable | |
3479 | ||
3480 | return not is_non_updateable() | |
3481 | ||
3482 | ||
3483 | def args_to_str(args): | |
3484 | # Get a short string representation for a subprocess command | |
3485 | return ' '.join(compat_shlex_quote(a) for a in args) | |
3486 | ||
3487 | ||
3488 | def error_to_compat_str(err): | |
3489 | return str(err) | |
3490 | ||
3491 | ||
3492 | def error_to_str(err): | |
3493 | return f'{type(err).__name__}: {err}' | |
3494 | ||
3495 | ||
3496 | def mimetype2ext(mt, default=NO_DEFAULT): | |
3497 | if not isinstance(mt, str): | |
3498 | if default is not NO_DEFAULT: | |
3499 | return default | |
3500 | return None | |
3501 | ||
3502 | MAP = { | |
3503 | # video | |
3504 | '3gpp': '3gp', | |
3505 | 'mp2t': 'ts', | |
3506 | 'mp4': 'mp4', | |
3507 | 'mpeg': 'mpeg', | |
3508 | 'mpegurl': 'm3u8', | |
3509 | 'quicktime': 'mov', | |
3510 | 'webm': 'webm', | |
3511 | 'vp9': 'vp9', | |
3512 | 'x-flv': 'flv', | |
3513 | 'x-m4v': 'm4v', | |
3514 | 'x-matroska': 'mkv', | |
3515 | 'x-mng': 'mng', | |
3516 | 'x-mp4-fragmented': 'mp4', | |
3517 | 'x-ms-asf': 'asf', | |
3518 | 'x-ms-wmv': 'wmv', | |
3519 | 'x-msvideo': 'avi', | |
3520 | ||
3521 | # application (streaming playlists) | |
3522 | 'dash+xml': 'mpd', | |
3523 | 'f4m+xml': 'f4m', | |
3524 | 'hds+xml': 'f4m', | |
3525 | 'vnd.apple.mpegurl': 'm3u8', | |
3526 | 'vnd.ms-sstr+xml': 'ism', | |
3527 | 'x-mpegurl': 'm3u8', | |
3528 | ||
3529 | # audio | |
3530 | 'audio/mp4': 'm4a', | |
3531 | # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3. | |
3532 | # Using .mp3 as it's the most popular one | |
3533 | 'audio/mpeg': 'mp3', | |
3534 | 'audio/webm': 'webm', | |
3535 | 'audio/x-matroska': 'mka', | |
3536 | 'audio/x-mpegurl': 'm3u', | |
3537 | 'midi': 'mid', | |
3538 | 'ogg': 'ogg', | |
3539 | 'wav': 'wav', | |
3540 | 'wave': 'wav', | |
3541 | 'x-aac': 'aac', | |
3542 | 'x-flac': 'flac', | |
3543 | 'x-m4a': 'm4a', | |
3544 | 'x-realaudio': 'ra', | |
3545 | 'x-wav': 'wav', | |
3546 | ||
3547 | # image | |
3548 | 'avif': 'avif', | |
3549 | 'bmp': 'bmp', | |
3550 | 'gif': 'gif', | |
3551 | 'jpeg': 'jpg', | |
3552 | 'png': 'png', | |
3553 | 'svg+xml': 'svg', | |
3554 | 'tiff': 'tif', | |
3555 | 'vnd.wap.wbmp': 'wbmp', | |
3556 | 'webp': 'webp', | |
3557 | 'x-icon': 'ico', | |
3558 | 'x-jng': 'jng', | |
3559 | 'x-ms-bmp': 'bmp', | |
3560 | ||
3561 | # caption | |
3562 | 'filmstrip+json': 'fs', | |
3563 | 'smptett+xml': 'tt', | |
3564 | 'ttaf+xml': 'dfxp', | |
3565 | 'ttml+xml': 'ttml', | |
3566 | 'x-ms-sami': 'sami', | |
3567 | ||
3568 | # misc | |
3569 | 'gzip': 'gz', | |
3570 | 'json': 'json', | |
3571 | 'xml': 'xml', | |
3572 | 'zip': 'zip', | |
3573 | } | |
3574 | ||
3575 | mimetype = mt.partition(';')[0].strip().lower() | |
3576 | _, _, subtype = mimetype.rpartition('/') | |
3577 | ||
3578 | ext = traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1]) | |
3579 | if ext: | |
3580 | return ext | |
3581 | elif default is not NO_DEFAULT: | |
3582 | return default | |
3583 | return subtype.replace('+', '.') | |
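| # Examples (illustrative): | |
| #   mimetype2ext('application/vnd.apple.mpegurl')    -> 'm3u8' | |
| #   mimetype2ext('video/mp4; codecs="avc1.42E01E"')  -> 'mp4' | |
| #   mimetype2ext('audio/mpeg')                       -> 'mp3' | |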
3584 | ||
3585 | ||
3586 | def ext2mimetype(ext_or_url): | |
3587 | if not ext_or_url: | |
3588 | return None | |
3589 | if '.' not in ext_or_url: | |
3590 | ext_or_url = f'file.{ext_or_url}' | |
3591 | return mimetypes.guess_type(ext_or_url)[0] | |
3592 | ||
3593 | ||
3594 | def parse_codecs(codecs_str): | |
3595 | # http://tools.ietf.org/html/rfc6381 | |
3596 | if not codecs_str: | |
3597 | return {} | |
3598 | split_codecs = list(filter(None, map( | |
3599 | str.strip, codecs_str.strip().strip(',').split(',')))) | |
3600 | vcodec, acodec, scodec, hdr = None, None, None, None | |
3601 | for full_codec in split_codecs: | |
3602 | parts = re.sub(r'0+(?=\d)', '', full_codec).split('.') | |
3603 | if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2', | |
3604 | 'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'): | |
3605 | if vcodec: | |
3606 | continue | |
3607 | vcodec = full_codec | |
3608 | if parts[0] in ('dvh1', 'dvhe'): | |
3609 | hdr = 'DV' | |
3610 | elif parts[0] == 'av1' and traverse_obj(parts, 3) == '10': | |
3611 | hdr = 'HDR10' | |
3612 | elif parts[:2] == ['vp9', '2']: | |
3613 | hdr = 'HDR10' | |
3614 | elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4', | |
3615 | 'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'): | |
3616 | acodec = acodec or full_codec | |
3617 | elif parts[0] in ('stpp', 'wvtt'): | |
3618 | scodec = scodec or full_codec | |
3619 | else: | |
3620 | write_string(f'WARNING: Unknown codec {full_codec}\n') | |
3621 | if vcodec or acodec or scodec: | |
3622 | return { | |
3623 | 'vcodec': vcodec or 'none', | |
3624 | 'acodec': acodec or 'none', | |
3625 | 'dynamic_range': hdr, | |
3626 | **({'scodec': scodec} if scodec is not None else {}), | |
3627 | } | |
3628 | elif len(split_codecs) == 2: | |
3629 | return { | |
3630 | 'vcodec': split_codecs[0], | |
3631 | 'acodec': split_codecs[1], | |
3632 | } | |
3633 | return {} | |
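| # Example (illustrative codec string): | |
| #   parse_codecs('avc1.640028, mp4a.40.2') | |
| #   -> {'vcodec': 'avc1.640028', 'acodec': 'mp4a.40.2', 'dynamic_range': None} | |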
3634 | ||
3635 | ||
3636 | def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None): | |
3637 | assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts) | |
3638 | ||
3639 | allow_mkv = not preferences or 'mkv' in preferences | |
3640 | ||
3641 | if allow_mkv and max(len(acodecs), len(vcodecs)) > 1: | |
3642 | return 'mkv' # TODO: any other format allows this? | |
3643 | ||
3644 | # TODO: Not all codecs supported by parse_codecs are handled here | |
3645 | COMPATIBLE_CODECS = { | |
3646 | 'mp4': { | |
3647 | 'av1', 'hevc', 'avc1', 'mp4a', 'ac-4', # fourcc (m3u8, mpd) | |
3648 | 'h264', 'aacl', 'ec-3', # Set in ISM | |
3649 | }, | |
3650 | 'webm': { | |
3651 | 'av1', 'vp9', 'vp8', 'opus', 'vrbs', | |
3652 | 'vp9x', 'vp8x', # in the webm spec | |
3653 | }, | |
3654 | } | |
3655 | ||
3656 | sanitize_codec = functools.partial(try_get, getter=lambda x: x[0].split('.')[0].replace('0', '')) | |
3657 | vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs) | |
3658 | ||
3659 | for ext in preferences or COMPATIBLE_CODECS.keys(): | |
3660 | codec_set = COMPATIBLE_CODECS.get(ext, set()) | |
3661 | if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)): | |
3662 | return ext | |
3663 | ||
3664 | COMPATIBLE_EXTS = ( | |
3665 | {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'}, | |
3666 | {'webm', 'weba'}, | |
3667 | ) | |
3668 | for ext in preferences or vexts: | |
3669 | current_exts = {ext, *vexts, *aexts} | |
3670 | if ext == 'mkv' or current_exts == {ext} or any( | |
3671 | ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS): | |
3672 | return ext | |
3673 | return 'mkv' if allow_mkv else preferences[-1] | |
3674 | ||
3675 | ||
3676 | def urlhandle_detect_ext(url_handle, default=NO_DEFAULT): | |
3677 | getheader = url_handle.headers.get | |
3678 | ||
3679 | cd = getheader('Content-Disposition') | |
3680 | if cd: | |
3681 | m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd) | |
3682 | if m: | |
3683 | e = determine_ext(m.group('filename'), default_ext=None) | |
3684 | if e: | |
3685 | return e | |
3686 | ||
3687 | meta_ext = getheader('x-amz-meta-name') | |
3688 | if meta_ext: | |
3689 | e = meta_ext.rpartition('.')[2] | |
3690 | if e: | |
3691 | return e | |
3692 | ||
3693 | return mimetype2ext(getheader('Content-Type'), default=default) | |
3694 | ||
3695 | ||
3696 | def encode_data_uri(data, mime_type): | |
3697 | return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii')) | |
3698 | ||
3699 | ||
3700 | def age_restricted(content_limit, age_limit): | |
3701 | """ Returns True iff the content should be blocked """ | |
3702 | ||
3703 | if age_limit is None: # No limit set | |
3704 | return False | |
3705 | if content_limit is None: | |
3706 | return False # Content available for everyone | |
3707 | return age_limit < content_limit | |
3708 | ||
3709 | ||
3710 | # List of known byte-order-marks (BOM) | |
3711 | BOMS = [ | |
3712 | (b'\xef\xbb\xbf', 'utf-8'), | |
3713 | (b'\x00\x00\xfe\xff', 'utf-32-be'), | |
3714 | (b'\xff\xfe\x00\x00', 'utf-32-le'), | |
3715 | (b'\xff\xfe', 'utf-16-le'), | |
3716 | (b'\xfe\xff', 'utf-16-be'), | |
3717 | ] | |
3718 | ||
3719 | ||
3720 | def is_html(first_bytes): | |
3721 | """ Detect whether a file contains HTML by examining its first bytes. """ | |
3722 | ||
3723 | encoding = 'utf-8' | |
3724 | for bom, enc in BOMS: | |
3725 | while first_bytes.startswith(bom): | |
3726 | encoding, first_bytes = enc, first_bytes[len(bom):] | |
3727 | ||
3728 | return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace')) | |
3729 | ||
3730 | ||
3731 | def determine_protocol(info_dict): | |
3732 | protocol = info_dict.get('protocol') | |
3733 | if protocol is not None: | |
3734 | return protocol | |
3735 | ||
3736 | url = sanitize_url(info_dict['url']) | |
3737 | if url.startswith('rtmp'): | |
3738 | return 'rtmp' | |
3739 | elif url.startswith('mms'): | |
3740 | return 'mms' | |
3741 | elif url.startswith('rtsp'): | |
3742 | return 'rtsp' | |
3743 | ||
3744 | ext = determine_ext(url) | |
3745 | if ext == 'm3u8': | |
3746 | return 'm3u8' if info_dict.get('is_live') else 'm3u8_native' | |
3747 | elif ext == 'f4m': | |
3748 | return 'f4m' | |
3749 | ||
3750 | return urllib.parse.urlparse(url).scheme | |
3751 | ||
3752 | ||
3753 | def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False): | |
3754 | """ Render a list of rows, each as a list of values. | |
3755 | Text after a \t will be right aligned """ | |
3756 | def width(string): | |
3757 | return len(remove_terminal_sequences(string).replace('\t', '')) | |
3758 | ||
3759 | def get_max_lens(table): | |
3760 | return [max(width(str(v)) for v in col) for col in zip(*table)] | |
3761 | ||
3762 | def filter_using_list(row, filterArray): | |
3763 | return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take] | |
3764 | ||
3765 | max_lens = get_max_lens(data) if hide_empty else [] | |
3766 | header_row = filter_using_list(header_row, max_lens) | |
3767 | data = [filter_using_list(row, max_lens) for row in data] | |
3768 | ||
3769 | table = [header_row] + data | |
3770 | max_lens = get_max_lens(table) | |
3771 | extra_gap += 1 | |
3772 | if delim: | |
3773 | table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data | |
3774 | table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter | |
3775 | for row in table: | |
3776 | for pos, text in enumerate(map(str, row)): | |
3777 | if '\t' in text: | |
3778 | row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap | |
3779 | else: | |
3780 | row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap) | |
3781 | ret = '\n'.join(''.join(row).rstrip() for row in table) | |
3782 | return ret | |
3783 | ||
3784 | ||
3785 | def _match_one(filter_part, dct, incomplete): | |
3786 | # TODO: Generalize code with YoutubeDL._build_format_filter | |
3787 | STRING_OPERATORS = { | |
3788 | '*=': operator.contains, | |
3789 | '^=': lambda attr, value: attr.startswith(value), | |
3790 | '$=': lambda attr, value: attr.endswith(value), | |
3791 | '~=': lambda attr, value: re.search(value, attr), | |
3792 | } | |
3793 | COMPARISON_OPERATORS = { | |
3794 | **STRING_OPERATORS, | |
3795 | '<=': operator.le, # "<=" must be defined above "<" | |
3796 | '<': operator.lt, | |
3797 | '>=': operator.ge, | |
3798 | '>': operator.gt, | |
3799 | '=': operator.eq, | |
3800 | } | |
3801 | ||
3802 | if isinstance(incomplete, bool): | |
3803 | is_incomplete = lambda _: incomplete | |
3804 | else: | |
3805 | is_incomplete = lambda k: k in incomplete | |
3806 | ||
3807 | operator_rex = re.compile(r'''(?x) | |
3808 | (?P<key>[a-z_]+) | |
3809 | \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s* | |
3810 | (?: | |
3811 | (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)| | |
3812 | (?P<strval>.+?) | |
3813 | ) | |
3814 | ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))) | |
3815 | m = operator_rex.fullmatch(filter_part.strip()) | |
3816 | if m: | |
3817 | m = m.groupdict() | |
3818 | unnegated_op = COMPARISON_OPERATORS[m['op']] | |
3819 | if m['negation']: | |
3820 | op = lambda attr, value: not unnegated_op(attr, value) | |
3821 | else: | |
3822 | op = unnegated_op | |
3823 | comparison_value = m['quotedstrval'] or m['strval']  # the regex above has no 'intval' group | |
3824 | if m['quote']: | |
3825 | comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote']) | |
3826 | actual_value = dct.get(m['key']) | |
3827 | numeric_comparison = None | |
3828 | if isinstance(actual_value, (int, float)): | |
3829 | # If the original field is a string and the matching comparison value is | |
3830 | # a number, we should respect the origin of the original field | |
3831 | # and process the comparison value as a string (see | |
3832 | # https://github.com/ytdl-org/youtube-dl/issues/11082) | |
3833 | try: | |
3834 | numeric_comparison = int(comparison_value) | |
3835 | except ValueError: | |
3836 | numeric_comparison = parse_filesize(comparison_value) | |
3837 | if numeric_comparison is None: | |
3838 | numeric_comparison = parse_filesize(f'{comparison_value}B') | |
3839 | if numeric_comparison is None: | |
3840 | numeric_comparison = parse_duration(comparison_value) | |
3841 | if numeric_comparison is not None and m['op'] in STRING_OPERATORS: | |
3842 | raise ValueError('Operator %s only supports string values!' % m['op']) | |
3843 | if actual_value is None: | |
3844 | return is_incomplete(m['key']) or m['none_inclusive'] | |
3845 | return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison) | |
3846 | ||
3847 | UNARY_OPERATORS = { | |
3848 | '': lambda v: (v is True) if isinstance(v, bool) else (v is not None), | |
3849 | '!': lambda v: (v is False) if isinstance(v, bool) else (v is None), | |
3850 | } | |
3851 | operator_rex = re.compile(r'''(?x) | |
3852 | (?P<op>%s)\s*(?P<key>[a-z_]+) | |
3853 | ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys()))) | |
3854 | m = operator_rex.fullmatch(filter_part.strip()) | |
3855 | if m: | |
3856 | op = UNARY_OPERATORS[m.group('op')] | |
3857 | actual_value = dct.get(m.group('key')) | |
3858 | if is_incomplete(m.group('key')) and actual_value is None: | |
3859 | return True | |
3860 | return op(actual_value) | |
3861 | ||
3862 | raise ValueError('Invalid filter part %r' % filter_part) | |
3863 | ||
3864 | ||
3865 | def match_str(filter_str, dct, incomplete=False): | |
3866 | """ Filter a dictionary with a simple string syntax. | |
3867 | @returns Whether the filter passes | |
3868 | @param incomplete Set of keys that are expected to be missing from dct. | |
3869 | Can be True/False to indicate all/none of the keys may be missing. | |
3870 | All conditions on incomplete keys pass if the key is missing | |
3871 | """ | |
3872 | return all( | |
3873 | _match_one(filter_part.replace(r'\&', '&'), dct, incomplete) | |
3874 | for filter_part in re.split(r'(?<!\\)&', filter_str)) | |
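| # Examples (illustrative field names and values): | |
| #   match_str('duration > 60 & title *= foo', {'duration': 90, 'title': 'foobar'})  -> True | |
| #   match_str('!is_live', {'is_live': True})                                        -> False | |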
3875 | ||
3876 | ||
3877 | def match_filter_func(filters): | |
3878 | if not filters: | |
3879 | return None | |
3880 | filters = set(variadic(filters)) | |
3881 | ||
3882 | interactive = '-' in filters | |
3883 | if interactive: | |
3884 | filters.remove('-') | |
3885 | ||
3886 | def _match_func(info_dict, incomplete=False): | |
3887 | if not filters or any(match_str(f, info_dict, incomplete) for f in filters): | |
3888 | return NO_DEFAULT if interactive and not incomplete else None | |
3889 | else: | |
3890 | video_title = info_dict.get('title') or info_dict.get('id') or 'entry' | |
3891 | filter_str = ') | ('.join(map(str.strip, filters)) | |
3892 | return f'{video_title} does not pass filter ({filter_str}), skipping ..' | |
3893 | return _match_func | |
3894 | ||
3895 | ||
3896 | class download_range_func: | |
3897 | def __init__(self, chapters, ranges): | |
3898 | self.chapters, self.ranges = chapters, ranges | |
3899 | ||
3900 | def __call__(self, info_dict, ydl): | |
3901 | if not self.ranges and not self.chapters: | |
3902 | yield {} | |
3903 | ||
3904 | warning = ('There are no chapters matching the regex' if info_dict.get('chapters') | |
3905 | else 'Cannot match chapters since chapter information is unavailable') | |
3906 | for regex in self.chapters or []: | |
3907 | for i, chapter in enumerate(info_dict.get('chapters') or []): | |
3908 | if re.search(regex, chapter['title']): | |
3909 | warning = None | |
3910 | yield {**chapter, 'index': i} | |
3911 | if self.chapters and warning: | |
3912 | ydl.to_screen(f'[info] {info_dict["id"]}: {warning}') | |
3913 | ||
3914 | yield from ({'start_time': start, 'end_time': end} for start, end in self.ranges or []) | |
3915 | ||
3916 | def __eq__(self, other): | |
3917 | return (isinstance(other, download_range_func) | |
3918 | and self.chapters == other.chapters and self.ranges == other.ranges) | |
3919 | ||
3920 | def __repr__(self): | |
3921 | return f'{type(self).__name__}({self.chapters}, {self.ranges})' | |
3922 | ||
3923 | ||
3924 | def parse_dfxp_time_expr(time_expr): | |
3925 | if not time_expr: | |
3926 | return | |
3927 | ||
3928 | mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr) | |
3929 | if mobj: | |
3930 | return float(mobj.group('time_offset')) | |
3931 | ||
3932 | mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr) | |
3933 | if mobj: | |
3934 | return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.')) | |
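| # Examples (illustrative; NUMBER_RE is defined earlier in this module): | |
| #   parse_dfxp_time_expr('12s')          -> 12.0 | |
| #   parse_dfxp_time_expr('00:01:30.5')   -> 90.5 | |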
3935 | ||
3936 | ||
3937 | def srt_subtitles_timecode(seconds): | |
3938 | return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000) | |
3939 | ||
3940 | ||
3941 | def ass_subtitles_timecode(seconds): | |
3942 | time = timetuple_from_msec(seconds * 1000) | |
3943 | return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10) | |
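| # Examples (illustrative): | |
| #   srt_subtitles_timecode(3661.5)  -> '01:01:01,500' | |
| #   ass_subtitles_timecode(3661.5)  -> '1:01:01.50' | |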
3944 | ||
3945 | ||
3946 | def dfxp2srt(dfxp_data): | |
3947 | ''' | |
3948 | @param dfxp_data A bytes-like object containing DFXP data | |
3949 | @returns A unicode object containing converted SRT data | |
3950 | ''' | |
3951 | LEGACY_NAMESPACES = ( | |
3952 | (b'http://www.w3.org/ns/ttml', [ | |
3953 | b'http://www.w3.org/2004/11/ttaf1', | |
3954 | b'http://www.w3.org/2006/04/ttaf1', | |
3955 | b'http://www.w3.org/2006/10/ttaf1', | |
3956 | ]), | |
3957 | (b'http://www.w3.org/ns/ttml#styling', [ | |
3958 | b'http://www.w3.org/ns/ttml#style', | |
3959 | ]), | |
3960 | ) | |
3961 | ||
3962 | SUPPORTED_STYLING = [ | |
3963 | 'color', | |
3964 | 'fontFamily', | |
3965 | 'fontSize', | |
3966 | 'fontStyle', | |
3967 | 'fontWeight', | |
3968 | 'textDecoration' | |
3969 | ] | |
3970 | ||
3971 | _x = functools.partial(xpath_with_ns, ns_map={ | |
3972 | 'xml': 'http://www.w3.org/XML/1998/namespace', | |
3973 | 'ttml': 'http://www.w3.org/ns/ttml', | |
3974 | 'tts': 'http://www.w3.org/ns/ttml#styling', | |
3975 | }) | |
3976 | ||
3977 | styles = {} | |
3978 | default_style = {} | |
3979 | ||
3980 | class TTMLPElementParser: | |
3981 | _out = '' | |
3982 | _unclosed_elements = [] | |
3983 | _applied_styles = [] | |
3984 | ||
3985 | def start(self, tag, attrib): | |
3986 | if tag in (_x('ttml:br'), 'br'): | |
3987 | self._out += '\n' | |
3988 | else: | |
3989 | unclosed_elements = [] | |
3990 | style = {} | |
3991 | element_style_id = attrib.get('style') | |
3992 | if default_style: | |
3993 | style.update(default_style) | |
3994 | if element_style_id: | |
3995 | style.update(styles.get(element_style_id, {})) | |
3996 | for prop in SUPPORTED_STYLING: | |
3997 | prop_val = attrib.get(_x('tts:' + prop)) | |
3998 | if prop_val: | |
3999 | style[prop] = prop_val | |
4000 | if style: | |
4001 | font = '' | |
4002 | for k, v in sorted(style.items()): | |
4003 | if self._applied_styles and self._applied_styles[-1].get(k) == v: | |
4004 | continue | |
4005 | if k == 'color': | |
4006 | font += ' color="%s"' % v | |
4007 | elif k == 'fontSize': | |
4008 | font += ' size="%s"' % v | |
4009 | elif k == 'fontFamily': | |
4010 | font += ' face="%s"' % v | |
4011 | elif k == 'fontWeight' and v == 'bold': | |
4012 | self._out += '<b>' | |
4013 | unclosed_elements.append('b') | |
4014 | elif k == 'fontStyle' and v == 'italic': | |
4015 | self._out += '<i>' | |
4016 | unclosed_elements.append('i') | |
4017 | elif k == 'textDecoration' and v == 'underline': | |
4018 | self._out += '<u>' | |
4019 | unclosed_elements.append('u') | |
4020 | if font: | |
4021 | self._out += '<font' + font + '>' | |
4022 | unclosed_elements.append('font') | |
4023 | applied_style = {} | |
4024 | if self._applied_styles: | |
4025 | applied_style.update(self._applied_styles[-1]) | |
4026 | applied_style.update(style) | |
4027 | self._applied_styles.append(applied_style) | |
4028 | self._unclosed_elements.append(unclosed_elements) | |
4029 | ||
4030 | def end(self, tag): | |
4031 | if tag not in (_x('ttml:br'), 'br'): | |
4032 | unclosed_elements = self._unclosed_elements.pop() | |
4033 | for element in reversed(unclosed_elements): | |
4034 | self._out += '</%s>' % element | |
4035 | if unclosed_elements and self._applied_styles: | |
4036 | self._applied_styles.pop() | |
4037 | ||
4038 | def data(self, data): | |
4039 | self._out += data | |
4040 | ||
4041 | def close(self): | |
4042 | return self._out.strip() | |
4043 | ||
4044 | def parse_node(node): | |
4045 | target = TTMLPElementParser() | |
4046 | parser = xml.etree.ElementTree.XMLParser(target=target) | |
4047 | parser.feed(xml.etree.ElementTree.tostring(node)) | |
4048 | return parser.close() | |
4049 | ||
4050 | for k, v in LEGACY_NAMESPACES: | |
4051 | for ns in v: | |
4052 | dfxp_data = dfxp_data.replace(ns, k) | |
4053 | ||
4054 | dfxp = compat_etree_fromstring(dfxp_data) | |
4055 | out = [] | |
4056 | paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p') | |
4057 | ||
4058 | if not paras: | |
4059 | raise ValueError('Invalid dfxp/TTML subtitle') | |
4060 | ||
4061 | repeat = False | |
4062 | while True: | |
4063 | for style in dfxp.findall(_x('.//ttml:style')): | |
4064 | style_id = style.get('id') or style.get(_x('xml:id')) | |
4065 | if not style_id: | |
4066 | continue | |
4067 | parent_style_id = style.get('style') | |
4068 | if parent_style_id: | |
4069 | if parent_style_id not in styles: | |
4070 | repeat = True | |
4071 | continue | |
4072 | styles[style_id] = styles[parent_style_id].copy() | |
4073 | for prop in SUPPORTED_STYLING: | |
4074 | prop_val = style.get(_x('tts:' + prop)) | |
4075 | if prop_val: | |
4076 | styles.setdefault(style_id, {})[prop] = prop_val | |
4077 | if repeat: | |
4078 | repeat = False | |
4079 | else: | |
4080 | break | |
4081 | ||
4082 | for p in ('body', 'div'): | |
4083 | ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p]) | |
4084 | if ele is None: | |
4085 | continue | |
4086 | style = styles.get(ele.get('style')) | |
4087 | if not style: | |
4088 | continue | |
4089 | default_style.update(style) | |
4090 | ||
4091 | for para, index in zip(paras, itertools.count(1)): | |
4092 | begin_time = parse_dfxp_time_expr(para.attrib.get('begin')) | |
4093 | end_time = parse_dfxp_time_expr(para.attrib.get('end')) | |
4094 | dur = parse_dfxp_time_expr(para.attrib.get('dur')) | |
4095 | if begin_time is None: | |
4096 | continue | |
4097 | if not end_time: | |
4098 | if not dur: | |
4099 | continue | |
4100 | end_time = begin_time + dur | |
4101 | out.append('%d\n%s --> %s\n%s\n\n' % ( | |
4102 | index, | |
4103 | srt_subtitles_timecode(begin_time), | |
4104 | srt_subtitles_timecode(end_time), | |
4105 | parse_node(para))) | |
4106 | ||
4107 | return ''.join(out) | |
4108 | ||
4109 | ||
4110 | def cli_option(params, command_option, param, separator=None): | |
4111 | param = params.get(param) | |
4112 | return ([] if param is None | |
4113 | else [command_option, str(param)] if separator is None | |
4114 | else [f'{command_option}{separator}{param}']) | |
4115 | ||
4116 | ||
4117 | def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None): | |
4118 | param = params.get(param) | |
4119 | assert param in (True, False, None) | |
4120 | return cli_option({True: true_value, False: false_value}, command_option, param, separator) | |
4121 | ||
4122 | ||
4123 | def cli_valueless_option(params, command_option, param, expected_value=True): | |
4124 | return [command_option] if params.get(param) == expected_value else [] | |
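| # Examples (option and param names below are illustrative only): | |
| #   cli_option({'proxy': 'socks5://127.0.0.1:1080'}, '--proxy', 'proxy') | |
| #   -> ['--proxy', 'socks5://127.0.0.1:1080'] | |
| #   cli_bool_option({'check': True}, '--check-certificate', 'check')  -> ['--check-certificate', 'true'] | |
| #   cli_valueless_option({'quiet': True}, '--quiet', 'quiet')         -> ['--quiet'] | |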
4125 | ||
4126 | ||
4127 | def cli_configuration_args(argdict, keys, default=[], use_compat=True): | |
4128 | if isinstance(argdict, (list, tuple)): # for backward compatibility | |
4129 | if use_compat: | |
4130 | return argdict | |
4131 | else: | |
4132 | argdict = None | |
4133 | if argdict is None: | |
4134 | return default | |
4135 | assert isinstance(argdict, dict) | |
4136 | ||
4137 | assert isinstance(keys, (list, tuple)) | |
4138 | for key_list in keys: | |
4139 | arg_list = list(filter( | |
4140 | lambda x: x is not None, | |
4141 | [argdict.get(key.lower()) for key in variadic(key_list)])) | |
4142 | if arg_list: | |
4143 | return [arg for args in arg_list for arg in args] | |
4144 | return default | |
4145 | ||
4146 | ||
4147 | def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True): | |
4148 | main_key, exe = main_key.lower(), exe.lower() | |
4149 | root_key = exe if main_key == exe else f'{main_key}+{exe}' | |
4150 | keys = [f'{root_key}{k}' for k in (keys or [''])] | |
4151 | if root_key in keys: | |
4152 | if main_key != exe: | |
4153 | keys.append((main_key, exe)) | |
4154 | keys.append('default') | |
4155 | else: | |
4156 | use_compat = False | |
4157 | return cli_configuration_args(argdict, keys, default, use_compat) | |
4158 | ||
4159 | ||
4160 | class ISO639Utils: | |
4161 | # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt | |
4162 | _lang_map = { | |
4163 | 'aa': 'aar', | |
4164 | 'ab': 'abk', | |
4165 | 'ae': 'ave', | |
4166 | 'af': 'afr', | |
4167 | 'ak': 'aka', | |
4168 | 'am': 'amh', | |
4169 | 'an': 'arg', | |
4170 | 'ar': 'ara', | |
4171 | 'as': 'asm', | |
4172 | 'av': 'ava', | |
4173 | 'ay': 'aym', | |
4174 | 'az': 'aze', | |
4175 | 'ba': 'bak', | |
4176 | 'be': 'bel', | |
4177 | 'bg': 'bul', | |
4178 | 'bh': 'bih', | |
4179 | 'bi': 'bis', | |
4180 | 'bm': 'bam', | |
4181 | 'bn': 'ben', | |
4182 | 'bo': 'bod', | |
4183 | 'br': 'bre', | |
4184 | 'bs': 'bos', | |
4185 | 'ca': 'cat', | |
4186 | 'ce': 'che', | |
4187 | 'ch': 'cha', | |
4188 | 'co': 'cos', | |
4189 | 'cr': 'cre', | |
4190 | 'cs': 'ces', | |
4191 | 'cu': 'chu', | |
4192 | 'cv': 'chv', | |
4193 | 'cy': 'cym', | |
4194 | 'da': 'dan', | |
4195 | 'de': 'deu', | |
4196 | 'dv': 'div', | |
4197 | 'dz': 'dzo', | |
4198 | 'ee': 'ewe', | |
4199 | 'el': 'ell', | |
4200 | 'en': 'eng', | |
4201 | 'eo': 'epo', | |
4202 | 'es': 'spa', | |
4203 | 'et': 'est', | |
4204 | 'eu': 'eus', | |
4205 | 'fa': 'fas', | |
4206 | 'ff': 'ful', | |
4207 | 'fi': 'fin', | |
4208 | 'fj': 'fij', | |
4209 | 'fo': 'fao', | |
4210 | 'fr': 'fra', | |
4211 | 'fy': 'fry', | |
4212 | 'ga': 'gle', | |
4213 | 'gd': 'gla', | |
4214 | 'gl': 'glg', | |
4215 | 'gn': 'grn', | |
4216 | 'gu': 'guj', | |
4217 | 'gv': 'glv', | |
4218 | 'ha': 'hau', | |
4219 | 'he': 'heb', | |
4220 | 'iw': 'heb', # Replaced by he in 1989 revision | |
4221 | 'hi': 'hin', | |
4222 | 'ho': 'hmo', | |
4223 | 'hr': 'hrv', | |
4224 | 'ht': 'hat', | |
4225 | 'hu': 'hun', | |
4226 | 'hy': 'hye', | |
4227 | 'hz': 'her', | |
4228 | 'ia': 'ina', | |
4229 | 'id': 'ind', | |
4230 | 'in': 'ind', # Replaced by id in 1989 revision | |
4231 | 'ie': 'ile', | |
4232 | 'ig': 'ibo', | |
4233 | 'ii': 'iii', | |
4234 | 'ik': 'ipk', | |
4235 | 'io': 'ido', | |
4236 | 'is': 'isl', | |
4237 | 'it': 'ita', | |
4238 | 'iu': 'iku', | |
4239 | 'ja': 'jpn', | |
4240 | 'jv': 'jav', | |
4241 | 'ka': 'kat', | |
4242 | 'kg': 'kon', | |
4243 | 'ki': 'kik', | |
4244 | 'kj': 'kua', | |
4245 | 'kk': 'kaz', | |
4246 | 'kl': 'kal', | |
4247 | 'km': 'khm', | |
4248 | 'kn': 'kan', | |
4249 | 'ko': 'kor', | |
4250 | 'kr': 'kau', | |
4251 | 'ks': 'kas', | |
4252 | 'ku': 'kur', | |
4253 | 'kv': 'kom', | |
4254 | 'kw': 'cor', | |
4255 | 'ky': 'kir', | |
4256 | 'la': 'lat', | |
4257 | 'lb': 'ltz', | |
4258 | 'lg': 'lug', | |
4259 | 'li': 'lim', | |
4260 | 'ln': 'lin', | |
4261 | 'lo': 'lao', | |
4262 | 'lt': 'lit', | |
4263 | 'lu': 'lub', | |
4264 | 'lv': 'lav', | |
4265 | 'mg': 'mlg', | |
4266 | 'mh': 'mah', | |
4267 | 'mi': 'mri', | |
4268 | 'mk': 'mkd', | |
4269 | 'ml': 'mal', | |
4270 | 'mn': 'mon', | |
4271 | 'mr': 'mar', | |
4272 | 'ms': 'msa', | |
4273 | 'mt': 'mlt', | |
4274 | 'my': 'mya', | |
4275 | 'na': 'nau', | |
4276 | 'nb': 'nob', | |
4277 | 'nd': 'nde', | |
4278 | 'ne': 'nep', | |
4279 | 'ng': 'ndo', | |
4280 | 'nl': 'nld', | |
4281 | 'nn': 'nno', | |
4282 | 'no': 'nor', | |
4283 | 'nr': 'nbl', | |
4284 | 'nv': 'nav', | |
4285 | 'ny': 'nya', | |
4286 | 'oc': 'oci', | |
4287 | 'oj': 'oji', | |
4288 | 'om': 'orm', | |
4289 | 'or': 'ori', | |
4290 | 'os': 'oss', | |
4291 | 'pa': 'pan', | |
4292 | 'pi': 'pli', | |
4293 | 'pl': 'pol', | |
4294 | 'ps': 'pus', | |
4295 | 'pt': 'por', | |
4296 | 'qu': 'que', | |
4297 | 'rm': 'roh', | |
4298 | 'rn': 'run', | |
4299 | 'ro': 'ron', | |
4300 | 'ru': 'rus', | |
4301 | 'rw': 'kin', | |
4302 | 'sa': 'san', | |
4303 | 'sc': 'srd', | |
4304 | 'sd': 'snd', | |
4305 | 'se': 'sme', | |
4306 | 'sg': 'sag', | |
4307 | 'si': 'sin', | |
4308 | 'sk': 'slk', | |
4309 | 'sl': 'slv', | |
4310 | 'sm': 'smo', | |
4311 | 'sn': 'sna', | |
4312 | 'so': 'som', | |
4313 | 'sq': 'sqi', | |
4314 | 'sr': 'srp', | |
4315 | 'ss': 'ssw', | |
4316 | 'st': 'sot', | |
4317 | 'su': 'sun', | |
4318 | 'sv': 'swe', | |
4319 | 'sw': 'swa', | |
4320 | 'ta': 'tam', | |
4321 | 'te': 'tel', | |
4322 | 'tg': 'tgk', | |
4323 | 'th': 'tha', | |
4324 | 'ti': 'tir', | |
4325 | 'tk': 'tuk', | |
4326 | 'tl': 'tgl', | |
4327 | 'tn': 'tsn', | |
4328 | 'to': 'ton', | |
4329 | 'tr': 'tur', | |
4330 | 'ts': 'tso', | |
4331 | 'tt': 'tat', | |
4332 | 'tw': 'twi', | |
4333 | 'ty': 'tah', | |
4334 | 'ug': 'uig', | |
4335 | 'uk': 'ukr', | |
4336 | 'ur': 'urd', | |
4337 | 'uz': 'uzb', | |
4338 | 've': 'ven', | |
4339 | 'vi': 'vie', | |
4340 | 'vo': 'vol', | |
4341 | 'wa': 'wln', | |
4342 | 'wo': 'wol', | |
4343 | 'xh': 'xho', | |
4344 | 'yi': 'yid', | |
4345 | 'ji': 'yid', # Replaced by yi in 1989 revision | |
4346 | 'yo': 'yor', | |
4347 | 'za': 'zha', | |
4348 | 'zh': 'zho', | |
4349 | 'zu': 'zul', | |
4350 | } | |
4351 | ||
4352 | @classmethod | |
4353 | def short2long(cls, code): | |
4354 | """Convert language code from ISO 639-1 to ISO 639-2/T""" | |
4355 | return cls._lang_map.get(code[:2]) | |
4356 | ||
4357 | @classmethod | |
4358 | def long2short(cls, code): | |
4359 | """Convert language code from ISO 639-2/T to ISO 639-1""" | |
4360 | for short_name, long_name in cls._lang_map.items(): | |
4361 | if long_name == code: | |
4362 | return short_name | |
4363 | ||
4364 | ||
4365 | class ISO3166Utils: | |
4366 | # From http://data.okfn.org/data/core/country-list | |
4367 | _country_map = { | |
4368 | 'AF': 'Afghanistan', | |
4369 | 'AX': 'Åland Islands', | |
4370 | 'AL': 'Albania', | |
4371 | 'DZ': 'Algeria', | |
4372 | 'AS': 'American Samoa', | |
4373 | 'AD': 'Andorra', | |
4374 | 'AO': 'Angola', | |
4375 | 'AI': 'Anguilla', | |
4376 | 'AQ': 'Antarctica', | |
4377 | 'AG': 'Antigua and Barbuda', | |
4378 | 'AR': 'Argentina', | |
4379 | 'AM': 'Armenia', | |
4380 | 'AW': 'Aruba', | |
4381 | 'AU': 'Australia', | |
4382 | 'AT': 'Austria', | |
4383 | 'AZ': 'Azerbaijan', | |
4384 | 'BS': 'Bahamas', | |
4385 | 'BH': 'Bahrain', | |
4386 | 'BD': 'Bangladesh', | |
4387 | 'BB': 'Barbados', | |
4388 | 'BY': 'Belarus', | |
4389 | 'BE': 'Belgium', | |
4390 | 'BZ': 'Belize', | |
4391 | 'BJ': 'Benin', | |
4392 | 'BM': 'Bermuda', | |
4393 | 'BT': 'Bhutan', | |
4394 | 'BO': 'Bolivia, Plurinational State of', | |
4395 | 'BQ': 'Bonaire, Sint Eustatius and Saba', | |
4396 | 'BA': 'Bosnia and Herzegovina', | |
4397 | 'BW': 'Botswana', | |
4398 | 'BV': 'Bouvet Island', | |
4399 | 'BR': 'Brazil', | |
4400 | 'IO': 'British Indian Ocean Territory', | |
4401 | 'BN': 'Brunei Darussalam', | |
4402 | 'BG': 'Bulgaria', | |
4403 | 'BF': 'Burkina Faso', | |
4404 | 'BI': 'Burundi', | |
4405 | 'KH': 'Cambodia', | |
4406 | 'CM': 'Cameroon', | |
4407 | 'CA': 'Canada', | |
4408 | 'CV': 'Cape Verde', | |
4409 | 'KY': 'Cayman Islands', | |
4410 | 'CF': 'Central African Republic', | |
4411 | 'TD': 'Chad', | |
4412 | 'CL': 'Chile', | |
4413 | 'CN': 'China', | |
4414 | 'CX': 'Christmas Island', | |
4415 | 'CC': 'Cocos (Keeling) Islands', | |
4416 | 'CO': 'Colombia', | |
4417 | 'KM': 'Comoros', | |
4418 | 'CG': 'Congo', | |
4419 | 'CD': 'Congo, the Democratic Republic of the', | |
4420 | 'CK': 'Cook Islands', | |
4421 | 'CR': 'Costa Rica', | |
4422 | 'CI': 'Côte d\'Ivoire', | |
4423 | 'HR': 'Croatia', | |
4424 | 'CU': 'Cuba', | |
4425 | 'CW': 'Curaçao', | |
4426 | 'CY': 'Cyprus', | |
4427 | 'CZ': 'Czech Republic', | |
4428 | 'DK': 'Denmark', | |
4429 | 'DJ': 'Djibouti', | |
4430 | 'DM': 'Dominica', | |
4431 | 'DO': 'Dominican Republic', | |
4432 | 'EC': 'Ecuador', | |
4433 | 'EG': 'Egypt', | |
4434 | 'SV': 'El Salvador', | |
4435 | 'GQ': 'Equatorial Guinea', | |
4436 | 'ER': 'Eritrea', | |
4437 | 'EE': 'Estonia', | |
4438 | 'ET': 'Ethiopia', | |
4439 | 'FK': 'Falkland Islands (Malvinas)', | |
4440 | 'FO': 'Faroe Islands', | |
4441 | 'FJ': 'Fiji', | |
4442 | 'FI': 'Finland', | |
4443 | 'FR': 'France', | |
4444 | 'GF': 'French Guiana', | |
4445 | 'PF': 'French Polynesia', | |
4446 | 'TF': 'French Southern Territories', | |
4447 | 'GA': 'Gabon', | |
4448 | 'GM': 'Gambia', | |
4449 | 'GE': 'Georgia', | |
4450 | 'DE': 'Germany', | |
4451 | 'GH': 'Ghana', | |
4452 | 'GI': 'Gibraltar', | |
4453 | 'GR': 'Greece', | |
4454 | 'GL': 'Greenland', | |
4455 | 'GD': 'Grenada', | |
4456 | 'GP': 'Guadeloupe', | |
4457 | 'GU': 'Guam', | |
4458 | 'GT': 'Guatemala', | |
4459 | 'GG': 'Guernsey', | |
4460 | 'GN': 'Guinea', | |
4461 | 'GW': 'Guinea-Bissau', | |
4462 | 'GY': 'Guyana', | |
4463 | 'HT': 'Haiti', | |
4464 | 'HM': 'Heard Island and McDonald Islands', | |
4465 | 'VA': 'Holy See (Vatican City State)', | |
4466 | 'HN': 'Honduras', | |
4467 | 'HK': 'Hong Kong', | |
4468 | 'HU': 'Hungary', | |
4469 | 'IS': 'Iceland', | |
4470 | 'IN': 'India', | |
4471 | 'ID': 'Indonesia', | |
4472 | 'IR': 'Iran, Islamic Republic of', | |
4473 | 'IQ': 'Iraq', | |
4474 | 'IE': 'Ireland', | |
4475 | 'IM': 'Isle of Man', | |
4476 | 'IL': 'Israel', | |
4477 | 'IT': 'Italy', | |
4478 | 'JM': 'Jamaica', | |
4479 | 'JP': 'Japan', | |
4480 | 'JE': 'Jersey', | |
4481 | 'JO': 'Jordan', | |
4482 | 'KZ': 'Kazakhstan', | |
4483 | 'KE': 'Kenya', | |
4484 | 'KI': 'Kiribati', | |
4485 | 'KP': 'Korea, Democratic People\'s Republic of', | |
4486 | 'KR': 'Korea, Republic of', | |
4487 | 'KW': 'Kuwait', | |
4488 | 'KG': 'Kyrgyzstan', | |
4489 | 'LA': 'Lao People\'s Democratic Republic', | |
4490 | 'LV': 'Latvia', | |
4491 | 'LB': 'Lebanon', | |
4492 | 'LS': 'Lesotho', | |
4493 | 'LR': 'Liberia', | |
4494 | 'LY': 'Libya', | |
4495 | 'LI': 'Liechtenstein', | |
4496 | 'LT': 'Lithuania', | |
4497 | 'LU': 'Luxembourg', | |
4498 | 'MO': 'Macao', | |
4499 | 'MK': 'Macedonia, the Former Yugoslav Republic of', | |
4500 | 'MG': 'Madagascar', | |
4501 | 'MW': 'Malawi', | |
4502 | 'MY': 'Malaysia', | |
4503 | 'MV': 'Maldives', | |
4504 | 'ML': 'Mali', | |
4505 | 'MT': 'Malta', | |
4506 | 'MH': 'Marshall Islands', | |
4507 | 'MQ': 'Martinique', | |
4508 | 'MR': 'Mauritania', | |
4509 | 'MU': 'Mauritius', | |
4510 | 'YT': 'Mayotte', | |
4511 | 'MX': 'Mexico', | |
4512 | 'FM': 'Micronesia, Federated States of', | |
4513 | 'MD': 'Moldova, Republic of', | |
4514 | 'MC': 'Monaco', | |
4515 | 'MN': 'Mongolia', | |
4516 | 'ME': 'Montenegro', | |
4517 | 'MS': 'Montserrat', | |
4518 | 'MA': 'Morocco', | |
4519 | 'MZ': 'Mozambique', | |
4520 | 'MM': 'Myanmar', | |
4521 | 'NA': 'Namibia', | |
4522 | 'NR': 'Nauru', | |
4523 | 'NP': 'Nepal', | |
4524 | 'NL': 'Netherlands', | |
4525 | 'NC': 'New Caledonia', | |
4526 | 'NZ': 'New Zealand', | |
4527 | 'NI': 'Nicaragua', | |
4528 | 'NE': 'Niger', | |
4529 | 'NG': 'Nigeria', | |
4530 | 'NU': 'Niue', | |
4531 | 'NF': 'Norfolk Island', | |
4532 | 'MP': 'Northern Mariana Islands', | |
4533 | 'NO': 'Norway', | |
4534 | 'OM': 'Oman', | |
4535 | 'PK': 'Pakistan', | |
4536 | 'PW': 'Palau', | |
4537 | 'PS': 'Palestine, State of', | |
4538 | 'PA': 'Panama', | |
4539 | 'PG': 'Papua New Guinea', | |
4540 | 'PY': 'Paraguay', | |
4541 | 'PE': 'Peru', | |
4542 | 'PH': 'Philippines', | |
4543 | 'PN': 'Pitcairn', | |
4544 | 'PL': 'Poland', | |
4545 | 'PT': 'Portugal', | |
4546 | 'PR': 'Puerto Rico', | |
4547 | 'QA': 'Qatar', | |
4548 | 'RE': 'Réunion', | |
4549 | 'RO': 'Romania', | |
4550 | 'RU': 'Russian Federation', | |
4551 | 'RW': 'Rwanda', | |
4552 | 'BL': 'Saint Barthélemy', | |
4553 | 'SH': 'Saint Helena, Ascension and Tristan da Cunha', | |
4554 | 'KN': 'Saint Kitts and Nevis', | |
4555 | 'LC': 'Saint Lucia', | |
4556 | 'MF': 'Saint Martin (French part)', | |
4557 | 'PM': 'Saint Pierre and Miquelon', | |
4558 | 'VC': 'Saint Vincent and the Grenadines', | |
4559 | 'WS': 'Samoa', | |
4560 | 'SM': 'San Marino', | |
4561 | 'ST': 'Sao Tome and Principe', | |
4562 | 'SA': 'Saudi Arabia', | |
4563 | 'SN': 'Senegal', | |
4564 | 'RS': 'Serbia', | |
4565 | 'SC': 'Seychelles', | |
4566 | 'SL': 'Sierra Leone', | |
4567 | 'SG': 'Singapore', | |
4568 | 'SX': 'Sint Maarten (Dutch part)', | |
4569 | 'SK': 'Slovakia', | |
4570 | 'SI': 'Slovenia', | |
4571 | 'SB': 'Solomon Islands', | |
4572 | 'SO': 'Somalia', | |
4573 | 'ZA': 'South Africa', | |
4574 | 'GS': 'South Georgia and the South Sandwich Islands', | |
4575 | 'SS': 'South Sudan', | |
4576 | 'ES': 'Spain', | |
4577 | 'LK': 'Sri Lanka', | |
4578 | 'SD': 'Sudan', | |
4579 | 'SR': 'Suriname', | |
4580 | 'SJ': 'Svalbard and Jan Mayen', | |
4581 | 'SZ': 'Swaziland', | |
4582 | 'SE': 'Sweden', | |
4583 | 'CH': 'Switzerland', | |
4584 | 'SY': 'Syrian Arab Republic', | |
4585 | 'TW': 'Taiwan, Province of China', | |
4586 | 'TJ': 'Tajikistan', | |
4587 | 'TZ': 'Tanzania, United Republic of', | |
4588 | 'TH': 'Thailand', | |
4589 | 'TL': 'Timor-Leste', | |
4590 | 'TG': 'Togo', | |
4591 | 'TK': 'Tokelau', | |
4592 | 'TO': 'Tonga', | |
4593 | 'TT': 'Trinidad and Tobago', | |
4594 | 'TN': 'Tunisia', | |
4595 | 'TR': 'Turkey', | |
4596 | 'TM': 'Turkmenistan', | |
4597 | 'TC': 'Turks and Caicos Islands', | |
4598 | 'TV': 'Tuvalu', | |
4599 | 'UG': 'Uganda', | |
4600 | 'UA': 'Ukraine', | |
4601 | 'AE': 'United Arab Emirates', | |
4602 | 'GB': 'United Kingdom', | |
4603 | 'US': 'United States', | |
4604 | 'UM': 'United States Minor Outlying Islands', | |
4605 | 'UY': 'Uruguay', | |
4606 | 'UZ': 'Uzbekistan', | |
4607 | 'VU': 'Vanuatu', | |
4608 | 'VE': 'Venezuela, Bolivarian Republic of', | |
4609 | 'VN': 'Viet Nam', | |
4610 | 'VG': 'Virgin Islands, British', | |
4611 | 'VI': 'Virgin Islands, U.S.', | |
4612 | 'WF': 'Wallis and Futuna', | |
4613 | 'EH': 'Western Sahara', | |
4614 | 'YE': 'Yemen', | |
4615 | 'ZM': 'Zambia', | |
4616 | 'ZW': 'Zimbabwe', | |
4617 | # Not ISO 3166 codes, but used for IP blocks | |
4618 | 'AP': 'Asia/Pacific Region', | |
4619 | 'EU': 'Europe', | |
4620 | } | |
4621 | ||
4622 | @classmethod | |
4623 | def short2full(cls, code): | |
4624 | """Convert an ISO 3166-2 country code to the corresponding full name""" | |
4625 | return cls._country_map.get(code.upper()) | |
4626 | ||
4627 | ||
4628 | class GeoUtils: | |
4629 | # Major IPv4 address blocks per country | |
4630 | _country_ip_map = { | |
4631 | 'AD': '46.172.224.0/19', | |
4632 | 'AE': '94.200.0.0/13', | |
4633 | 'AF': '149.54.0.0/17', | |
4634 | 'AG': '209.59.64.0/18', | |
4635 | 'AI': '204.14.248.0/21', | |
4636 | 'AL': '46.99.0.0/16', | |
4637 | 'AM': '46.70.0.0/15', | |
4638 | 'AO': '105.168.0.0/13', | |
4639 | 'AP': '182.50.184.0/21', | |
4640 | 'AQ': '23.154.160.0/24', | |
4641 | 'AR': '181.0.0.0/12', | |
4642 | 'AS': '202.70.112.0/20', | |
4643 | 'AT': '77.116.0.0/14', | |
4644 | 'AU': '1.128.0.0/11', | |
4645 | 'AW': '181.41.0.0/18', | |
4646 | 'AX': '185.217.4.0/22', | |
4647 | 'AZ': '5.197.0.0/16', | |
4648 | 'BA': '31.176.128.0/17', | |
4649 | 'BB': '65.48.128.0/17', | |
4650 | 'BD': '114.130.0.0/16', | |
4651 | 'BE': '57.0.0.0/8', | |
4652 | 'BF': '102.178.0.0/15', | |
4653 | 'BG': '95.42.0.0/15', | |
4654 | 'BH': '37.131.0.0/17', | |
4655 | 'BI': '154.117.192.0/18', | |
4656 | 'BJ': '137.255.0.0/16', | |
4657 | 'BL': '185.212.72.0/23', | |
4658 | 'BM': '196.12.64.0/18', | |
4659 | 'BN': '156.31.0.0/16', | |
4660 | 'BO': '161.56.0.0/16', | |
4661 | 'BQ': '161.0.80.0/20', | |
4662 | 'BR': '191.128.0.0/12', | |
4663 | 'BS': '24.51.64.0/18', | |
4664 | 'BT': '119.2.96.0/19', | |
4665 | 'BW': '168.167.0.0/16', | |
4666 | 'BY': '178.120.0.0/13', | |
4667 | 'BZ': '179.42.192.0/18', | |
4668 | 'CA': '99.224.0.0/11', | |
4669 | 'CD': '41.243.0.0/16', | |
4670 | 'CF': '197.242.176.0/21', | |
4671 | 'CG': '160.113.0.0/16', | |
4672 | 'CH': '85.0.0.0/13', | |
4673 | 'CI': '102.136.0.0/14', | |
4674 | 'CK': '202.65.32.0/19', | |
4675 | 'CL': '152.172.0.0/14', | |
4676 | 'CM': '102.244.0.0/14', | |
4677 | 'CN': '36.128.0.0/10', | |
4678 | 'CO': '181.240.0.0/12', | |
4679 | 'CR': '201.192.0.0/12', | |
4680 | 'CU': '152.206.0.0/15', | |
4681 | 'CV': '165.90.96.0/19', | |
4682 | 'CW': '190.88.128.0/17', | |
4683 | 'CY': '31.153.0.0/16', | |
4684 | 'CZ': '88.100.0.0/14', | |
4685 | 'DE': '53.0.0.0/8', | |
4686 | 'DJ': '197.241.0.0/17', | |
4687 | 'DK': '87.48.0.0/12', | |
4688 | 'DM': '192.243.48.0/20', | |
4689 | 'DO': '152.166.0.0/15', | |
4690 | 'DZ': '41.96.0.0/12', | |
4691 | 'EC': '186.68.0.0/15', | |
4692 | 'EE': '90.190.0.0/15', | |
4693 | 'EG': '156.160.0.0/11', | |
4694 | 'ER': '196.200.96.0/20', | |
4695 | 'ES': '88.0.0.0/11', | |
4696 | 'ET': '196.188.0.0/14', | |
4697 | 'EU': '2.16.0.0/13', | |
4698 | 'FI': '91.152.0.0/13', | |
4699 | 'FJ': '144.120.0.0/16', | |
4700 | 'FK': '80.73.208.0/21', | |
4701 | 'FM': '119.252.112.0/20', | |
4702 | 'FO': '88.85.32.0/19', | |
4703 | 'FR': '90.0.0.0/9', | |
4704 | 'GA': '41.158.0.0/15', | |
4705 | 'GB': '25.0.0.0/8', | |
4706 | 'GD': '74.122.88.0/21', | |
4707 | 'GE': '31.146.0.0/16', | |
4708 | 'GF': '161.22.64.0/18', | |
4709 | 'GG': '62.68.160.0/19', | |
4710 | 'GH': '154.160.0.0/12', | |
4711 | 'GI': '95.164.0.0/16', | |
4712 | 'GL': '88.83.0.0/19', | |
4713 | 'GM': '160.182.0.0/15', | |
4714 | 'GN': '197.149.192.0/18', | |
4715 | 'GP': '104.250.0.0/19', | |
4716 | 'GQ': '105.235.224.0/20', | |
4717 | 'GR': '94.64.0.0/13', | |
4718 | 'GT': '168.234.0.0/16', | |
4719 | 'GU': '168.123.0.0/16', | |
4720 | 'GW': '197.214.80.0/20', | |
4721 | 'GY': '181.41.64.0/18', | |
4722 | 'HK': '113.252.0.0/14', | |
4723 | 'HN': '181.210.0.0/16', | |
4724 | 'HR': '93.136.0.0/13', | |
4725 | 'HT': '148.102.128.0/17', | |
4726 | 'HU': '84.0.0.0/14', | |
4727 | 'ID': '39.192.0.0/10', | |
4728 | 'IE': '87.32.0.0/12', | |
4729 | 'IL': '79.176.0.0/13', | |
4730 | 'IM': '5.62.80.0/20', | |
4731 | 'IN': '117.192.0.0/10', | |
4732 | 'IO': '203.83.48.0/21', | |
4733 | 'IQ': '37.236.0.0/14', | |
4734 | 'IR': '2.176.0.0/12', | |
4735 | 'IS': '82.221.0.0/16', | |
4736 | 'IT': '79.0.0.0/10', | |
4737 | 'JE': '87.244.64.0/18', | |
4738 | 'JM': '72.27.0.0/17', | |
4739 | 'JO': '176.29.0.0/16', | |
4740 | 'JP': '133.0.0.0/8', | |
4741 | 'KE': '105.48.0.0/12', | |
4742 | 'KG': '158.181.128.0/17', | |
4743 | 'KH': '36.37.128.0/17', | |
4744 | 'KI': '103.25.140.0/22', | |
4745 | 'KM': '197.255.224.0/20', | |
4746 | 'KN': '198.167.192.0/19', | |
4747 | 'KP': '175.45.176.0/22', | |
4748 | 'KR': '175.192.0.0/10', | |
4749 | 'KW': '37.36.0.0/14', | |
4750 | 'KY': '64.96.0.0/15', | |
4751 | 'KZ': '2.72.0.0/13', | |
4752 | 'LA': '115.84.64.0/18', | |
4753 | 'LB': '178.135.0.0/16', | |
4754 | 'LC': '24.92.144.0/20', | |
4755 | 'LI': '82.117.0.0/19', | |
4756 | 'LK': '112.134.0.0/15', | |
4757 | 'LR': '102.183.0.0/16', | |
4758 | 'LS': '129.232.0.0/17', | |
4759 | 'LT': '78.56.0.0/13', | |
4760 | 'LU': '188.42.0.0/16', | |
4761 | 'LV': '46.109.0.0/16', | |
4762 | 'LY': '41.252.0.0/14', | |
4763 | 'MA': '105.128.0.0/11', | |
4764 | 'MC': '88.209.64.0/18', | |
4765 | 'MD': '37.246.0.0/16', | |
4766 | 'ME': '178.175.0.0/17', | |
4767 | 'MF': '74.112.232.0/21', | |
4768 | 'MG': '154.126.0.0/17', | |
4769 | 'MH': '117.103.88.0/21', | |
4770 | 'MK': '77.28.0.0/15', | |
4771 | 'ML': '154.118.128.0/18', | |
4772 | 'MM': '37.111.0.0/17', | |
4773 | 'MN': '49.0.128.0/17', | |
4774 | 'MO': '60.246.0.0/16', | |
4775 | 'MP': '202.88.64.0/20', | |
4776 | 'MQ': '109.203.224.0/19', | |
4777 | 'MR': '41.188.64.0/18', | |
4778 | 'MS': '208.90.112.0/22', | |
4779 | 'MT': '46.11.0.0/16', | |
4780 | 'MU': '105.16.0.0/12', | |
4781 | 'MV': '27.114.128.0/18', | |
4782 | 'MW': '102.70.0.0/15', | |
4783 | 'MX': '187.192.0.0/11', | |
4784 | 'MY': '175.136.0.0/13', | |
4785 | 'MZ': '197.218.0.0/15', | |
4786 | 'NA': '41.182.0.0/16', | |
4787 | 'NC': '101.101.0.0/18', | |
4788 | 'NE': '197.214.0.0/18', | |
4789 | 'NF': '203.17.240.0/22', | |
4790 | 'NG': '105.112.0.0/12', | |
4791 | 'NI': '186.76.0.0/15', | |
4792 | 'NL': '145.96.0.0/11', | |
4793 | 'NO': '84.208.0.0/13', | |
4794 | 'NP': '36.252.0.0/15', | |
4795 | 'NR': '203.98.224.0/19', | |
4796 | 'NU': '49.156.48.0/22', | |
4797 | 'NZ': '49.224.0.0/14', | |
4798 | 'OM': '5.36.0.0/15', | |
4799 | 'PA': '186.72.0.0/15', | |
4800 | 'PE': '186.160.0.0/14', | |
4801 | 'PF': '123.50.64.0/18', | |
4802 | 'PG': '124.240.192.0/19', | |
4803 | 'PH': '49.144.0.0/13', | |
4804 | 'PK': '39.32.0.0/11', | |
4805 | 'PL': '83.0.0.0/11', | |
4806 | 'PM': '70.36.0.0/20', | |
4807 | 'PR': '66.50.0.0/16', | |
4808 | 'PS': '188.161.0.0/16', | |
4809 | 'PT': '85.240.0.0/13', | |
4810 | 'PW': '202.124.224.0/20', | |
4811 | 'PY': '181.120.0.0/14', | |
4812 | 'QA': '37.210.0.0/15', | |
4813 | 'RE': '102.35.0.0/16', | |
4814 | 'RO': '79.112.0.0/13', | |
4815 | 'RS': '93.86.0.0/15', | |
4816 | 'RU': '5.136.0.0/13', | |
4817 | 'RW': '41.186.0.0/16', | |
4818 | 'SA': '188.48.0.0/13', | |
4819 | 'SB': '202.1.160.0/19', | |
4820 | 'SC': '154.192.0.0/11', | |
4821 | 'SD': '102.120.0.0/13', | |
4822 | 'SE': '78.64.0.0/12', | |
4823 | 'SG': '8.128.0.0/10', | |
4824 | 'SI': '188.196.0.0/14', | |
4825 | 'SK': '78.98.0.0/15', | |
4826 | 'SL': '102.143.0.0/17', | |
4827 | 'SM': '89.186.32.0/19', | |
4828 | 'SN': '41.82.0.0/15', | |
4829 | 'SO': '154.115.192.0/18', | |
4830 | 'SR': '186.179.128.0/17', | |
4831 | 'SS': '105.235.208.0/21', | |
4832 | 'ST': '197.159.160.0/19', | |
4833 | 'SV': '168.243.0.0/16', | |
4834 | 'SX': '190.102.0.0/20', | |
4835 | 'SY': '5.0.0.0/16', | |
4836 | 'SZ': '41.84.224.0/19', | |
4837 | 'TC': '65.255.48.0/20', | |
4838 | 'TD': '154.68.128.0/19', | |
4839 | 'TG': '196.168.0.0/14', | |
4840 | 'TH': '171.96.0.0/13', | |
4841 | 'TJ': '85.9.128.0/18', | |
4842 | 'TK': '27.96.24.0/21', | |
4843 | 'TL': '180.189.160.0/20', | |
4844 | 'TM': '95.85.96.0/19', | |
4845 | 'TN': '197.0.0.0/11', | |
4846 | 'TO': '175.176.144.0/21', | |
4847 | 'TR': '78.160.0.0/11', | |
4848 | 'TT': '186.44.0.0/15', | |
4849 | 'TV': '202.2.96.0/19', | |
4850 | 'TW': '120.96.0.0/11', | |
4851 | 'TZ': '156.156.0.0/14', | |
4852 | 'UA': '37.52.0.0/14', | |
4853 | 'UG': '102.80.0.0/13', | |
4854 | 'US': '6.0.0.0/8', | |
4855 | 'UY': '167.56.0.0/13', | |
4856 | 'UZ': '84.54.64.0/18', | |
4857 | 'VA': '212.77.0.0/19', | |
4858 | 'VC': '207.191.240.0/21', | |
4859 | 'VE': '186.88.0.0/13', | |
4860 | 'VG': '66.81.192.0/20', | |
4861 | 'VI': '146.226.0.0/16', | |
4862 | 'VN': '14.160.0.0/11', | |
4863 | 'VU': '202.80.32.0/20', | |
4864 | 'WF': '117.20.32.0/21', | |
4865 | 'WS': '202.4.32.0/19', | |
4866 | 'YE': '134.35.0.0/16', | |
4867 | 'YT': '41.242.116.0/22', | |
4868 | 'ZA': '41.0.0.0/11', | |
4869 | 'ZM': '102.144.0.0/13', | |
4870 | 'ZW': '102.177.192.0/18', | |
4871 | } | |
4872 | ||
4873 | @classmethod | |
4874 | def random_ipv4(cls, code_or_block): | |
4875 | if len(code_or_block) == 2: | |
4876 | block = cls._country_ip_map.get(code_or_block.upper()) | |
4877 | if not block: | |
4878 | return None | |
4879 | else: | |
4880 | block = code_or_block | |
4881 | addr, preflen = block.split('/') | |
4882 | addr_min = struct.unpack('!L', socket.inet_aton(addr))[0] | |
4883 | addr_max = addr_min | (0xffffffff >> int(preflen)) | |
4884 | return str(socket.inet_ntoa( | |
4885 | struct.pack('!L', random.randint(addr_min, addr_max)))) | |
4886 | ||
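| # Illustrative usage of GeoUtils.random_ipv4 (example values, not part of the original source): | |
| #   GeoUtils.random_ipv4('DE')            # some address inside 53.0.0.0/8 (the 'DE' block above) | |
| #   GeoUtils.random_ipv4('192.0.2.0/24')  # some address inside the given CIDR block | |
| #   GeoUtils.random_ipv4('XX')            # None - country code not in the map | |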
4887 | ||
4888 | class PerRequestProxyHandler(urllib.request.ProxyHandler): | |
4889 | def __init__(self, proxies=None): | |
4890 | # Set default handlers | |
4891 | for type in ('http', 'https'): | |
4892 | setattr(self, '%s_open' % type, | |
4893 | lambda r, proxy='__noproxy__', type=type, meth=self.proxy_open: | |
4894 | meth(r, proxy, type)) | |
4895 | urllib.request.ProxyHandler.__init__(self, proxies) | |
4896 | ||
4897 | def proxy_open(self, req, proxy, type): | |
4898 | req_proxy = req.headers.get('Ytdl-request-proxy') | |
4899 | if req_proxy is not None: | |
4900 | proxy = req_proxy | |
4901 | del req.headers['Ytdl-request-proxy'] | |
4902 | ||
4903 | if proxy == '__noproxy__': | |
4904 | return None # No Proxy | |
4905 | if urllib.parse.urlparse(proxy).scheme.lower() in ('socks', 'socks4', 'socks4a', 'socks5'): | |
4906 | req.add_header('Ytdl-socks-proxy', proxy) | |
4907 | # yt-dlp's http/https handlers take care of wrapping the socket with SOCKS | |
4908 | return None | |
4909 | return urllib.request.ProxyHandler.proxy_open( | |
4910 | self, req, proxy, type) | |
4911 | ||
4912 | ||
4913 | # Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is | |
4914 | # released into Public Domain | |
4915 | # https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387 | |
4916 | ||
4917 | def long_to_bytes(n, blocksize=0): | |
4918 | """long_to_bytes(n:long, blocksize:int) : string | |
4919 | Convert a long integer to a byte string. | |
4920 | ||
4921 | If optional blocksize is given and greater than zero, pad the front of the | |
4922 | byte string with binary zeros so that the length is a multiple of | |
4923 | blocksize. | |
4924 | """ | |
4925 | # after much testing, this algorithm was deemed to be the fastest | |
4926 | s = b'' | |
4927 | n = int(n) | |
4928 | while n > 0: | |
4929 | s = struct.pack('>I', n & 0xffffffff) + s | |
4930 | n = n >> 32 | |
4931 | # strip off leading zeros | |
4932 | for i in range(len(s)): | |
4933 | if s[i] != b'\000'[0]: | |
4934 | break | |
4935 | else: | |
4936 | # only happens when n == 0 | |
4937 | s = b'\000' | |
4938 | i = 0 | |
4939 | s = s[i:] | |
4940 | # add back some pad bytes. this could be done more efficiently w.r.t. the | |
4941 | # de-padding being done above, but sigh... | |
4942 | if blocksize > 0 and len(s) % blocksize: | |
4943 | s = (blocksize - len(s) % blocksize) * b'\000' + s | |
4944 | return s | |
4945 | ||
4946 | ||
4947 | def bytes_to_long(s): | |
4948 | """bytes_to_long(string) : long | |
4949 | Convert a byte string to a long integer. | |
4950 | ||
4951 | This is (essentially) the inverse of long_to_bytes(). | |
4952 | """ | |
4953 | acc = 0 | |
4954 | length = len(s) | |
4955 | if length % 4: | |
4956 | extra = (4 - length % 4) | |
4957 | s = b'\000' * extra + s | |
4958 | length = length + extra | |
4959 | for i in range(0, length, 4): | |
4960 | acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0] | |
4961 | return acc | |
4962 | ||
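| # Illustrative round-trip (not part of the original source): | |
| #   long_to_bytes(65537)           == b'\x01\x00\x01' | |
| #   long_to_bytes(65537, 4)        == b'\x00\x01\x00\x01'  (front-padded to a multiple of 4) | |
| #   bytes_to_long(b'\x01\x00\x01') == 65537 | |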
4963 | ||
4964 | def ohdave_rsa_encrypt(data, exponent, modulus): | |
4965 | ''' | |
4966 | Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/ | |
4967 | ||
4968 | Input: | |
4969 | data: data to encrypt, bytes-like object | |
4970 | exponent, modulus: parameter e and N of RSA algorithm, both integer | |
4971 | Output: hex string of encrypted data | |
4972 | ||
4973 | Limitation: supports one block encryption only | |
4974 | ''' | |
4975 | ||
4976 | payload = int(binascii.hexlify(data[::-1]), 16) | |
4977 | encrypted = pow(payload, exponent, modulus) | |
4978 | return '%x' % encrypted | |
4979 | ||
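| # Illustrative usage with toy parameters (e=7, N=143 are NOT a real key): | |
| #   ohdave_rsa_encrypt(b'\x02', 7, 143) == '80'   # pow(2, 7, 143) == 128 == 0x80 | |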
4980 | ||
4981 | def pkcs1pad(data, length): | |
4982 | """ | |
4983 | Padding input data with PKCS#1 scheme | |
4984 | ||
4985 | @param {int[]} data input data | |
4986 | @param {int} length target length | |
4987 | @returns {int[]} padded data | |
4988 | """ | |
4989 | if len(data) > length - 11: | |
4990 | raise ValueError('Input data too long for PKCS#1 padding') | |
4991 | ||
4992 | pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)] | |
4993 | return [0, 2] + pseudo_random + [0] + data | |
4994 | ||
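| # Illustrative structure (not part of the original source): | |
| #   pkcs1pad([1, 2, 3], 16) returns 16 ints of the form | |
| #   [0, 2, <10 pseudo-random values>, 0, 1, 2, 3] | |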
4995 | ||
4996 | def _base_n_table(n, table): | |
4997 | if not table and not n: | |
4998 | raise ValueError('Either table or n must be specified') | |
4999 | table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n] | |
5000 | ||
5001 | if n and n != len(table): | |
5002 | raise ValueError(f'base {n} exceeds table length {len(table)}') | |
5003 | return table | |
5004 | ||
5005 | ||
5006 | def encode_base_n(num, n=None, table=None): | |
5007 | """Convert given int to a base-n string""" | |
5008 | table = _base_n_table(n, table) | |
5009 | if not num: | |
5010 | return table[0] | |
5011 | ||
5012 | result, base = '', len(table) | |
5013 | while num: | |
5014 | result = table[num % base] + result | |
5015 | num = num // base | |
5016 | return result | |
5017 | ||
5018 | ||
5019 | def decode_base_n(string, n=None, table=None): | |
5020 | """Convert given base-n string to int""" | |
5021 | table = {char: index for index, char in enumerate(_base_n_table(n, table))} | |
5022 | result, base = 0, len(table) | |
5023 | for char in string: | |
5024 | result = result * base + table[char] | |
5025 | return result | |
5026 | ||
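| # Illustrative usage (not part of the original source): | |
| #   encode_base_n(255, 16)  == 'ff' | |
| #   decode_base_n('ff', 16) == 255 | |
| #   encode_base_n(0, 2)     == '0' | |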
5027 | ||
5028 | def decode_base(value, digits): | |
5029 | deprecation_warning(f'{__name__}.decode_base is deprecated and may be removed ' | |
5030 | f'in a future version. Use {__name__}.decode_base_n instead') | |
5031 | return decode_base_n(value, table=digits) | |
5032 | ||
5033 | ||
5034 | def decode_packed_codes(code): | |
5035 | mobj = re.search(PACKED_CODES_RE, code) | |
5036 | obfuscated_code, base, count, symbols = mobj.groups() | |
5037 | base = int(base) | |
5038 | count = int(count) | |
5039 | symbols = symbols.split('|') | |
5040 | symbol_table = {} | |
5041 | ||
5042 | while count: | |
5043 | count -= 1 | |
5044 | base_n_count = encode_base_n(count, base) | |
5045 | symbol_table[base_n_count] = symbols[count] or base_n_count | |
5046 | ||
5047 | return re.sub( | |
5048 | r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)], | |
5049 | obfuscated_code) | |
5050 | ||
5051 | ||
5052 | def caesar(s, alphabet, shift): | |
5053 | if shift == 0: | |
5054 | return s | |
5055 | l = len(alphabet) | |
5056 | return ''.join( | |
5057 | alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c | |
5058 | for c in s) | |
5059 | ||
5060 | ||
5061 | def rot47(s): | |
5062 | return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~''', 47) | |
5063 | ||
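| # Illustrative usage (not part of the original source): | |
| #   caesar('abc', 'abcdefghij', 1) == 'bcd' | |
| #   rot47('Hello')                 == 'w6==@' | |
| #   rot47(rot47(s)) == s for any string s (shifting by 47 twice spans the 94-char alphabet) | |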
5064 | ||
5065 | def parse_m3u8_attributes(attrib): | |
5066 | info = {} | |
5067 | for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib): | |
5068 | if val.startswith('"'): | |
5069 | val = val[1:-1] | |
5070 | info[key] = val | |
5071 | return info | |
5072 | ||
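| # Illustrative usage (not part of the original source): | |
| #   parse_m3u8_attributes('BANDWIDTH=1280000,CODECS="avc1.4d401f,mp4a.40.2"') | |
| #   == {'BANDWIDTH': '1280000', 'CODECS': 'avc1.4d401f,mp4a.40.2'} | |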
5073 | ||
5074 | def urshift(val, n): | |
5075 | return val >> n if val >= 0 else (val + 0x100000000) >> n | |
5076 | ||
5077 | ||
5078 | # Based on png2str() written by @gdkchan and improved by @yokrysty | |
5079 | # Originally posted at https://github.com/ytdl-org/youtube-dl/issues/9706 | |
5080 | def decode_png(png_data): | |
5081 | # Reference: https://www.w3.org/TR/PNG/ | |
5082 | header = png_data[8:] | |
5083 | ||
5084 | if png_data[:8] != b'\x89PNG\x0d\x0a\x1a\x0a' or header[4:8] != b'IHDR': | |
5085 | raise OSError('Not a valid PNG file.') | |
5086 | ||
5087 | int_map = {1: '>B', 2: '>H', 4: '>I'} | |
5088 | unpack_integer = lambda x: struct.unpack(int_map[len(x)], x)[0] | |
5089 | ||
5090 | chunks = [] | |
5091 | ||
5092 | while header: | |
5093 | length = unpack_integer(header[:4]) | |
5094 | header = header[4:] | |
5095 | ||
5096 | chunk_type = header[:4] | |
5097 | header = header[4:] | |
5098 | ||
5099 | chunk_data = header[:length] | |
5100 | header = header[length:] | |
5101 | ||
5102 | header = header[4:] # Skip CRC | |
5103 | ||
5104 | chunks.append({ | |
5105 | 'type': chunk_type, | |
5106 | 'length': length, | |
5107 | 'data': chunk_data | |
5108 | }) | |
5109 | ||
5110 | ihdr = chunks[0]['data'] | |
5111 | ||
5112 | width = unpack_integer(ihdr[:4]) | |
5113 | height = unpack_integer(ihdr[4:8]) | |
5114 | ||
5115 | idat = b'' | |
5116 | ||
5117 | for chunk in chunks: | |
5118 | if chunk['type'] == b'IDAT': | |
5119 | idat += chunk['data'] | |
5120 | ||
5121 | if not idat: | |
5122 | raise OSError('Unable to read PNG data.') | |
5123 | ||
5124 | decompressed_data = bytearray(zlib.decompress(idat)) | |
5125 | ||
5126 | stride = width * 3 | |
5127 | pixels = [] | |
5128 | ||
5129 | def _get_pixel(idx): | |
5130 | x = idx % stride | |
5131 | y = idx // stride | |
5132 | return pixels[y][x] | |
5133 | ||
5134 | for y in range(height): | |
5135 | basePos = y * (1 + stride) | |
5136 | filter_type = decompressed_data[basePos] | |
5137 | ||
5138 | current_row = [] | |
5139 | ||
5140 | pixels.append(current_row) | |
5141 | ||
5142 | for x in range(stride): | |
5143 | color = decompressed_data[1 + basePos + x] | |
5144 | basex = y * stride + x | |
5145 | left = 0 | |
5146 | up = 0 | |
5147 | ||
5148 | if x > 2: | |
5149 | left = _get_pixel(basex - 3) | |
5150 | if y > 0: | |
5151 | up = _get_pixel(basex - stride) | |
5152 | ||
5153 | if filter_type == 1: # Sub | |
5154 | color = (color + left) & 0xff | |
5155 | elif filter_type == 2: # Up | |
5156 | color = (color + up) & 0xff | |
5157 | elif filter_type == 3: # Average | |
5158 | color = (color + ((left + up) >> 1)) & 0xff | |
5159 | elif filter_type == 4: # Paeth | |
5160 | a = left | |
5161 | b = up | |
5162 | c = 0 | |
5163 | ||
5164 | if x > 2 and y > 0: | |
5165 | c = _get_pixel(basex - stride - 3) | |
5166 | ||
5167 | p = a + b - c | |
5168 | ||
5169 | pa = abs(p - a) | |
5170 | pb = abs(p - b) | |
5171 | pc = abs(p - c) | |
5172 | ||
5173 | if pa <= pb and pa <= pc: | |
5174 | color = (color + a) & 0xff | |
5175 | elif pb <= pc: | |
5176 | color = (color + b) & 0xff | |
5177 | else: | |
5178 | color = (color + c) & 0xff | |
5179 | ||
5180 | current_row.append(color) | |
5181 | ||
5182 | return width, height, pixels | |
5183 | ||
5184 | ||
5185 | def write_xattr(path, key, value): | |
5186 | # Windows: Write xattrs to NTFS Alternate Data Streams: | |
5187 | # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29 | |
5188 | if compat_os_name == 'nt': | |
5189 | assert ':' not in key | |
5190 | assert os.path.exists(path) | |
5191 | ||
5192 | try: | |
5193 | with open(f'{path}:{key}', 'wb') as f: | |
5194 | f.write(value) | |
5195 | except OSError as e: | |
5196 | raise XAttrMetadataError(e.errno, e.strerror) | |
5197 | return | |
5198 | ||
5199 | # UNIX Method 1. Use xattrs/pyxattrs modules | |
5200 | ||
5201 | setxattr = None | |
5202 | if getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr': | |
5203 | # Unicode arguments are not supported in pyxattr until version 0.5.0 | |
5204 | # See https://github.com/ytdl-org/youtube-dl/issues/5498 | |
5205 | if version_tuple(xattr.__version__) >= (0, 5, 0): | |
5206 | setxattr = xattr.set | |
5207 | elif xattr: | |
5208 | setxattr = xattr.setxattr | |
5209 | ||
5210 | if setxattr: | |
5211 | try: | |
5212 | setxattr(path, key, value) | |
5213 | except OSError as e: | |
5214 | raise XAttrMetadataError(e.errno, e.strerror) | |
5215 | return | |
5216 | ||
5217 | # UNIX Method 2. Use setfattr/xattr executables | |
5218 | exe = ('setfattr' if check_executable('setfattr', ['--version']) | |
5219 | else 'xattr' if check_executable('xattr', ['-h']) else None) | |
5220 | if not exe: | |
5221 | raise XAttrUnavailableError( | |
5222 | 'Couldn\'t find a tool to set the xattrs. Install either the python "xattr" or "pyxattr" modules or the ' | |
5223 | + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)')) | |
5224 | ||
5225 | value = value.decode() | |
5226 | try: | |
5227 | _, stderr, returncode = Popen.run( | |
5228 | [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path], | |
5229 | text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE) | |
5230 | except OSError as e: | |
5231 | raise XAttrMetadataError(e.errno, e.strerror) | |
5232 | if returncode: | |
5233 | raise XAttrMetadataError(returncode, stderr) | |
5234 | ||
5235 | ||
5236 | def random_birthday(year_field, month_field, day_field): | |
5237 | start_date = datetime.date(1950, 1, 1) | |
5238 | end_date = datetime.date(1995, 12, 31) | |
5239 | offset = random.randint(0, (end_date - start_date).days) | |
5240 | random_date = start_date + datetime.timedelta(offset) | |
5241 | return { | |
5242 | year_field: str(random_date.year), | |
5243 | month_field: str(random_date.month), | |
5244 | day_field: str(random_date.day), | |
5245 | } | |
5246 | ||
5247 | ||
5248 | def find_available_port(interface=''): | |
5249 | try: | |
5250 | with socket.socket() as sock: | |
5251 | sock.bind((interface, 0)) | |
5252 | return sock.getsockname()[1] | |
5253 | except OSError: | |
5254 | return None | |
5255 | ||
5256 | ||
5257 | # Templates for internet shortcut files, which are plain text files. | |
5258 | DOT_URL_LINK_TEMPLATE = '''\ | |
5259 | [InternetShortcut] | |
5260 | URL=%(url)s | |
5261 | ''' | |
5262 | ||
5263 | DOT_WEBLOC_LINK_TEMPLATE = '''\ | |
5264 | <?xml version="1.0" encoding="UTF-8"?> | |
5265 | <!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd"> | |
5266 | <plist version="1.0"> | |
5267 | <dict> | |
5268 | \t<key>URL</key> | |
5269 | \t<string>%(url)s</string> | |
5270 | </dict> | |
5271 | </plist> | |
5272 | ''' | |
5273 | ||
5274 | DOT_DESKTOP_LINK_TEMPLATE = '''\ | |
5275 | [Desktop Entry] | |
5276 | Encoding=UTF-8 | |
5277 | Name=%(filename)s | |
5278 | Type=Link | |
5279 | URL=%(url)s | |
5280 | Icon=text-html | |
5281 | ''' | |
5282 | ||
5283 | LINK_TEMPLATES = { | |
5284 | 'url': DOT_URL_LINK_TEMPLATE, | |
5285 | 'desktop': DOT_DESKTOP_LINK_TEMPLATE, | |
5286 | 'webloc': DOT_WEBLOC_LINK_TEMPLATE, | |
5287 | } | |
5288 | ||
5289 | ||
5290 | def iri_to_uri(iri): | |
5291 | """ | |
5292 | Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only). | |
5293 | ||
5294 | The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding *besides* those already escaped, leaving the URI intact. | |
5295 | """ | |
5296 | ||
5297 | iri_parts = urllib.parse.urlparse(iri) | |
5298 | ||
5299 | if '[' in iri_parts.netloc: | |
5300 | raise ValueError('IPv6 URIs are not yet supported.') | |
5301 | # Querying `.netloc`, when there's only one bracket, also raises a ValueError. | |
5302 | ||
5303 | # The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is. | |
5304 | ||
5305 | net_location = '' | |
5306 | if iri_parts.username: | |
5307 | net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~") | |
5308 | if iri_parts.password is not None: | |
5309 | net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~") | |
5310 | net_location += '@' | |
5311 | ||
5312 | net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames. | |
5313 | # The 'idna' encoding produces ASCII text. | |
5314 | if iri_parts.port is not None and iri_parts.port != 80: | |
5315 | net_location += ':' + str(iri_parts.port) | |
5316 | ||
5317 | return urllib.parse.urlunparse( | |
5318 | (iri_parts.scheme, | |
5319 | net_location, | |
5320 | ||
5321 | urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@|~"), | |
5322 | ||
5323 | # Unsure about the `safe` argument, since this is a legacy way of handling parameters. | |
5324 | urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@|~"), | |
5325 | ||
5326 | # Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component. | |
5327 | urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"), | |
5328 | ||
5329 | urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"))) | |
5330 | ||
5331 | # Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes. | |
5332 | ||
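| # Illustrative usage (not part of the original source): | |
| #   iri_to_uri('http://example.com/föö?q=ä') == 'http://example.com/f%C3%B6%C3%B6?q=%C3%A4' | |
| #   Already percent-escaped sequences are left alone; only non-ASCII/unsafe characters are encoded. | |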
5333 | ||
5334 | def to_high_limit_path(path): | |
5335 | if sys.platform in ['win32', 'cygwin']: | |
5336 | # Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited. | |
5337 | return '\\\\?\\' + os.path.abspath(path) | |
5338 | ||
5339 | return path | |
5340 | ||
5341 | ||
5342 | def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY): | |
5343 | val = traverse_obj(obj, *variadic(field)) | |
5344 | if (not val and val != 0) if ignore is NO_DEFAULT else val in variadic(ignore): | |
5345 | return default | |
5346 | return template % func(val) | |
5347 | ||
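| # Illustrative usage (not part of the original source): | |
| #   format_field({'duration': 120}, 'duration', '%ss')    == '120s' | |
| #   format_field({}, 'height', '%sp', default='unknown')  == 'unknown' | |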
5348 | ||
5349 | def clean_podcast_url(url): | |
5350 | return re.sub(r'''(?x) | |
5351 | (?: | |
5352 | (?: | |
5353 | chtbl\.com/track| | |
5354 | media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/ | |
5355 | play\.podtrac\.com | |
5356 | )/[^/]+| | |
5357 | (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure | |
5358 | flex\.acast\.com| | |
5359 | pd(?: | |
5360 | cn\.co| # https://podcorn.com/analytics-prefix/ | |
5361 | st\.fm # https://podsights.com/docs/ | |
5362 | )/e | |
5363 | )/''', '', url) | |
5364 | ||
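| # Illustrative usage (hypothetical URL, not part of the original source): | |
| #   clean_podcast_url('https://chtbl.com/track/12345/cdn.example.com/episode.mp3') | |
| #   == 'https://cdn.example.com/episode.mp3' | |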
5365 | ||
5366 | _HEX_TABLE = '0123456789abcdef' | |
5367 | ||
5368 | ||
5369 | def random_uuidv4(): | |
5370 | return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx') | |
5371 | ||
5372 | ||
5373 | def make_dir(path, to_screen=None): | |
5374 | try: | |
5375 | dn = os.path.dirname(path) | |
5376 | if dn and not os.path.exists(dn): | |
5377 | os.makedirs(dn) | |
5378 | return True | |
5379 | except OSError as err: | |
5380 | if callable(to_screen): | |
5381 | to_screen('unable to create directory ' + error_to_compat_str(err)) | |
5382 | return False | |
5383 | ||
5384 | ||
5385 | def get_executable_path(): | |
5386 | from .update import _get_variant_and_executable_path | |
5387 | ||
5388 | return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1])) | |
5389 | ||
5390 | ||
5391 | def get_user_config_dirs(package_name): | |
5392 | # .config (e.g. ~/.config/package_name) | |
5393 | xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config') | |
5394 | yield os.path.join(xdg_config_home, package_name) | |
5395 | ||
5396 | # appdata (%APPDATA%/package_name) | |
5397 | appdata_dir = os.getenv('appdata') | |
5398 | if appdata_dir: | |
5399 | yield os.path.join(appdata_dir, package_name) | |
5400 | ||
5401 | # home (~/.package_name) | |
5402 | yield os.path.join(compat_expanduser('~'), f'.{package_name}') | |
5403 | ||
5404 | ||
5405 | def get_system_config_dirs(package_name): | |
5406 | # /etc/package_name | |
5407 | yield os.path.join('/etc', package_name) | |
5408 | ||
5409 | ||
5410 | def traverse_obj( | |
5411 | obj, *paths, default=NO_DEFAULT, expected_type=None, get_all=True, | |
5412 | casesense=True, is_user_input=False, traverse_string=False): | |
5413 | """ | |
5414 | Safely traverse nested `dict`s and `Sequence`s | |
5415 | ||
5416 | >>> obj = [{}, {"key": "value"}] | |
5417 | >>> traverse_obj(obj, (1, "key")) | |
5418 | "value" | |
5419 | ||
5420 | Each of the provided `paths` is tested and the first producing a valid result will be returned. | |
5421 | The next path will also be tested if the path branched but no results could be found. | |
5422 | Supported values for traversal are `Mapping`, `Sequence` and `re.Match`. | |
5423 | Unhelpful values (`{}`, `None`) are treated as the absence of a value and discarded. | |
5424 | ||
5425 | The paths will be wrapped in `variadic`, so that `'key'` is conveniently the same as `('key', )`. | |
5426 | ||
5427 | The keys in the path can be one of: | |
5428 | - `None`: Return the current object. | |
5429 | - `set`: Requires the only item in the set to be a type or function, | |
5430 | like `{type}`/`{func}`. If a `type`, returns only values | |
5431 | of this type. If a function, returns `func(obj)`. | |
5432 | - `str`/`int`: Return `obj[key]`. For `re.Match`, return `obj.group(key)`. | |
5433 | - `slice`: Branch out and return all values in `obj[key]`. | |
5434 | - `Ellipsis`: Branch out and return a list of all values. | |
5435 | - `tuple`/`list`: Branch out and return a list of all matching values. | |
5436 | Read as: `[traverse_obj(obj, branch) for branch in branches]`. | |
5437 | - `function`: Branch out and return values filtered by the function. | |
5438 | Read as: `[value for key, value in obj if function(key, value)]`. | |
5439 | For `Sequence`s, `key` is the index of the value. | |
5440 | For `re.Match`es, `key` is the group number (0 = full match) | |
5441 | as well as additionally any group names, if given. | |
5442 | - `dict` Transform the current object and return a matching dict. | |
5443 | Read as: `{key: traverse_obj(obj, path) for key, path in dct.items()}`. | |
5444 | ||
5445 | `tuple`, `list`, and `dict` all support nested paths and branches. | |
5446 | ||
5447 | @params paths Paths which to traverse by. | |
5448 | @param default Value to return if the paths do not match. | |
5449 | If the last key in the path is a `dict`, it will apply to each value inside | |
5450 | the dict instead, depth first. Try to avoid if using nested `dict` keys. | |
5451 | @param expected_type If a `type`, only accept final values of this type. | |
5452 | If any other callable, try to call the function on each result. | |
5453 | If the last key in the path is a `dict`, it will apply to each value inside | |
5454 | the dict instead, recursively. This does respect branching paths. | |
5455 | @param get_all If `False`, return the first matching result, otherwise all matching ones. | |
5456 | @param casesense If `False`, consider string dictionary keys as case insensitive. | |
5457 | ||
5458 | The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API | |
5459 | ||
5460 | @param is_user_input Whether the keys are generated from user input. | |
5461 | If `True` strings get converted to `int`/`slice` if needed. | |
5462 | @param traverse_string Whether to traverse into objects as strings. | |
5463 | If `True`, any non-compatible object will first be | |
5464 | converted into a string and then traversed into. | |
5465 | The return value of that path will be a string instead, | |
5466 | not respecting any further branching. | |
5467 | ||
5468 | ||
5469 | @returns The result of the object traversal. | |
5470 | If successful, `get_all=True`, and the path branches at least once, | |
5471 | then a list of results is returned instead. | |
5472 | If no `default` is given and the last path branches, a `list` of results | |
5473 | is always returned. If a path ends on a `dict` that result will always be a `dict`. | |
5474 | """ | |
5475 | is_sequence = lambda x: isinstance(x, collections.abc.Sequence) and not isinstance(x, (str, bytes)) | |
5476 | casefold = lambda k: k.casefold() if isinstance(k, str) else k | |
5477 | ||
5478 | if isinstance(expected_type, type): | |
5479 | type_test = lambda val: val if isinstance(val, expected_type) else None | |
5480 | else: | |
5481 | type_test = lambda val: try_call(expected_type or IDENTITY, args=(val,)) | |
5482 | ||
5483 | def apply_key(key, obj, is_last): | |
5484 | branching = False | |
5485 | result = None | |
5486 | ||
5487 | if obj is None and traverse_string: | |
5488 | pass | |
5489 | ||
5490 | elif key is None: | |
5491 | result = obj | |
5492 | ||
5493 | elif isinstance(key, set): | |
5494 | assert len(key) == 1, 'Set should only be used to wrap a single item' | |
5495 | item = next(iter(key)) | |
5496 | if isinstance(item, type): | |
5497 | if isinstance(obj, item): | |
5498 | result = obj | |
5499 | else: | |
5500 | result = try_call(item, args=(obj,)) | |
5501 | ||
5502 | elif isinstance(key, (list, tuple)): | |
5503 | branching = True | |
5504 | result = itertools.chain.from_iterable( | |
5505 | apply_path(obj, branch, is_last)[0] for branch in key) | |
5506 | ||
5507 | elif key is ...: | |
5508 | branching = True | |
5509 | if isinstance(obj, collections.abc.Mapping): | |
5510 | result = obj.values() | |
5511 | elif is_sequence(obj): | |
5512 | result = obj | |
5513 | elif isinstance(obj, re.Match): | |
5514 | result = obj.groups() | |
5515 | elif traverse_string: | |
5516 | branching = False | |
5517 | result = str(obj) | |
5518 | else: | |
5519 | result = () | |
5520 | ||
5521 | elif callable(key): | |
5522 | branching = True | |
5523 | if isinstance(obj, collections.abc.Mapping): | |
5524 | iter_obj = obj.items() | |
5525 | elif is_sequence(obj): | |
5526 | iter_obj = enumerate(obj) | |
5527 | elif isinstance(obj, re.Match): | |
5528 | iter_obj = itertools.chain( | |
5529 | enumerate((obj.group(), *obj.groups())), | |
5530 | obj.groupdict().items()) | |
5531 | elif traverse_string: | |
5532 | branching = False | |
5533 | iter_obj = enumerate(str(obj)) | |
5534 | else: | |
5535 | iter_obj = () | |
5536 | ||
5537 | result = (v for k, v in iter_obj if try_call(key, args=(k, v))) | |
5538 | if not branching: # string traversal | |
5539 | result = ''.join(result) | |
5540 | ||
5541 | elif isinstance(key, dict): | |
5542 | iter_obj = ((k, _traverse_obj(obj, v, False, is_last)) for k, v in key.items()) | |
5543 | result = { | |
5544 | k: v if v is not None else default for k, v in iter_obj | |
5545 | if v is not None or default is not NO_DEFAULT | |
5546 | } or None | |
5547 | ||
5548 | elif isinstance(obj, collections.abc.Mapping): | |
5549 | result = (obj.get(key) if casesense or (key in obj) else | |
5550 | next((v for k, v in obj.items() if casefold(k) == key), None)) | |
5551 | ||
5552 | elif isinstance(obj, re.Match): | |
5553 | if isinstance(key, int) or casesense: | |
5554 | with contextlib.suppress(IndexError): | |
5555 | result = obj.group(key) | |
5556 | ||
5557 | elif isinstance(key, str): | |
5558 | result = next((v for k, v in obj.groupdict().items() if casefold(k) == key), None) | |
5559 | ||
5560 | elif isinstance(key, (int, slice)): | |
5561 | if is_sequence(obj): | |
5562 | branching = isinstance(key, slice) | |
5563 | with contextlib.suppress(IndexError): | |
5564 | result = obj[key] | |
5565 | elif traverse_string: | |
5566 | with contextlib.suppress(IndexError): | |
5567 | result = str(obj)[key] | |
5568 | ||
5569 | return branching, result if branching else (result,) | |
5570 | ||
5571 | def lazy_last(iterable): | |
5572 | iterator = iter(iterable) | |
5573 | prev = next(iterator, NO_DEFAULT) | |
5574 | if prev is NO_DEFAULT: | |
5575 | return | |
5576 | ||
5577 | for item in iterator: | |
5578 | yield False, prev | |
5579 | prev = item | |
5580 | ||
5581 | yield True, prev | |
5582 | ||
5583 | def apply_path(start_obj, path, test_type): | |
5584 | objs = (start_obj,) | |
5585 | has_branched = False | |
5586 | ||
5587 | key = None | |
5588 | for last, key in lazy_last(variadic(path, (str, bytes, dict, set))): | |
5589 | if is_user_input and isinstance(key, str): | |
5590 | if key == ':': | |
5591 | key = ... | |
5592 | elif ':' in key: | |
5593 | key = slice(*map(int_or_none, key.split(':'))) | |
5594 | elif int_or_none(key) is not None: | |
5595 | key = int(key) | |
5596 | ||
5597 | if not casesense and isinstance(key, str): | |
5598 | key = key.casefold() | |
5599 | ||
5600 | if __debug__ and callable(key): | |
5601 | # Verify function signature | |
5602 | inspect.signature(key).bind(None, None) | |
5603 | ||
5604 | new_objs = [] | |
5605 | for obj in objs: | |
5606 | branching, results = apply_key(key, obj, last) | |
5607 | has_branched |= branching | |
5608 | new_objs.append(results) | |
5609 | ||
5610 | objs = itertools.chain.from_iterable(new_objs) | |
5611 | ||
5612 | if test_type and not isinstance(key, (dict, list, tuple)): | |
5613 | objs = map(type_test, objs) | |
5614 | ||
5615 | return objs, has_branched, isinstance(key, dict) | |
5616 | ||
5617 | def _traverse_obj(obj, path, allow_empty, test_type): | |
5618 | results, has_branched, is_dict = apply_path(obj, path, test_type) | |
5619 | results = LazyList(item for item in results if item not in (None, {})) | |
5620 | if get_all and has_branched: | |
5621 | if results: | |
5622 | return results.exhaust() | |
5623 | if allow_empty: | |
5624 | return [] if default is NO_DEFAULT else default | |
5625 | return None | |
5626 | ||
5627 | return results[0] if results else {} if allow_empty and is_dict else None | |
5628 | ||
5629 | for index, path in enumerate(paths, 1): | |
5630 | result = _traverse_obj(obj, path, index == len(paths), True) | |
5631 | if result is not None: | |
5632 | return result | |
5633 | ||
5634 | return None if default is NO_DEFAULT else default | |
5635 | ||
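| # Illustrative usage (not part of the original source): | |
| #   traverse_obj({'a': {'b': [1, 2, 3]}}, ('a', 'b', -1))       == 3 | |
| #   traverse_obj({'a': [{'b': 1}, {'c': 2}]}, ('a', ..., 'b'))  == [1]        (branching) | |
| #   traverse_obj({'x': None}, 'x', 'y', default='missing')      == 'missing' | |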
5636 | ||
5637 | def traverse_dict(dictn, keys, casesense=True): | |
5638 | deprecation_warning(f'"{__name__}.traverse_dict" is deprecated and may be removed ' | |
5639 | f'in a future version. Use "{__name__}.traverse_obj" instead') | |
5640 | return traverse_obj(dictn, keys, casesense=casesense, is_user_input=True, traverse_string=True) | |
5641 | ||
5642 | ||
5643 | def get_first(obj, keys, **kwargs): | |
5644 | return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) | |
5645 | ||
5646 | ||
5647 | def time_seconds(**kwargs): | |
5648 | """ | |
5649 | Returns time in seconds since the epoch (1970-01-01T00:00:00Z), shifted by the UTC offset given as timedelta kwargs (e.g. hours=9) | |
5650 | """ | |
5651 | return time.time() + datetime.timedelta(**kwargs).total_seconds() | |
5652 | ||
5653 | ||
5654 | # create a JSON Web Signature (JWS) with the HS256 algorithm | |
5655 | # the resulting format is JWS Compact Serialization | |
5656 | # implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html | |
5657 | # implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html | |
5658 | def jwt_encode_hs256(payload_data, key, headers={}): | |
5659 | header_data = { | |
5660 | 'alg': 'HS256', | |
5661 | 'typ': 'JWT', | |
5662 | } | |
5663 | if headers: | |
5664 | header_data.update(headers) | |
5665 | header_b64 = base64.b64encode(json.dumps(header_data).encode()) | |
5666 | payload_b64 = base64.b64encode(json.dumps(payload_data).encode()) | |
5667 | h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256) | |
5668 | signature_b64 = base64.b64encode(h.digest()) | |
5669 | token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64 | |
5670 | return token | |
5671 | ||
5672 | ||
5673 | # can be extended in the future to verify the signature, parse the header, and return the algorithm used if it's not HS256 | |
5674 | def jwt_decode_hs256(jwt): | |
5675 | header_b64, payload_b64, signature_b64 = jwt.split('.') | |
5676 | # add trailing ='s that may have been stripped, superfluous ='s are ignored | |
5677 | payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}===')) | |
5678 | return payload_data | |
5679 | ||
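| # Illustrative round-trip (not part of the original source; note that b64encode above uses | |
| # standard - not URL-safe - base64, unlike strictly spec-compliant JWT implementations): | |
| #   jwt_decode_hs256(jwt_encode_hs256({'id': 1}, 'secret').decode()) == {'id': 1} | |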
5680 | ||
5681 | WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None | |
5682 | ||
5683 | ||
5684 | @functools.cache | |
5685 | def supports_terminal_sequences(stream): | |
5686 | if compat_os_name == 'nt': | |
5687 | if not WINDOWS_VT_MODE: | |
5688 | return False | |
5689 | elif not os.getenv('TERM'): | |
5690 | return False | |
5691 | try: | |
5692 | return stream.isatty() | |
5693 | except BaseException: | |
5694 | return False | |
5695 | ||
5696 | ||
5697 | def windows_enable_vt_mode(): | |
5698 | """Ref: https://bugs.python.org/issue30075 """ | |
5699 | if get_windows_version() < (10, 0, 10586): | |
5700 | return | |
5701 | ||
5702 | import ctypes | |
5703 | import ctypes.wintypes | |
5704 | import msvcrt | |
5705 | ||
5706 | ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004 | |
5707 | ||
5708 | dll = ctypes.WinDLL('kernel32', use_last_error=False) | |
5709 | handle = os.open('CONOUT$', os.O_RDWR) | |
5710 | try: | |
5711 | h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle)) | |
5712 | dw_original_mode = ctypes.wintypes.DWORD() | |
5713 | success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode)) | |
5714 | if not success: | |
5715 | raise Exception('GetConsoleMode failed') | |
5716 | ||
5717 | success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD( | |
5718 | dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING)) | |
5719 | if not success: | |
5720 | raise Exception('SetConsoleMode failed') | |
5721 | finally: | |
5722 | os.close(handle) | |
5723 | ||
5724 | global WINDOWS_VT_MODE | |
5725 | WINDOWS_VT_MODE = True | |
5726 | supports_terminal_sequences.cache_clear() | |
5727 | ||
5728 | ||
5729 | _terminal_sequences_re = re.compile('\033\\[[^m]+m') | |
5730 | ||
5731 | ||
5732 | def remove_terminal_sequences(string): | |
5733 | return _terminal_sequences_re.sub('', string) | |
5734 | ||
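| # Illustrative usage (not part of the original source): | |
| #   remove_terminal_sequences('\033[32mOK\033[0m') == 'OK' | |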
5735 | ||
5736 | def number_of_digits(number): | |
5737 | return len('%d' % number) | |
5738 | ||
5739 | ||
5740 | def join_nonempty(*values, delim='-', from_dict=None): | |
5741 | if from_dict is not None: | |
5742 | values = (traverse_obj(from_dict, variadic(v)) for v in values) | |
5743 | return delim.join(map(str, filter(None, values))) | |
5744 | ||
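| # Illustrative usage (not part of the original source): | |
| #   join_nonempty('1080p', None, '', 'mp4')  == '1080p-mp4'  (falsy values, including 0, are dropped) | |
| #   join_nonempty('a', 'b', delim='.')       == 'a.b' | |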
5745 | ||
5746 | def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re): | |
5747 | """ | |
5748 | Find the largest format dimensions in terms of video width and, for each thumbnail: | |
5749 | * Modify the URL: Match the width with the provided regex and replace with the former width | |
5750 | * Update dimensions | |
5751 | ||
5752 | This function is useful with video services that scale the provided thumbnails on demand | |
5753 | """ | |
5754 | _keys = ('width', 'height') | |
5755 | max_dimensions = max( | |
5756 | (tuple(format.get(k) or 0 for k in _keys) for format in formats), | |
5757 | default=(0, 0)) | |
5758 | if not max_dimensions[0]: | |
5759 | return thumbnails | |
5760 | return [ | |
5761 | merge_dicts( | |
5762 | {'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])}, | |
5763 | dict(zip(_keys, max_dimensions)), thumbnail) | |
5764 | for thumbnail in thumbnails | |
5765 | ] | |
5766 | ||
5767 | ||
5768 | def parse_http_range(range): | |
5769 | """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """ | |
5770 | if not range: | |
5771 | return None, None, None | |
5772 | crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range) | |
5773 | if not crg: | |
5774 | return None, None, None | |
5775 | return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3)) | |
5776 | ||
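| # Illustrative usage (not part of the original source): | |
| #   parse_http_range('bytes 0-499/1234') == (0, 499, 1234) | |
| #   parse_http_range('bytes=500-')       == (500, None, None) | |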
5777 | ||
5778 | def read_stdin(what): | |
5779 | eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D' | |
5780 | write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n') | |
5781 | return sys.stdin | |
5782 | ||
5783 | ||
5784 | def determine_file_encoding(data): | |
5785 | """ | |
5786 | Detect the text encoding used | |
5787 | @returns (encoding, bytes to skip) | |
5788 | """ | |
5789 | ||
5790 | # BOM marks are given priority over declarations | |
5791 | for bom, enc in BOMS: | |
5792 | if data.startswith(bom): | |
5793 | return enc, len(bom) | |
5794 | ||
5795 | # Strip off all null bytes to match even when UTF-16 or UTF-32 is used. | |
5796 | # We ignore the endianness to get a good enough match | |
5797 | data = data.replace(b'\0', b'') | |
5798 | mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', data) | |
5799 | return mobj.group(1).decode() if mobj else None, 0 | |
5800 | ||
5801 | ||
5802 | class Config: | |
5803 | own_args = None | |
5804 | parsed_args = None | |
5805 | filename = None | |
5806 | __initialized = False | |
5807 | ||
5808 | def __init__(self, parser, label=None): | |
5809 | self.parser, self.label = parser, label | |
5810 | self._loaded_paths, self.configs = set(), [] | |
5811 | ||
5812 | def init(self, args=None, filename=None): | |
5813 | assert not self.__initialized | |
5814 | self.own_args, self.filename = args, filename | |
5815 | return self.load_configs() | |
5816 | ||
5817 | def load_configs(self): | |
5818 | directory = '' | |
5819 | if self.filename: | |
5820 | location = os.path.realpath(self.filename) | |
5821 | directory = os.path.dirname(location) | |
5822 | if location in self._loaded_paths: | |
5823 | return False | |
5824 | self._loaded_paths.add(location) | |
5825 | ||
5826 | self.__initialized = True | |
5827 | opts, _ = self.parser.parse_known_args(self.own_args) | |
5828 | self.parsed_args = self.own_args | |
5829 | for location in opts.config_locations or []: | |
5830 | if location == '-': | |
5831 | if location in self._loaded_paths: | |
5832 | continue | |
5833 | self._loaded_paths.add(location) | |
5834 | self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin') | |
5835 | continue | |
5836 | location = os.path.join(directory, expand_path(location)) | |
5837 | if os.path.isdir(location): | |
5838 | location = os.path.join(location, 'yt-dlp.conf') | |
5839 | if not os.path.exists(location): | |
5840 | self.parser.error(f'config location {location} does not exist') | |
5841 | self.append_config(self.read_file(location), location) | |
5842 | return True | |
5843 | ||
5844 | def __str__(self): | |
5845 | label = join_nonempty( | |
5846 | self.label, 'config', f'"{self.filename}"' if self.filename else '', | |
5847 | delim=' ') | |
5848 | return join_nonempty( | |
5849 | self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}', | |
5850 | *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs), | |
5851 | delim='\n') | |
5852 | ||
5853 | @staticmethod | |
5854 | def read_file(filename, default=[]): | |
5855 | try: | |
5856 | optionf = open(filename, 'rb') | |
5857 | except OSError: | |
5858 | return default # silently skip if file is not present | |
5859 | try: | |
5860 | enc, skip = determine_file_encoding(optionf.read(512)) | |
5861 | optionf.seek(skip, io.SEEK_SET) | |
5862 | except OSError: | |
5863 | enc = None # silently skip read errors | |
5864 | try: | |
5865 | # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56 | |
5866 | contents = optionf.read().decode(enc or preferredencoding()) | |
5867 | res = shlex.split(contents, comments=True) | |
5868 | except Exception as err: | |
5869 | raise ValueError(f'Unable to parse "{filename}": {err}') | |
5870 | finally: | |
5871 | optionf.close() | |
5872 | return res | |
5873 | ||
5874 | @staticmethod | |
5875 | def hide_login_info(opts): | |
5876 | PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'} | |
5877 | eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$') | |
5878 | ||
5879 | def _scrub_eq(o): | |
5880 | m = eqre.match(o) | |
5881 | if m: | |
5882 | return m.group('key') + '=PRIVATE' | |
5883 | else: | |
5884 | return o | |
5885 | ||
5886 | opts = list(map(_scrub_eq, opts)) | |
5887 | for idx, opt in enumerate(opts): | |
5888 | if opt in PRIVATE_OPTS and idx + 1 < len(opts): | |
5889 | opts[idx + 1] = 'PRIVATE' | |
5890 | return opts | |
5891 | ||
5892 | def append_config(self, *args, label=None): | |
5893 | config = type(self)(self.parser, label) | |
5894 | config._loaded_paths = self._loaded_paths | |
5895 | if config.init(*args): | |
5896 | self.configs.append(config) | |
5897 | ||
5898 | @property | |
5899 | def all_args(self): | |
5900 | for config in reversed(self.configs): | |
5901 | yield from config.all_args | |
5902 | yield from self.parsed_args or [] | |
5903 | ||
5904 | def parse_known_args(self, **kwargs): | |
5905 | return self.parser.parse_known_args(self.all_args, **kwargs) | |
5906 | ||
5907 | def parse_args(self): | |
5908 | return self.parser.parse_args(self.all_args) | |
5909 | ||
5910 | ||
5911 | class WebSocketsWrapper: | |
5912 | """Wraps websockets module to use in non-async scopes""" | |
5913 | pool = None | |
5914 | ||
5915 | def __init__(self, url, headers=None, connect=True): | |
5916 | self.loop = asyncio.new_event_loop() | |
5917 | # XXX: "loop" is deprecated | |
5918 | self.conn = websockets.connect( | |
5919 | url, extra_headers=headers, ping_interval=None, | |
5920 | close_timeout=float('inf'), loop=self.loop, ping_timeout=float('inf')) | |
5921 | if connect: | |
5922 | self.__enter__() | |
5923 | atexit.register(self.__exit__, None, None, None) | |
5924 | ||
5925 | def __enter__(self): | |
5926 | if not self.pool: | |
5927 | self.pool = self.run_with_loop(self.conn.__aenter__(), self.loop) | |
5928 | return self | |
5929 | ||
5930 | def send(self, *args): | |
5931 | self.run_with_loop(self.pool.send(*args), self.loop) | |
5932 | ||
5933 | def recv(self, *args): | |
5934 | return self.run_with_loop(self.pool.recv(*args), self.loop) | |
5935 | ||
5936 | def __exit__(self, type, value, traceback): | |
5937 | try: | |
5938 | return self.run_with_loop(self.conn.__aexit__(type, value, traceback), self.loop) | |
5939 | finally: | |
5940 | self.loop.close() | |
5941 | self._cancel_all_tasks(self.loop) | |
5942 | ||
5943 | # taken from https://github.com/python/cpython/blob/3.9/Lib/asyncio/runners.py with modifications | |
5944 | # for contributors: If any new library that uses asyncio needs to be run in non-async code, move these functions out of this class | |
5945 | @staticmethod | |
5946 | def run_with_loop(main, loop): | |
5947 | if not asyncio.iscoroutine(main): | |
5948 | raise ValueError(f'a coroutine was expected, got {main!r}') | |
5949 | ||
5950 | try: | |
5951 | return loop.run_until_complete(main) | |
5952 | finally: | |
5953 | loop.run_until_complete(loop.shutdown_asyncgens()) | |
5954 | if hasattr(loop, 'shutdown_default_executor'): | |
5955 | loop.run_until_complete(loop.shutdown_default_executor()) | |
5956 | ||
5957 | @staticmethod | |
5958 | def _cancel_all_tasks(loop): | |
5959 | to_cancel = asyncio.all_tasks(loop) | |
5960 | ||
5961 | if not to_cancel: | |
5962 | return | |
5963 | ||
5964 | for task in to_cancel: | |
5965 | task.cancel() | |
5966 | ||
5967 | # XXX: "loop" is removed in python 3.10+ | |
5968 | loop.run_until_complete( | |
5969 | asyncio.gather(*to_cancel, loop=loop, return_exceptions=True)) | |
5970 | ||
5971 | for task in to_cancel: | |
5972 | if task.cancelled(): | |
5973 | continue | |
5974 | if task.exception() is not None: | |
5975 | loop.call_exception_handler({ | |
5976 | 'message': 'unhandled exception during asyncio.run() shutdown', | |
5977 | 'exception': task.exception(), | |
5978 | 'task': task, | |
5979 | }) | |
5980 | ||
5981 | ||
5982 | def merge_headers(*dicts): | |
5983 | """Merge dicts of http headers case insensitively, prioritizing the latter ones""" | |
5984 | return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))} | |
5985 | ||
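| # Illustrative usage (not part of the original source): | |
| #   merge_headers({'user-agent': 'A', 'X-Test': '1'}, {'USER-AGENT': 'B'}) | |
| #   == {'User-Agent': 'B', 'X-Test': '1'} | |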
5986 | ||
5987 | def cached_method(f): | |
5988 | """Cache a method""" | |
5989 | signature = inspect.signature(f) | |
5990 | ||
5991 | @functools.wraps(f) | |
5992 | def wrapper(self, *args, **kwargs): | |
5993 | bound_args = signature.bind(self, *args, **kwargs) | |
5994 | bound_args.apply_defaults() | |
5995 | key = tuple(bound_args.arguments.values())[1:] | |
5996 | ||
5997 | cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {}) | |
5998 | if key not in cache: | |
5999 | cache[key] = f(self, *args, **kwargs) | |
6000 | return cache[key] | |
6001 | return wrapper | |
6002 | ||
6003 | ||
6004 | class classproperty: | |
6005 | """property access for class methods with optional caching""" | |
6006 | def __new__(cls, func=None, *args, **kwargs): | |
6007 | if not func: | |
6008 | return functools.partial(cls, *args, **kwargs) | |
6009 | return super().__new__(cls) | |
6010 | ||
6011 | def __init__(self, func, *, cache=False): | |
6012 | functools.update_wrapper(self, func) | |
6013 | self.func = func | |
6014 | self._cache = {} if cache else None | |
6015 | ||
6016 | def __get__(self, _, cls): | |
6017 | if self._cache is None: | |
6018 | return self.func(cls) | |
6019 | elif cls not in self._cache: | |
6020 | self._cache[cls] = self.func(cls) | |
6021 | return self._cache[cls] | |
6022 | ||
6023 | ||
6024 | class Namespace(types.SimpleNamespace): | |
6025 | """Immutable namespace""" | |
6026 | ||
6027 | def __iter__(self): | |
6028 | return iter(self.__dict__.values()) | |
6029 | ||
6030 | @property | |
6031 | def items_(self): | |
6032 | return self.__dict__.items() | |
6033 | ||
6034 | ||
6035 | MEDIA_EXTENSIONS = Namespace( | |
6036 | common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'), | |
6037 | video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'), | |
6038 | common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'), | |
6039 | audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'), | |
6040 | thumbnails=('jpg', 'png', 'webp'), | |
6041 | storyboards=('mhtml', ), | |
6042 | subtitles=('srt', 'vtt', 'ass', 'lrc'), | |
6043 | manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'), | |
6044 | ) | |
6045 | MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video | |
6046 | MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio | |
6047 | ||
6048 | KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests) | |
6049 | ||
6050 | ||
6051 | class RetryManager: | |
6052 | """Usage: | |
6053 | for retry in RetryManager(...): | |
6054 | try: | |
6055 | ... | |
6056 | except SomeException as err: | |
6057 | retry.error = err | |
6058 | continue | |
6059 | """ | |
6060 | attempt, _error = 0, None | |
6061 | ||
6062 | def __init__(self, _retries, _error_callback, **kwargs): | |
6063 | self.retries = _retries or 0 | |
6064 | self.error_callback = functools.partial(_error_callback, **kwargs) | |
6065 | ||
6066 | def _should_retry(self): | |
6067 | return self._error is not NO_DEFAULT and self.attempt <= self.retries | |
6068 | ||
6069 | @property | |
6070 | def error(self): | |
6071 | if self._error is NO_DEFAULT: | |
6072 | return None | |
6073 | return self._error | |
6074 | ||
6075 | @error.setter | |
6076 | def error(self, value): | |
6077 | self._error = value | |
6078 | ||
6079 | def __iter__(self): | |
6080 | while self._should_retry(): | |
6081 | self.error = NO_DEFAULT | |
6082 | self.attempt += 1 | |
6083 | yield self | |
6084 | if self.error: | |
6085 | self.error_callback(self.error, self.attempt, self.retries) | |
6086 | ||
6087 | @staticmethod | |
6088 | def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None): | |
6089 | """Utility function for reporting retries""" | |
6090 | if count > retries: | |
6091 | if error: | |
6092 | return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e)) | |
6093 | raise e | |
6094 | ||
6095 | if not count: | |
6096 | return warn(e) | |
6097 | elif isinstance(e, ExtractorError): | |
6098 | e = remove_end(str_or_none(e.cause) or e.orig_msg, '.') | |
6099 | warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...') | |
6100 | ||
6101 | delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func | |
6102 | if delay: | |
6103 | info(f'Sleeping {delay:.2f} seconds ...') | |
6104 | time.sleep(delay) | |
6105 | ||
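# Illustrative sketch of a typical retry loop (the callbacks and
# `download_fragment` are hypothetical placeholders; extractors and downloaders
# normally pass their own reporting helpers):
#
#   def info(msg): print(msg)
#   def warn(msg): print(f'WARNING: {msg}')
#
#   for retry in RetryManager(3, RetryManager.report_retry,
#                             sleep_func=1, info=info, warn=warn):
#       try:
#           download_fragment()
#       except OSError as err:
#           retry.error = err
#           continue
#   # After 1 + 3 failed attempts, report_retry re-raises the last error.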
6106 | ||
6107 | def make_archive_id(ie, video_id): | |
6108 | ie_key = ie if isinstance(ie, str) else ie.ie_key() | |
6109 | return f'{ie_key.lower()} {video_id}' | |
6110 | ||
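# For example (hypothetical values): the archive ID is the lowercased extractor
# key and the video ID, separated by a space.
#
#   make_archive_id('Youtube', 'dQw4w9WgXcQ')   # -> 'youtube dQw4w9WgXcQ'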
6111 | ||
6112 | def truncate_string(s, left, right=0): | |
6113 | assert left > 3 and right >= 0 | |
6114 | if s is None or len(s) <= left + right: | |
6115 | return s | |
6116 | return f'{s[:left-3]}...{s[-right:] if right else ""}' | |
6117 | ||
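# Illustrative examples (hypothetical strings); the result is at most
# `left` + `right` characters long, with the 3-character ellipsis counted
# against the left portion:
#
#   truncate_string('abcdefghij', 5)      # -> 'ab...'
#   truncate_string('abcdefghij', 5, 3)   # -> 'ab...hij'
#   truncate_string('abc', 5)             # -> 'abc' (short enough, returned unchanged)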
6118 | ||
6119 | def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None): | |
6120 | assert 'all' in alias_dict, '"all" alias is required' | |
6121 | requested = list(start or []) | |
6122 | for val in options: | |
6123 | discard = val.startswith('-') | |
6124 | if discard: | |
6125 | val = val[1:] | |
6126 | ||
6127 | if val in alias_dict: | |
6128 | val = alias_dict[val] if not discard else [ | |
6129 | i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]] | |
6130 | # NB: Do not allow regex in aliases for performance | |
6131 | requested = orderedSet_from_options(val, alias_dict, start=requested) | |
6132 | continue | |
6133 | ||
6134 | current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex | |
6135 | else [val] if val in alias_dict['all'] else None) | |
6136 | if current is None: | |
6137 | raise ValueError(val) | |
6138 | ||
6139 | if discard: | |
6140 | for item in current: | |
6141 | while item in requested: | |
6142 | requested.remove(item) | |
6143 | else: | |
6144 | requested.extend(current) | |
6145 | ||
6146 | return orderedSet(requested) | |
6147 | ||
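# Illustrative sketch (hypothetical alias_dict): aliases expand recursively and
# a leading '-' discards previously requested items.
#
#   aliases = {'all': ['a', 'b', 'c'], 'default': ['a', 'b']}
#   orderedSet_from_options(['default', 'c'], aliases)         # -> ['a', 'b', 'c']
#   orderedSet_from_options(['all', '-b'], aliases)            # -> ['a', 'c']
#   orderedSet_from_options(['a.*'], aliases, use_regex=True)  # -> ['a']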
6148 | ||
6149 | class FormatSorter: | |
6150 | regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$' | |
6151 | ||
6152 | default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality', | |
6153 | 'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec', | |
6154 | 'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases | |
6155 | ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr', | |
6156 | 'height', 'width', 'proto', 'vext', 'abr', 'aext', | |
6157 | 'fps', 'fs_approx', 'source', 'id') | |
6158 | ||
6159 | settings = { | |
6160 | 'vcodec': {'type': 'ordered', 'regex': True, | |
6161 | 'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']}, | |
6162 | 'acodec': {'type': 'ordered', 'regex': True, | |
6163 | 'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']}, | |
6164 | 'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range', | |
6165 | 'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]}, | |
6166 | 'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol', | |
6167 | 'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']}, | |
6168 | 'vext': {'type': 'ordered', 'field': 'video_ext', | |
6169 | 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'), | |
6170 | 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')}, | |
6171 | 'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext', | |
6172 | 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'), | |
6173 | 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')}, | |
6174 | 'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000}, | |
6175 | 'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple', | |
6176 | 'field': ('vcodec', 'acodec'), | |
6177 | 'function': lambda it: int(any(v != 'none' for v in it))}, | |
6178 | 'ie_pref': {'priority': True, 'type': 'extractor'}, | |
6179 | 'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)}, | |
6180 | 'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)}, | |
6181 | 'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1}, | |
6182 | 'quality': {'convert': 'float', 'default': -1}, | |
6183 | 'filesize': {'convert': 'bytes'}, | |
6184 | 'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'}, | |
6185 | 'id': {'convert': 'string', 'field': 'format_id'}, | |
6186 | 'height': {'convert': 'float_none'}, | |
6187 | 'width': {'convert': 'float_none'}, | |
6188 | 'fps': {'convert': 'float_none'}, | |
6189 | 'channels': {'convert': 'float_none', 'field': 'audio_channels'}, | |
6190 | 'tbr': {'convert': 'float_none'}, | |
6191 | 'vbr': {'convert': 'float_none'}, | |
6192 | 'abr': {'convert': 'float_none'}, | |
6193 | 'asr': {'convert': 'float_none'}, | |
6194 | 'source': {'convert': 'float', 'field': 'source_preference', 'default': -1}, | |
6195 | ||
6196 | 'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')}, | |
6197 | 'br': {'type': 'combined', 'field': ('tbr', 'vbr', 'abr'), 'same_limit': True}, | |
6198 | 'size': {'type': 'combined', 'same_limit': True, 'field': ('filesize', 'fs_approx')}, | |
6199 | 'ext': {'type': 'combined', 'field': ('vext', 'aext')}, | |
6200 | 'res': {'type': 'multiple', 'field': ('height', 'width'), | |
6201 | 'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))}, | |
6202 | ||
6203 | # Actual field names | |
6204 | 'format_id': {'type': 'alias', 'field': 'id'}, | |
6205 | 'preference': {'type': 'alias', 'field': 'ie_pref'}, | |
6206 | 'language_preference': {'type': 'alias', 'field': 'lang'}, | |
6207 | 'source_preference': {'type': 'alias', 'field': 'source'}, | |
6208 | 'protocol': {'type': 'alias', 'field': 'proto'}, | |
6209 | 'filesize_approx': {'type': 'alias', 'field': 'fs_approx'}, | |
6210 | 'audio_channels': {'type': 'alias', 'field': 'channels'}, | |
6211 | ||
6212 | # Deprecated | |
6213 | 'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True}, | |
6214 | 'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True}, | |
6215 | 'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True}, | |
6216 | 'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True}, | |
6217 | 'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True}, | |
6218 | 'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True}, | |
6219 | 'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True}, | |
6220 | 'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True}, | |
6221 | 'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True}, | |
6222 | 'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True}, | |
6223 | 'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True}, | |
6224 | 'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True}, | |
6225 | 'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True}, | |
6226 | 'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True}, | |
6227 | 'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, | |
6228 | 'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True}, | |
6229 | 'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, | |
6230 | 'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True}, | |
6231 | 'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, | |
6232 | 'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True}, | |
6233 | } | |
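
    # The 'type' key above selects how a field is evaluated:
    #   'ordered'   - the value is ranked against the 'order' list ('regex' entries are patterns)
    #   'boolean'   - scored 0 or -1 by membership in 'in_list' / absence from 'not_in_list'
    #   'extractor' - a preference value supplied by the extractor
    #   'combined'  - expands into its sub-'field's when requested for sorting
    #   'multiple'  - its sub-'field's are combined through 'function'
    #   'alias'     - an alternative (possibly deprecated) name for another field
    #   no type     - the format's own field is compared directly after 'convert'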
6234 | ||
6235 | def __init__(self, ydl, field_preference): | |
6236 | self.ydl = ydl | |
6237 | self._order = [] | |
6238 | self.evaluate_params(self.ydl.params, field_preference) | |
6239 | if ydl.params.get('verbose'): | |
6240 | self.print_verbose_info(self.ydl.write_debug) | |
6241 | ||
6242 | def _get_field_setting(self, field, key): | |
6243 | if field not in self.settings: | |
6244 | if key in ('forced', 'priority'): | |
6245 | return False | |
6246 | self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is ' | |
6247 | 'deprecated and may be removed in a future version') | |
6248 | self.settings[field] = {} | |
6249 | propObj = self.settings[field] | |
6250 | if key not in propObj: | |
6251 | type = propObj.get('type') | |
6252 | if key == 'field': | |
6253 | default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field | |
6254 | elif key == 'convert': | |
6255 | default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore' | |
6256 | else: | |
6257 | default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None) | |
6258 | propObj[key] = default | |
6259 | return propObj[key] | |
6260 | ||
6261 | def _resolve_field_value(self, field, value, convertNone=False): | |
6262 | if value is None: | |
6263 | if not convertNone: | |
6264 | return None | |
6265 | else: | |
6266 | value = value.lower() | |
6267 | conversion = self._get_field_setting(field, 'convert') | |
6268 | if conversion == 'ignore': | |
6269 | return None | |
6270 | if conversion == 'string': | |
6271 | return value | |
6272 | elif conversion == 'float_none': | |
6273 | return float_or_none(value) | |
6274 | elif conversion == 'bytes': | |
6275 | return parse_bytes(value) | |
6276 | elif conversion == 'order': | |
6277 | order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order') | |
6278 | use_regex = self._get_field_setting(field, 'regex') | |
6279 | list_length = len(order_list) | |
6280 | empty_pos = order_list.index('') if '' in order_list else list_length + 1 | |
6281 | if use_regex and value is not None: | |
6282 | for i, regex in enumerate(order_list): | |
6283 | if regex and re.match(regex, value): | |
6284 | return list_length - i | |
6285 | return list_length - empty_pos # not in list | |
6286 | else: # not regex or value = None | |
6287 | return list_length - (order_list.index(value) if value in order_list else empty_pos) | |
6288 | else: | |
6289 | if value.isnumeric(): | |
6290 | return float(value) | |
6291 | else: | |
6292 | self.settings[field]['convert'] = 'string' | |
6293 | return value | |
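
    # For example, with the 'vcodec' order above (11 entries, '' at index 8),
    # an 'order' conversion ranks lower-cased codec strings as:
    #   'av1'  -> 11  (matches 'av0?1', index 0)
    #   'vp9'  -> 9   (matches 'vp0?9', index 2)
    #   'h264' -> 7   (matches '[hx]264|avc', index 4)
    #   'none' -> 1   (matches 'none', index 10)
    #   anything unrecognised -> 3 (the position of the empty entry)
    # so that better codecs get larger values.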
6294 | ||
6295 | def evaluate_params(self, params, sort_extractor): | |
6296 | self._use_free_order = params.get('prefer_free_formats', False) | |
6297 | self._sort_user = params.get('format_sort', []) | |
6298 | self._sort_extractor = sort_extractor | |
6299 | ||
6300 | def add_item(field, reverse, closest, limit_text): | |
6301 | field = field.lower() | |
6302 | if field in self._order: | |
6303 | return | |
6304 | self._order.append(field) | |
6305 | limit = self._resolve_field_value(field, limit_text) | |
6306 | data = { | |
6307 | 'reverse': reverse, | |
6308 | 'closest': False if limit is None else closest, | |
6309 | 'limit_text': limit_text, | |
6310 | 'limit': limit} | |
6311 | if field in self.settings: | |
6312 | self.settings[field].update(data) | |
6313 | else: | |
6314 | self.settings[field] = data | |
6315 | ||
6316 | sort_list = ( | |
6317 | tuple(field for field in self.default if self._get_field_setting(field, 'forced')) | |
6318 | + (tuple() if params.get('format_sort_force', False) | |
6319 | else tuple(field for field in self.default if self._get_field_setting(field, 'priority'))) | |
6320 | + tuple(self._sort_user) + tuple(sort_extractor) + self.default) | |
6321 | ||
6322 | for item in sort_list: | |
6323 | match = re.match(self.regex, item) | |
6324 | if match is None: | |
6325 | raise ExtractorError('Invalid format sort string "%s" given by extractor' % item) | |
6326 | field = match.group('field') | |
6327 | if field is None: | |
6328 | continue | |
6329 | if self._get_field_setting(field, 'type') == 'alias': | |
6330 | alias, field = field, self._get_field_setting(field, 'field') | |
6331 | if self._get_field_setting(alias, 'deprecated'): | |
6332 | self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may ' | |
6333 | f'be removed in a future version. Please use {field} instead') | |
6334 | reverse = match.group('reverse') is not None | |
6335 | closest = match.group('separator') == '~' | |
6336 | limit_text = match.group('limit') | |
6337 | ||
6338 | has_limit = limit_text is not None | |
6339 | has_multiple_fields = self._get_field_setting(field, 'type') == 'combined' | |
6340 | has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit') | |
6341 | ||
6342 | fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,) | |
6343 | limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple() | |
6344 | limit_count = len(limits) | |
6345 | for (i, f) in enumerate(fields): | |
6346 | add_item(f, reverse, closest, | |
6347 | limits[i] if i < limit_count | |
6348 | else limits[0] if has_limit and not has_multiple_limits | |
6349 | else None) | |
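
    # For instance, '+res:480' is parsed by the class regex into field='res',
    # reverse=True and limit_text='480', while 'filesize~100M' sets closest=True
    # so formats nearest the given size are preferred. A 'combined' field such
    # as 'br:800' is expanded here into its sub-fields (tbr, vbr, abr), all
    # sharing the same limit because 'same_limit' is set.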
6350 | ||
6351 | def print_verbose_info(self, write_debug): | |
6352 | if self._sort_user: | |
6353 | write_debug('Sort order given by user: %s' % ', '.join(self._sort_user)) | |
6354 | if self._sort_extractor: | |
6355 | write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor)) | |
6356 | write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % ( | |
6357 | '+' if self._get_field_setting(field, 'reverse') else '', field, | |
6358 | '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':', | |
6359 | self._get_field_setting(field, 'limit_text'), | |
6360 | self._get_field_setting(field, 'limit')) | |
6361 | if self._get_field_setting(field, 'limit_text') is not None else '') | |
6362 | for field in self._order if self._get_field_setting(field, 'visible')])) | |
6363 | ||
6364 | def _calculate_field_preference_from_value(self, format, field, type, value): | |
6365 | reverse = self._get_field_setting(field, 'reverse') | |
6366 | closest = self._get_field_setting(field, 'closest') | |
6367 | limit = self._get_field_setting(field, 'limit') | |
6368 | ||
6369 | if type == 'extractor': | |
6370 | maximum = self._get_field_setting(field, 'max') | |
6371 | if value is None or (maximum is not None and value >= maximum): | |
6372 | value = -1 | |
6373 | elif type == 'boolean': | |
6374 | in_list = self._get_field_setting(field, 'in_list') | |
6375 | not_in_list = self._get_field_setting(field, 'not_in_list') | |
6376 | value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1 | |
6377 | elif type == 'ordered': | |
6378 | value = self._resolve_field_value(field, value, True) | |
6379 | ||
6380 | # try to convert to number | |
6381 | val_num = float_or_none(value, default=self._get_field_setting(field, 'default')) | |
6382 | is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None | |
6383 | if is_num: | |
6384 | value = val_num | |
6385 | ||
6386 | return ((-10, 0) if value is None | |
6387 | else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher | |
6388 | else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest | |
6389 | else (0, value, 0) if not reverse and (limit is None or value <= limit) | |
6390 | else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit | |
6391 | else (-1, value, 0)) | |
6392 | ||
6393 | def _calculate_field_preference(self, format, field): | |
6394 | type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple | |
6395 | get_value = lambda f: format.get(self._get_field_setting(f, 'field')) | |
6396 | if type == 'multiple': | |
6397 | type = 'field' # Only 'field' is allowed in multiple for now | |
6398 | actual_fields = self._get_field_setting(field, 'field') | |
6399 | ||
6400 | value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields) | |
6401 | else: | |
6402 | value = get_value(field) | |
6403 | return self._calculate_field_preference_from_value(format, field, type, value) | |
6404 | ||
6405 | def calculate_preference(self, format): | |
6406 | # Determine missing protocol | |
6407 | if not format.get('protocol'): | |
6408 | format['protocol'] = determine_protocol(format) | |
6409 | ||
6410 | # Determine missing ext | |
6411 | if not format.get('ext') and 'url' in format: | |
6412 | format['ext'] = determine_ext(format['url']) | |
6413 | if format.get('vcodec') == 'none': | |
6414 | format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none' | |
6415 | format['video_ext'] = 'none' | |
6416 | else: | |
6417 | format['video_ext'] = format['ext'] | |
6418 | format['audio_ext'] = 'none' | |
6419 | # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported? | |
6420 | # format['preference'] = -1000 | |
6421 | ||
6422 | if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265|he?vc?', format.get('vcodec') or ''): | |
6423 | # HEVC-over-FLV is not permitted by the original FLV specification | 
6424 | # ref. https://trac.ffmpeg.org/ticket/6389 | |
6425 | # ref. https://github.com/yt-dlp/yt-dlp/pull/5821 | |
6426 | format['preference'] = -100 | |
6427 | ||
6428 | # Determine missing bitrates | |
6429 | if format.get('tbr') is None: | |
6430 | if format.get('vbr') is not None and format.get('abr') is not None: | |
6431 | format['tbr'] = format.get('vbr', 0) + format.get('abr', 0) | |
6432 | else: | |
6433 | if format.get('vcodec') != 'none' and format.get('vbr') is None: | |
6434 | format['vbr'] = format.get('tbr') - format.get('abr', 0) | |
6435 | if format.get('acodec') != 'none' and format.get('abr') is None: | |
6436 | format['abr'] = format.get('tbr') - format.get('vbr', 0) | |
6437 | ||
6438 | return tuple(self._calculate_field_preference(format, field) for field in self._order) | |
6439 | ||
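# Illustrative sketch of how the sorter is typically driven (`ydl` and `formats`
# are hypothetical placeholders): every format dict is mapped to a tuple of
# per-field preferences, which then serves as the sort key.
#
#   sorter = FormatSorter(ydl, ['res:1080', '+size'])
#   formats.sort(key=sorter.calculate_preference)
#   best = formats[-1]   # the sort is ascending, so the best format comes last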
6440 | ||
6441 | # Deprecated | |
6442 | has_certifi = bool(certifi) | |
6443 | has_websockets = bool(websockets) | |
6444 | ||
6445 | ||
6446 | def load_plugins(name, suffix, namespace): | |
6447 | from .plugins import load_plugins | |
6448 | ret = load_plugins(name, suffix) | |
6449 | namespace.update(ret) | |
6450 | return ret |