# Source: yt-dlp (yt_dlp/utils/_utils.py) — via jfr.im git blame view
# [misc] Cleanup (#9765)
1import base64
2import binascii
3import calendar
4import codecs
5import collections
6import collections.abc
7import contextlib
8import datetime as dt
9import email.header
10import email.utils
11import errno
12import hashlib
13import hmac
14import html.entities
15import html.parser
16import inspect
17import io
18import itertools
19import json
20import locale
21import math
22import mimetypes
23import netrc
24import operator
25import os
26import platform
27import random
28import re
29import shlex
30import socket
31import ssl
32import struct
33import subprocess
34import sys
35import tempfile
36import time
37import traceback
38import types
39import unicodedata
40import urllib.error
41import urllib.parse
42import urllib.request
43import xml.etree.ElementTree
44
45from . import traversal
46
47from ..compat import functools # isort: split
48from ..compat import (
49 compat_etree_fromstring,
50 compat_expanduser,
51 compat_HTMLParseError,
52 compat_os_name,
53)
54from ..dependencies import xattr
55
__name__ = __name__.rsplit('.', 1)[0]  # Pretend to be the parent module

# This is not clearly defined otherwise
# (the concrete class of compiled patterns is an implementation detail of `re`)
compiled_regex_type = type(re.compile(''))
60
61
class NO_DEFAULT:
    """Sentinel used to distinguish 'no argument given' from an explicit None"""
    pass
64
65
def IDENTITY(x):
    """Identity function: return the argument unchanged"""
    return x
68
69
# English month names, in calendar order (index 0 == January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Localized month-name lists used for date parsing, keyed by language code
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
    # these follow the genitive grammatical case (dopełniacz)
    # some websites might be using nominative, which will require another month list
    # https://en.wikibooks.org/wiki/Polish/Noun_cases
    'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
           'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
}

# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
# Maps timezone abbreviation -> offset from UTC in hours
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7   # Pacific
}

# needed for sanitizing filenames in restricted mode
# (maps each accented character to an ASCII transliteration; the chain keeps
# the replacement sequence aligned 1:1 with the key string)
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
100
# strptime format strings tried when parsing dates of unknown layout
# (formats here are unambiguous with respect to day/month order)
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# Same as DATE_FORMATS, plus ambiguous formats interpreted day-first
DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
DATE_FORMATS_DAY_FIRST.extend([
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
    '%H:%M %d/%m/%Y',
])

# Same as DATE_FORMATS, plus ambiguous formats interpreted month-first
DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
DATE_FORMATS_MONTH_FIRST.extend([
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
])

# Matches the trailing argument list of "packed" (p,a,c,k,e,d-style) JavaScript
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> block, capturing the JSON payload
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'

# An unsigned integer or decimal number
NUMBER_RE = r'\d+(?:\.\d+)?'
171
172
173@functools.cache
174def preferredencoding():
175 """Get preferred encoding.
176
177 Returns the best encoding scheme for the system, based on
178 locale.getpreferredencoding() and some further tweaks.
179 """
180 try:
181 pref = locale.getpreferredencoding()
182 'TEST'.encode(pref)
183 except Exception:
184 pref = 'UTF-8'
185
186 return pref
187
188
def write_json_file(obj, fn):
    """ Encode obj as JSON and write it to fn, atomically if possible """

    tmp = tempfile.NamedTemporaryFile(
        mode='w', encoding='utf-8', suffix='.tmp', delete=False,
        prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn))

    try:
        with tmp:
            json.dump(obj, tmp, ensure_ascii=False)
        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            with contextlib.suppress(OSError):
                os.unlink(fn)
        with contextlib.suppress(OSError):
            # NamedTemporaryFile creates files with mode 0o600; widen to the
            # process default (0o666 minus the current umask)
            mask = os.umask(0)
            os.umask(mask)
            os.chmod(tmp.name, 0o666 & ~mask)
        os.rename(tmp.name, fn)
    except Exception:
        # Best-effort cleanup of the orphaned temporary file
        with contextlib.suppress(OSError):
            os.remove(tmp.name)
        raise
213
214
def find_xpath_attr(node, xpath, key, val=None):
    """ Find the xpath xpath[@key=val] """
    # Guard against keys that would break the expression syntax
    assert re.match(r'^[a-zA-Z_-]+$', key)
    if val is None:
        predicate = '[@%s]' % key
    else:
        predicate = f"[@{key}='{val}']"
    return node.find(xpath + predicate)
220
# Historical note: on Python 2.6 the xml.etree.ElementTree.Element methods
# did not support the namespaces parameter, hence the helper below

224
def xpath_with_ns(path, ns_map):
    """Expand 'prefix:tag' components of an xpath into '{uri}tag' using ns_map"""
    expanded = []
    for component in path.split('/'):
        parts = component.split(':')
        if len(parts) == 1:
            expanded.append(parts[0])
        else:
            prefix, tag = parts
            expanded.append('{%s}%s' % (ns_map[prefix], tag))
    return '/'.join(expanded)
235
236
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Find the first element matching xpath (a string, or an iterable of
    candidate strings tried in order)."""
    if isinstance(xpath, str):
        found = node.find(xpath)
    else:
        for candidate in xpath:
            found = node.find(candidate)
            if found is not None:
                break

    if found is not None:
        return found
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element %s' % (xpath if name is None else name))
    return None
258
259
def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Like xpath_element, but return the matched element's text content."""
    element = xpath_element(node, xpath, name, fatal=fatal, default=default)
    if element is None or element == default:
        return element
    if element.text is not None:
        return element.text
    # The element exists but carries no text
    if default is not NO_DEFAULT:
        return default
    if fatal:
        raise ExtractorError('Could not find XML element\'s text %s' % (xpath if name is None else name))
    return None
273
274
def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
    """Find xpath[@key] and return the value of the attribute `key`."""
    element = find_xpath_attr(node, xpath, key)
    if element is not None:
        return element.attrib[key]
    if default is not NO_DEFAULT:
        return default
    if fatal:
        if name is None:
            name = f'{xpath}[@{key}]'
        raise ExtractorError('Could not find XML attribute %s' % name)
    return None
286
287
def get_element_by_id(id, html, **kwargs):
    """Return the content of the tag with the specified ID in the passed HTML document"""
    # Delegates to the generic attribute lookup with attribute == 'id'
    return get_element_by_attribute('id', id, html, **kwargs)
291
292
def get_element_html_by_id(id, html, **kwargs):
    """Return the html of the tag with the specified ID in the passed HTML document"""
    # Delegates to the generic attribute lookup with attribute == 'id'
    return get_element_html_by_attribute('id', id, html, **kwargs)
296
297
def get_element_by_class(class_name, html):
    """Return the content of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_by_class(class_name, html)
    return matches[0] if matches else None
302
303
def get_element_html_by_class(class_name, html):
    """Return the html of the first tag with the specified class in the passed HTML document"""
    matches = get_elements_html_by_class(class_name, html)
    return matches[0] if matches else None
308
309
def get_element_by_attribute(attribute, value, html, **kwargs):
    """Return the content of the first tag with the specified attribute in the passed HTML document"""
    matches = get_elements_by_attribute(attribute, value, html, **kwargs)
    return matches[0] if matches else None
313
314
def get_element_html_by_attribute(attribute, value, html, **kargs):
    """Return the html of the first tag with the specified attribute in the passed HTML document"""
    matches = get_elements_html_by_attribute(attribute, value, html, **kargs)
    return matches[0] if matches else None
318
319
def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # Match the class name as a whole word inside the (possibly multi-valued) attribute
    class_value_re = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    return get_elements_by_attribute('class', class_value_re, html, escape_value=False)
325
326
def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    # Match the class name as a whole word inside the (possibly multi-valued) attribute
    class_value_re = r'[^\'"]*(?<=[\'"\s])%s(?=[\'"\s])[^\'"]*' % re.escape(class_name)
    return get_elements_html_by_attribute('class', class_value_re, html, escape_value=False)
332
333
def get_elements_by_attribute(*args, **kwargs):
    """Return the content of the tag with the specified attribute in the passed HTML document"""
    return [text for text, _whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
337
338
def get_elements_html_by_attribute(*args, **kwargs):
    """Return the html of the tag with the specified attribute in the passed HTML document"""
    return [whole for _text, whole in get_elements_text_and_html_by_attribute(*args, **kwargs)]
342
343
def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
    """
    Return the text (content) and the html (whole) of the tag with the specified
    attribute in the passed HTML document
    """
    if not value:
        return

    # If the value contains characters that cannot appear unquoted in an HTML
    # attribute, require quotes; otherwise make them optional
    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    # Matches the opening tag up to and including the target attribute; the
    # (?-x:...) section keeps the (possibly caller-supplied) value regex verbatim
    partial_element_re = rf'''(?x)
        <(?P<tag>{tag})
         (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
         \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
        '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            # Strip one level of enclosing quotes, then decode HTML entities
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole
        )
369
370
class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
    """
    HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
    closing tag for the first opening tag it has encountered, and can be used
    as a context manager
    """

    class HTMLBreakOnClosingTagException(Exception):
        # Control-flow exception, not an error
        pass

    def __init__(self):
        # Stack of currently-open tag names
        self.tagstack = collections.deque()
        html.parser.HTMLParser.__init__(self)

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')
        # Pop until the matching opening tag is found (tolerates unclosed inner tags)
        while self.tagstack:
            inner_tag = self.tagstack.pop()
            if inner_tag == tag:
                break
        else:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
        if not self.tagstack:
            # The very first opened tag has now been closed
            raise self.HTMLBreakOnClosingTagException()
411
412
# XXX: This should be far less strict
def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its' content (text) and the whole element (html)
    """
    def find_or_raise(haystack, needle, exc):
        # str.index, but raising the given parse error instead of ValueError
        try:
            return haystack.index(needle)
        except ValueError:
            raise exc
    closing_tag = f'</{tag}>'
    whole_start = find_or_raise(
        html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
    content_start = find_or_raise(
        html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
    content_start += whole_start + 1
    with HTMLBreakOnClosingTagParser() as parser:
        # Feed only the opening tag first so we can verify the parser agrees
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
        offset = content_start
        # Feed chunks up to each candidate closing tag until the parser signals
        # that the outermost tag has actually been closed
        while offset < len(html):
            next_closing_tag_start = find_or_raise(
                html[offset:], closing_tag,
                compat_HTMLParseError(f'closing {tag} tag not found'))
            next_closing_tag_end = next_closing_tag_start + len(closing_tag)
            try:
                parser.feed(html[offset:offset + next_closing_tag_end])
                offset += next_closing_tag_end
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:offset + next_closing_tag_start], \
                    html[whole_start:offset + next_closing_tag_end]
        raise compat_HTMLParseError('unexpected end of html')
447
448
class HTMLAttributeParser(html.parser.HTMLParser):
    """Trivial HTML parser to gather the attributes for a single element"""

    def __init__(self):
        self.attrs = {}
        super().__init__()

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)
        # Abort parsing as soon as the first tag has been seen
        raise compat_HTMLParseError('done')
459
460
class HTMLListAttrsParser(html.parser.HTMLParser):
    """HTML parser to gather the attributes for the elements of a list"""

    def __init__(self):
        super().__init__()
        self.items = []
        self._level = 0  # current nesting depth relative to the fed fragment

    def handle_starttag(self, tag, attrs):
        # Only record top-level <li> elements; nested tags are ignored
        if self._level == 0 and tag == 'li':
            self.items.append(dict(attrs))
        self._level += 1

    def handle_endtag(self, tag):
        self._level -= 1
476
477
def extract_attributes(html_element):
    """Given a string for an HTML element such as
    <el
         a="foo" B="bar" c="&98;az" d=boz
         empty= noval entity="&amp;"
         sq='"' dq="'"
    >
    Decode and return a dictionary of attributes.
    {
        'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
        'empty': '', 'noval': None, 'entity': '&',
        'sq': '"', 'dq': '\''
    }.
    """
    attr_parser = HTMLAttributeParser()
    try:
        attr_parser.feed(html_element)
        attr_parser.close()
    except compat_HTMLParseError:
        # Raised by the parser once the first tag has been consumed
        pass
    return attr_parser.attrs
497
498
def parse_list(webpage):
    """Given a string for an series of HTML <li> elements,
    return a dictionary of their attributes"""
    list_parser = HTMLListAttrsParser()
    list_parser.feed(webpage)
    list_parser.close()
    return list_parser.items
506
507
def clean_html(html):
    """Clean an HTML snippet into a readable string"""

    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Order matters: whitespace is collapsed before tag handling
    substitutions = (
        (r'\s+', ' '),                                # collapse whitespace
        (r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n'),         # <br> -> newline
        (r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n'),   # paragraph boundary -> newline
        (r'<.*?>', ''),                               # strip remaining tags
    )
    for pattern, replacement in substitutions:
        html = re.sub(pattern, replacement, html)
    # Replace html entities
    return unescapeHTML(html).strip()
522
523
class LenientJSONDecoder(json.JSONDecoder):
    """JSONDecoder that can optionally transform the input, ignore trailing
    data and attempt to close truncated objects/arrays before giving up."""
    # TODO: Write tests
    def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
        # transform_source: callable applied to the string before decoding
        # ignore_extra: ignore any data after the first JSON value
        # close_objects: how many unterminated objects/arrays to try closing
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        # Each close may take two attempts: insert ',' first, then the bracket
        self._close_attempts = 2 * close_objects
        super().__init__(*args, **kwargs)

    @staticmethod
    def _close_object(err):
        # Given a JSONDecodeError, return a repaired document (or None if the
        # error does not look like a truncation we can fix)
        doc = err.doc[:err.pos]
        # We need to add comma first to get the correct error message
        if err.msg.startswith('Expecting \',\''):
            return doc + ','
        elif not doc.endswith(','):
            return

        if err.msg.startswith('Expecting property name'):
            return doc[:-1] + '}'
        elif err.msg.startswith('Expecting value'):
            return doc[:-1] + ']'

    def decode(self, s):
        if self.transform_source:
            s = self.transform_source(s)
        for attempt in range(self._close_attempts + 1):
            try:
                if self.ignore_extra:
                    return self.raw_decode(s.lstrip())[0]
                return super().decode(s)
            except json.JSONDecodeError as e:
                if e.pos is None:
                    raise
                elif attempt < self._close_attempts:
                    s = self._close_object(e)
                    if s is not None:
                        continue
                # Re-raise with a snippet of the offending context for debugging
                raise type(e)(f'{e.msg} in {s[e.pos - 10:e.pos + 10]!r}', s, e.pos)
        assert False, 'Too many attempts to decode JSON'
562
563
def sanitize_open(filename, open_mode):
    """Try to open the given filename, and slightly tweak it if this fails.

    Attempts to open the given filename. If this fails, it tries to change
    the filename slightly, step by step, until it's either able to open it
    or it fails and raises a final exception, like the standard open()
    function.

    It returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt

            # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
            with contextlib.suppress(io.UnsupportedOperation):
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
        return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)

    # First attempt uses the name as given; second retries with a sanitized path
    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError()
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                # Fall back to an unlocked open if locking is unavailable
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            if attempt or err.errno in (errno.EACCES,):
                raise
            old_filename, filename = filename, sanitize_path(filename)
            if old_filename == filename:
                # Sanitization changed nothing; retrying would loop forever
                raise
601
602
def timeconvert(timestr):
    """Convert RFC 2822 defined time string into system timestamp"""
    parsed = email.utils.parsedate_tz(timestr)
    if parsed is None:
        # Not a parseable RFC 2822 date
        return None
    return email.utils.mktime_tz(parsed)
610
611
def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    def replace_insane(char):
        # NUL ('\0') marks substitute characters so that repeated/leading/
        # trailing substitutes can be collapsed or stripped further below
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
            # Replace with their full-width unicode counterparts
            return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '' if unicodedata.category(char)[0] in 'CM' else '\0_'
        return char

    # Replace look-alike Unicode glyphs
    if restricted and (is_id is NO_DEFAULT or not is_id):
        s = unicodedata.normalize('NFKC', s)
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
665
666
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows"""
    # XXX: this handles drive relative paths (c:sth) incorrectly
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        # On non-Windows platforms paths are left untouched unless forced
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # Replace characters invalid in Windows path components (and trailing
    # whitespace/dots) with '#', keeping '.'/'..' components intact
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        # Preserve absoluteness of the original path when forced
        sanitized_path.insert(0, os.path.sep)
    # TODO: Fix behavioral differences <3.12
    # The workaround using `normpath` only superficially passes tests
    # Ref: https://github.com/python/cpython/pull/100351
    return os.path.normpath(os.path.join(*sanitized_path))
692
693
def sanitize_url(url, *, scheme='http'):
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return None
    if url.startswith('//'):
        return f'{scheme}:{url}'
    # Fix some common typos seen so far
    common_typos = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for typo_re, fixup in common_typos:
        if re.match(typo_re, url):
            return re.sub(typo_re, fixup, url)
    return url
712
713
def extract_basic_auth(url):
    """Strip userinfo from `url`; return (clean_url, Basic auth header value or None)"""
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None
    # Rebuild the netloc without the user:password@ prefix
    netloc = parts.hostname if parts.port is None else '%s:%d' % (parts.hostname, parts.port)
    clean_url = urllib.parse.urlunsplit(parts._replace(netloc=netloc))
    credentials = '%s:%s' % (parts.username, parts.password or '')
    return clean_url, f'Basic {base64.b64encode(credentials.encode()).decode()}'
724
725
def expand_path(s):
    """Expand shell variables and ~"""
    expanded_user = compat_expanduser(s)
    return os.path.expandvars(expanded_user)
729
730
def orderedSet(iterable, *, lazy=False):
    """Remove all duplicates from the input iterable"""
    def _dedupe():
        seen = []  # a list, not a set: the items may be unhashable
        for item in iterable:
            if item not in seen:
                seen.append(item)
                yield item

    return _dedupe() if lazy else list(_dedupe())
741
742
743def _htmlentity_transform(entity_with_semicolon):
744 """Transforms an HTML entity to a character."""
745 entity = entity_with_semicolon[:-1]
746
747 # Known non-numeric HTML entity
748 if entity in html.entities.name2codepoint:
749 return chr(html.entities.name2codepoint[entity])
750
751 # TODO: HTML5 allows entities without a semicolon.
752 # E.g. '&Eacuteric' should be decoded as 'Éric'.
753 if entity_with_semicolon in html.entities.html5:
754 return html.entities.html5[entity_with_semicolon]
755
756 mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
757 if mobj is not None:
758 numstr = mobj.group(1)
759 if numstr.startswith('x'):
760 base = 16
761 numstr = '0%s' % numstr
762 else:
763 base = 10
764 # See https://github.com/ytdl-org/youtube-dl/issues/7518
765 with contextlib.suppress(ValueError):
766 return chr(int(numstr, base))
767
768 # Unknown entity in name, return its literal representation
769 return '&%s;' % entity
770
771
def unescapeHTML(s):
    """Replace HTML entities in `s` by their characters (None passes through)"""
    if s is None:
        return None
    assert isinstance(s, str)

    def _replace(mobj):
        return _htmlentity_transform(mobj.group(1))

    return re.sub(r'&([^&;]+;)', _replace, s)
779
780
def escapeHTML(text):
    """Escape &, <, >, " and ' for safe embedding into HTML"""
    # A single-pass translation is equivalent to the chained replaces
    # (the '&' pass never re-escapes the '&' introduced by other entities)
    return text.translate(str.maketrans({
        '&': '&amp;',
        '<': '&lt;',
        '>': '&gt;',
        '"': '&quot;',
        "'": '&#39;',
    }))
790
791
class netrc_from_content(netrc.netrc):
    """netrc.netrc, but parsed from an in-memory string instead of a file"""

    def __init__(self, content):
        # Deliberately skip netrc.netrc.__init__ (which reads from disk)
        self.hosts = {}
        self.macros = {}
        with io.StringIO(content) as stream:
            self._parse('-', stream, False)
797
798
class Popen(subprocess.Popen):
    """subprocess.Popen with fixes for hidden console windows, PyInstaller
    environments and cmd.exe invocation"""

    if sys.platform == 'win32':
        # Prevent a console window from flashing up for subprocesses
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    @staticmethod
    def _fix_pyinstaller_ld_path(env):
        """Restore LD_LIBRARY_PATH when using PyInstaller
           Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
                https://github.com/yt-dlp/yt-dlp/issues/4573
        """
        if not hasattr(sys, '_MEIPASS'):
            # Not running from a PyInstaller bundle
            return

        def _fix(key):
            orig = env.get(f'{key}_ORIG')
            if orig is None:
                env.pop(key, None)
            else:
                env[key] = orig

        _fix('LD_LIBRARY_PATH')  # Linux
        _fix('DYLD_LIBRARY_PATH')  # macOS

    def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
        if env is None:
            env = os.environ.copy()
        self._fix_pyinstaller_ld_path(env)

        # Remember whether streams are text-mode so run() can pick a default
        self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')

        if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
            # Invoke cmd.exe explicitly instead of relying on shell=True
            if not isinstance(args, str):
                args = shell_quote(args, shell=True)
            shell = False
            # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
            env['='] = '"^\n\n"'
            args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'

        super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)

    def __comspec(self):
        # Resolve the cmd.exe path from the environment, requiring it absolute
        comspec = os.environ.get('ComSpec') or os.path.join(
            os.environ.get('SystemRoot', ''), 'System32', 'cmd.exe')
        if os.path.isabs(comspec):
            return comspec
        raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')

    def communicate_or_kill(self, *args, **kwargs):
        """communicate(), but kill the process if interrupted"""
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        super().kill()
        if timeout != 0:
            # Optionally wait for the process to actually terminate
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, timeout=None, **kwargs):
        """Run the command to completion; return (stdout, stderr, returncode)"""
        with cls(*args, **kwargs) as proc:
            default = '' if proc.__text_mode else b''
            stdout, stderr = proc.communicate_or_kill(timeout=timeout)
            return stdout or default, stderr or default, proc.returncode
871
872
def encodeArgument(s):
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    if isinstance(s, str):
        return s
    return s.decode('ascii')
878
879
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a duration in milliseconds into an (hours, minutes, seconds, milliseconds) tuple"""
    seconds, milliseconds = divmod(msec, 1000)
    minutes, seconds = divmod(seconds, 60)
    hours, minutes = divmod(minutes, 60)
    return _timetuple(hours, minutes, seconds, milliseconds)
888
889
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H<delim>]M<delim>SS, optionally with .mmm"""
    t = timetuple_from_msec(secs * 1000)
    if t.hours:
        formatted = '%d%s%02d%s%02d' % (t.hours, delim, t.minutes, delim, t.seconds)
    elif t.minutes:
        formatted = '%d%s%02d' % (t.minutes, delim, t.seconds)
    else:
        formatted = '%d' % t.seconds
    if msec:
        formatted = '%s.%03d' % (formatted, t.milliseconds)
    return formatted
899
900
def bug_reports_message(before=';'):
    """Return the standard bug-report blurb, joined after `before` (capitalized
    when it starts a sentence)"""
    from ..update import REPOSITORY

    msg = (f'please report this issue on  https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U')

    before = before.rstrip()
    if not before or before.endswith(('.', '!', '?')):
        msg = msg[0].title() + msg[1:]

    if not before:
        return msg
    return before + ' ' + msg
912
913
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    # Subclasses may override this with a default message
    msg = None

    def __init__(self, msg=None):
        if msg is None:
            # Fall back to the subclass default, then to the class name
            msg = self.msg if self.msg is not None else type(self).__name__
        self.msg = msg
        super().__init__(self.msg)
924
925
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
        """
        from ..networking.exceptions import network_exceptions
        # Network failures are never treated as bugs in yt-dlp
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception
        # When re-raised from another ExtractorError, keep the innermost exc_info
        if isinstance(self.exc_info[1], ExtractorError):
            self.exc_info = self.exc_info[1].exc_info
        super().__init__(self.__msg)

    @property
    def __msg(self):
        # Full message: "[ie] video_id: msg (caused by ...)" plus the
        # bug-report blurb unless the error was expected
        return ''.join((
            format_field(self.ie, None, '[%s] '),
            format_field(self.video_id, None, '%s: '),
            self.orig_msg,
            format_field(self.cause, None, ' (caused by %r)'),
            '' if self.expected else bug_reports_message()))

    def format_traceback(self):
        """Return the formatted traceback of the error and its cause, or None"""
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None

    def __setattr__(self, name, value):
        super().__setattr__(name, value)
        # Keep msg/args in sync whenever a field used by __msg changes
        if getattr(self, 'msg', None) and name not in ('msg', 'args'):
            self.msg = self.__msg or type(self).__name__
            self.args = (self.msg, )  # Cannot be property
968
969
class UnsupportedError(ExtractorError):
    """Raised when no extractor supports the given URL"""

    def __init__(self, url):
        message = 'Unsupported URL: %s' % url
        super().__init__(message, expected=True)
        self.url = url
975
976
class RegexNotFoundError(ExtractorError):
    """Error when a regex didn't match"""
    pass
980
981
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    This exception may be thrown when a video is not available from your
    geographic location due to geographic restrictions imposed by a website.
    """

    def __init__(self, msg, countries=None, **kwargs):
        # Geo restrictions are always expected (not a yt-dlp bug)
        kwargs['expected'] = True
        super().__init__(msg, **kwargs)
        # countries, if given, is the list of countries the video is available in
        self.countries = countries
993
994
class UserNotLive(ExtractorError):
    """Error when a channel/user is not live"""

    def __init__(self, msg=None, **kwargs):
        # Not being live is always expected (not a yt-dlp bug)
        kwargs['expected'] = True
        super().__init__(msg or 'The channel is not currently live', **kwargs)
1001
1002
class DownloadError(YoutubeDLError):
    """Download Error exception.

    This exception may be thrown by FileDownloader objects if they are not
    configured to continue on errors. They will contain the appropriate
    error message.
    """

    def __init__(self, msg, exc_info=None):
        # exc_info, if given, is the original exception that caused the
        # trouble (as returned by sys.exc_info())
        self.exc_info = exc_info
        super().__init__(msg)
1015
1016
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    This exception will be thrown by YoutubeDL when a requested entry
    is not found in the playlist info_dict
    """
    # Default error message
    msg = 'Entry not found in info'
1024
1025
class SameFileError(YoutubeDLError):
    """Same File exception.

    This exception will be thrown by FileDownloader objects if they detect
    multiple files would have to be downloaded to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        if filename is not None:
            # Include the colliding filename in the message when known.
            # (Bug fix: the f-string previously contained the literal text
            # '(unknown)' instead of interpolating `filename`.)
            self.msg += f': {filename}'
        super().__init__(self.msg)
1038
1039
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    This exception may be raised by a PostProcessor's .run() method to
    indicate an error in the postprocessing task.
    """
1046
1047
class DownloadCancelled(YoutubeDLError):
    """ Exception raised when the download queue should be interrupted """
    # Default message; subclasses override `msg` with a more specific reason
    msg = 'The download was cancelled'
1051
1052
class ExistingVideoReached(DownloadCancelled):
    """ --break-on-existing triggered """
    # Shown to the user when the queue is stopped
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
1056
1057
class RejectedVideoReached(DownloadCancelled):
    """ --break-match-filter triggered """
    # Shown to the user when the queue is stopped
    msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
1061
1062
class MaxDownloadsReached(DownloadCancelled):
    """ --max-downloads limit has been reached. """
    # Shown to the user when the queue is stopped
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
1066
1067
class ReExtractInfo(YoutubeDLError):
    """Raised when a video's info needs to be extracted again."""

    def __init__(self, msg, expected=False):
        super().__init__(msg)
        # Whether re-extraction is an anticipated condition rather than a bug
        self.expected = expected
1074
1075
class ThrottledDownload(ReExtractInfo):
    """ Download speed below --throttled-rate. """
    msg = 'The download speed is below throttle limit'

    def __init__(self):
        # Throttling indicates the formats should be re-extracted, and is
        # never an "expected" condition
        super().__init__(self.msg, expected=False)
1082
1083
class UnavailableVideoError(YoutubeDLError):
    """Raised when a video is requested in a format that is not available
    for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        if err is not None:
            # Append the underlying error details when provided
            self.msg = f'{self.msg}: {err}'
        super().__init__(self.msg)
1096
1097
class ContentTooShortError(YoutubeDLError):
    """Raised by FileDownloader objects when a downloaded file is smaller
    than what the server announced, indicating the connection was
    probably interrupted.
    """

    def __init__(self, downloaded, expected):
        # Both sizes are in bytes
        self.downloaded = downloaded
        self.expected = expected
        super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
1111
1112
class XAttrMetadataError(YoutubeDLError):
    """Raised when writing xattr metadata fails; classifies the failure
    into `self.reason` ('NO_SPACE', 'VALUE_TOO_LONG' or 'NOT_SUPPORTED')."""

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code, self.msg = code, msg

        # Map errno / message text onto a coarse failure category
        if self.code in (errno.ENOSPC, errno.EDQUOT) or any(
                text in self.msg for text in ('No space left', 'Disk quota exceeded')):
            self.reason = 'NO_SPACE'
        elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
            self.reason = 'VALUE_TOO_LONG'
        else:
            self.reason = 'NOT_SUPPORTED'
1127
1128
class XAttrUnavailableError(YoutubeDLError):
    """Raised when xattr metadata cannot be written because support is unavailable."""
    pass
1131
1132
def is_path_like(f):
    """Return True if `f` is usable as a filesystem path (str, bytes or PathLike)."""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
1135
1136
def extract_timezone(date_str, default=None):
    """Split a trailing timezone off `date_str`.

    Returns (timezone, date_str): `timezone` is a dt.timedelta UTC offset
    (None when nothing was found and `default` is NO_DEFAULT), and
    `date_str` has the recognized timezone portion removed.
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
            (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                       # preceded by 4 digits or hh:mm or
               (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))         # not preceded by 3 alpha word or >= 4 alpha or 2 digits
            [ ]?                                                 # optional space
            (?P<sign>\+|-)                                       # +/-
            (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})           # hh[:]mm
            $)
        ''', date_str)
    timezone = None

    if not m:
        # No numeric offset; try a trailing named timezone (e.g. ' EST')
        m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
        timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
        if timezone is not None:
            date_str = date_str[:-len(m.group('tz'))]
            timezone = dt.timedelta(hours=timezone)
    else:
        date_str = date_str[:-len(m.group('tz'))]
        if m.group('sign'):
            # 'Z' has no sign group; numeric offsets do
            sign = 1 if m.group('sign') == '+' else -1
            timezone = dt.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))

    if timezone is None and default is not NO_DEFAULT:
        timezone = default or dt.timedelta()

    return timezone, date_str
1169
1170
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date """
    if date_str is None:
        return None

    # Drop fractional seconds: strptime's %S does not accept them
    date_str = re.sub(r'\.[0-9]+', '', date_str)
    timezone, date_str = extract_timezone(date_str, timezone)

    with contextlib.suppress(ValueError, TypeError):
        naive = dt.datetime.strptime(date_str, f'%Y-%m-%d{delimiter}%H:%M:%S') - timezone
        return calendar.timegm(naive.timetuple())
1185
1186
def date_formats(day_first=True):
    """Return the strptime format list matching the requested day/month order."""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
1189
1190
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD"""
    if date_str is None:
        return None

    # Normalize: commas to spaces, strip AM/PM markers and timezone info
    date_str = date_str.replace(',', ' ')
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    upload_date = None
    # Deliberately no break: later formats may override an earlier match
    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            upload_date = dt.datetime.strptime(date_str, expression).strftime('%Y%m%d')
    if upload_date is None:
        # Fall back to RFC 2822 style parsing
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            with contextlib.suppress(ValueError):
                upload_date = dt.datetime(*timetuple[:6]).strftime('%Y%m%d')
    if upload_date is not None:
        return str(upload_date)
1213
1214
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from `date_str` using many known formats.

    @param day_first  whether ambiguous dates are interpreted day-first (DD/MM)
    """
    if not isinstance(date_str, str):
        return None

    # Strip commas/pipes and weekday names, then collapse whitespace
    date_str = re.sub(r'\s+', ' ', re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))

    # Note the PM marker before it gets stripped below
    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            dt_ = dt.datetime.strptime(date_str, expression) - timezone + dt.timedelta(hours=pm_delta)
            return calendar.timegm(dt_.timetuple())

    # Last resort: RFC 2822 style parsing
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
1246
1247
def determine_ext(url, default_ext='unknown_video'):
    """Guess a file extension from a URL, falling back to `default_ext`."""
    if url is None or '.' not in url:
        return default_ext
    # Everything after the last '.' of the path part (query string removed)
    guess = url.partition('?')[0].rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', guess):
        return guess
    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    stripped = guess.rstrip('/')
    if stripped in KNOWN_EXTENSIONS:
        return stripped
    return default_ext
1259
1260
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Build the subtitle filename for `filename`, tagged with language and format."""
    return replace_extension(filename, f'{sub_lang}.{sub_format}', expected_real_ext)
1263
1264
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Return a datetime object from a string.
    Supported format:
        (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    @param format strftime format of DATE
    @param precision Round the datetime object: auto|microsecond|second|minute|hour|day
                    auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(dt.datetime.now(dt.timezone.utc), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - dt.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if match is not None:
        # Recursively resolve the base date, then apply the signed offset
        start_time = datetime_from_str(match.group('start'), precision, format)
        time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit == 'month' or unit == 'year':
            # Months/years vary in length; use calendar-aware arithmetic,
            # then round at day precision
            new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                time *= 7
            delta = dt.timedelta(**{unit + 's': time})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    # Plain DATE: parse with `format` and round to the requested precision
    return datetime_round(dt.datetime.strptime(date_str, format), precision)
1305
1306
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param strict Restrict allowed patterns to "YYYYMMDD" and
                  (now|today|yesterday)(-\d+(day|week|month|year)s?)?
    """
    if strict:
        allowed = r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?'
        if not re.fullmatch(allowed, date_str):
            raise ValueError(f'Invalid date format "{date_str}"')
    return datetime_from_str(date_str, precision='microsecond', format=format).date()
1317
1318
def datetime_add_months(dt_, months):
    """Increment/Decrement a datetime object by months."""
    # Work with a zero-based month index, then convert back to 1-based
    total = dt_.month - 1 + months
    year = dt_.year + total // 12
    month = total % 12 + 1
    # Clamp the day for shorter target months (e.g. Jan 31 + 1 month -> Feb 28/29)
    day = min(dt_.day, calendar.monthrange(year, month)[1])
    return dt_.replace(year, month, day)
1326
1327
def datetime_round(dt_, precision='day'):
    """
    Round a datetime object's time to a specific precision
    """
    if precision == 'microsecond':
        return dt_

    seconds_per_unit = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    step = seconds_per_unit[precision]
    # Round-half-up on the UTC timestamp, then rebuild an aware datetime
    timestamp = ((calendar.timegm(dt_.timetuple()) + step / 2) // step) * step
    return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc)
1344
1345
def hyphenate_date(date_str):
    """Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format;
    anything else is returned unchanged."""
    match = re.match(r'^(\d{4})(\d{2})(\d{2})$', date_str)
    return '-'.join(match.groups()) if match else date_str
1354
1355
class DateRange:
    """Represents a time interval between two dates (both inclusive)."""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date"""
        # Missing endpoints default to the representable extremes
        self.start = date_from_str(start, strict=True) if start is not None else dt.datetime.min.date()
        self.end = date_from_str(end, strict=True) if end is not None else dt.datetime.max.date()
        if self.start > self.end:
            raise ValueError('Date range: "%s" , the start date must be before the end date' % self)

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        if not isinstance(date, dt.date):
            date = date_from_str(date)
        return self.start <= date <= self.end

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'

    def __str__(self):
        return f'{self.start} to {self.end}'

    def __eq__(self, other):
        return (isinstance(other, DateRange)
                and self.start == other.start and self.end == other.end)
1392
1393
@functools.cache
def system_identifier():
    """Build a one-line description of the Python/OS environment (used in bug reports)."""
    python_implementation = platform.python_implementation()
    if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
    libc_ver = []
    with contextlib.suppress(OSError):  # We may not have access to the executable
        libc_ver = platform.libc_ver()

    return 'Python %s (%s %s %s) - %s (%s%s)' % (
        platform.python_version(),
        python_implementation,
        platform.machine(),
        platform.architecture()[0],
        platform.platform(),
        ssl.OPENSSL_VERSION,
        format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
    )
1412
1413
@functools.cache
def get_windows_version():
    """Get Windows version. Returns () when not running on Windows."""
    if compat_os_name != 'nt':
        return ()
    return version_tuple(platform.win32_ver()[1])
1421
1422
def write_string(s, out=None, encoding=None):
    """Write `s` to `out` (default: stderr), handling byte streams and encodings."""
    assert isinstance(s, str)
    out = out or sys.stderr
    # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
    if not out:
        return

    if compat_os_name == 'nt' and supports_terminal_sequences(out):
        s = re.sub(r'([\r\n]+)', r' \1', s)

    enc, buffer = None, out
    # `mode` might be `None` (Ref: https://github.com/yt-dlp/yt-dlp/issues/8816)
    if 'b' in (getattr(out, 'mode', None) or ''):
        # Binary stream: encode ourselves
        enc = encoding or preferredencoding()
    elif hasattr(out, 'buffer'):
        # Text stream: write encoded bytes to the underlying binary buffer
        buffer = out.buffer
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()

    buffer.write(s.encode(enc, 'ignore') if enc else s)
    out.flush()
1443
1444
# TODO: Use global logger
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
    """Emit a deprecation warning (once per distinct message in CLI mode).

    In CLI mode the message is printed via `printer` (or written to stderr);
    otherwise a standard DeprecationWarning is raised pointing at the caller.
    """
    from .. import _IN_CLI
    if _IN_CLI:
        if msg in deprecation_warning._cache:
            return  # already warned about this message
        deprecation_warning._cache.add(msg)
        if printer:
            return printer(f'{msg}{bug_reports_message()}', **kwargs)
        return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
    else:
        import warnings
        warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)


# Per-process set of messages already printed (CLI mode only)
deprecation_warning._cache = set()
1461
1462
def bytes_to_intlist(bs):
    """Convert a bytes-like (or character) sequence to a list of integer values."""
    if not bs:
        return []
    if isinstance(bs[0], int):  # bytes/bytearray: indexing already yields ints
        return list(bs)
    return [ord(c) for c in bs]
1470
1471
def intlist_to_bytes(xs):
    """Pack a sequence of byte values (0-255) into a bytes object."""
    if not xs:
        return b''
    return struct.pack(f'{len(xs)}B', *xs)
1476
1477
class LockingUnsupportedError(OSError):
    """Raised when no file-locking primitive is available on this platform."""
    msg = 'File locking is not supported'

    def __init__(self):
        super().__init__(self.msg)
1483
1484
# Cross-platform file locking: define _lock_file/_unlock_file appropriately
# for the current platform (Win32 LockFileEx, POSIX flock/lockf, or stubs).
if sys.platform == 'win32':
    import ctypes
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        # Mirrors the Win32 OVERLAPPED struct passed to LockFileEx/UnlockFileEx
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.WinDLL('kernel32')
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED)  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Lock the whole file: maximum low/high byte-range values
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive, block):
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so _unlock_file can reuse the same pointer
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)

        # dwFlags: 0x2 = exclusive lock, 0x1 = fail immediately (non-blocking)
        if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
                          (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
                          0, whole_low, whole_high, f._lock_file_overlapped_p):
            # NB: No argument form of "ctypes.FormatError" does not work on PyPy
            raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')

    def _unlock_file(f):
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
            raise OSError('Unlocking file failed: %r' % ctypes.FormatError())

else:
    try:
        import fcntl

        def _lock_file(f, exclusive, block):
            flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            if not block:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(f, flags)
            except BlockingIOError:
                raise
            except OSError:  # AOSP does not have flock()
                fcntl.lockf(f, flags)

        def _unlock_file(f):
            # Try flock first, then lockf; suppress errors so each fallback runs
            with contextlib.suppress(OSError):
                return fcntl.flock(f, fcntl.LOCK_UN)
            with contextlib.suppress(OSError):
                return fcntl.lockf(f, fcntl.LOCK_UN)  # AOSP does not have flock()
            return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB)  # virtiofs needs LOCK_NB on unlocking

    except ImportError:

        def _lock_file(f, exclusive, block):
            raise LockingUnsupportedError()

        def _unlock_file(f):
            raise LockingUnsupportedError()
1571
1572
class locked_file:
    """A file wrapper that holds an OS-level lock for the file's lifetime.

    Usable as a context manager; also proxies attribute access and iteration
    to the underlying file object.
    """
    locked = False

    def __init__(self, filename, mode, block=True, encoding=None):
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        writable = any(f in mode for f in 'wax+')
        readable = any(f in mode for f in 'r+')
        flags = functools.reduce(operator.ior, (
            getattr(os, 'O_CLOEXEC', 0),  # UNIX only
            getattr(os, 'O_BINARY', 0),  # Windows only
            getattr(os, 'O_NOINHERIT', 0),  # Windows only
            os.O_CREAT if writable else 0,  # O_TRUNC only after locking
            os.O_APPEND if 'a' in mode else 0,
            os.O_EXCL if 'x' in mode else 0,
            os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
        ))

        self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)

    def __enter__(self):
        # Shared lock for read-only modes, exclusive otherwise
        exclusive = 'r' not in self.mode
        try:
            _lock_file(self.f, exclusive, self.block)
            self.locked = True
        except OSError:
            self.f.close()
            raise
        if 'w' in self.mode:
            # Truncate only after the lock is held, so concurrent readers
            # never observe a half-truncated file
            try:
                self.f.truncate()
            except OSError as e:
                if e.errno not in (
                    errno.ESPIPE,  # Illegal seek - expected for FIFO
                    errno.EINVAL,  # Invalid argument - expected for /dev/null
                ):
                    raise
        return self

    def unlock(self):
        if not self.locked:
            return
        try:
            _unlock_file(self.f)
        finally:
            self.locked = False

    def __exit__(self, *_):
        try:
            self.unlock()
        finally:
            self.f.close()

    # Allow explicit open()/close() use in addition to the context manager
    open = __enter__
    close = __exit__

    def __getattr__(self, attr):
        # Delegate everything else (read, write, seek, ...) to the file object
        return getattr(self.f, attr)

    def __iter__(self):
        return iter(self.f)
1636
1637
@functools.cache
def get_filesystem_encoding():
    """Return the filesystem encoding, defaulting to utf-8 when undetectable."""
    encoding = sys.getfilesystemencoding()
    return 'utf-8' if encoding is None else encoding
1642
1643
# Escaping for plain Windows argv quoting (CreateProcess-style): only '"' needs escaping
_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
# Escaping for cmd.exe command lines, which have their own metacharacters
_CMD_QUOTE_TRANS = str.maketrans({
    # Keep quotes balanced by replacing them with `""` instead of `\\"`
    '"': '""',
    # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
    # `=` should be unique since variables containing `=` cannot be set using cmd
    '\n': '%=%',
    '\r': '%=%',
    # Use zero length variable replacement so `%` doesn't get expanded
    # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
    '%': '%%cd:~,%',
})
1656
1657
def shell_quote(args, *, shell=False):
    """Quote `args` (a string or iterable of strings) for a command line.

    On POSIX this defers to shlex; on Windows, quoting differs between a
    direct argv (`shell=False`) and a cmd.exe command line (`shell=True`).
    """
    args = list(variadic(args))

    if compat_os_name != 'nt':
        return shlex.join(args)

    trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
    return ' '.join(
        # Safe characters need no quoting at all
        s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
        # Otherwise: double trailing backslash runs before '"'/end, apply the
        # translation table, and wrap the result in double quotes
        else re.sub(r'(\\+)("|$)', r'\1\1\2', s).translate(trans).join('""')
        for s in args)
1669
1670
def smuggle_url(url, data):
    """ Pass additional data in a URL for internal use. """
    # Merge with any data already smuggled into the URL
    url, existing = unsmuggle_url(url, {})
    data.update(existing)
    payload = urllib.parse.urlencode({'__youtubedl_smuggle': json.dumps(data)})
    return f'{url}#{payload}'
1679
1680
def unsmuggle_url(smug_url, default=None):
    """Extract data smuggled into a URL by smuggle_url; returns (url, data)."""
    if '#__youtubedl_smuggle' not in smug_url:
        return smug_url, default
    url, _, fragment = smug_url.rpartition('#')
    payload = urllib.parse.parse_qs(fragment)['__youtubedl_smuggle'][0]
    return url, json.loads(payload)
1688
1689
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """Format a number with a decimal suffix (k, M, G, ...).

    With factor=1024 the binary forms (Ki, Mi, ...) are used instead.
    Returns None for missing or negative input.
    """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    suffixes = 'kMGTPEZY'
    exponent = min(int(math.log(num, factor)), len(suffixes)) if num else 0
    suffix = ('', *suffixes)[exponent]
    if factor == 1024:
        # Binary prefixes: k -> Ki, M -> Mi, ...; no suffix stays empty
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    return fmt % (num / factor ** exponent, suffix)
1702
1703
def format_bytes(bytes):
    # Human-readable byte count with binary (KiB/MiB/...) suffixes; 'N/A' when unknown
    return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
1706
1707
def lookup_unit_table(unit_table, s, strict=False):
    """Parse '<number> <unit>' from `s` using the multipliers in `unit_table`.

    Returns the rounded integer value, or None when nothing matches.
    Non-strict mode also accepts ',' as the decimal separator and only
    requires a prefix match.
    """
    num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]')
    units_re = '|'.join(map(re.escape, unit_table))
    matcher = re.fullmatch if strict else re.match
    m = matcher(rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
    if not m:
        return None

    value = float(m.group('num').replace(',', '.'))
    return round(value * unit_table[m.group('unit')])
1719
1720
def parse_bytes(s):
    """Parse a string indicating a byte quantity into an integer"""
    # '' -> 1, 'K' -> 1024, 'M' -> 1024**2, ...
    table = {unit: 1024 ** exp for exp, unit in enumerate(['', *'KMGTPEZY'])}
    return lookup_unit_table(table, s.upper(), strict=True)
1726
1727
def parse_filesize(s):
    """Parse a human-readable file size ('5 MiB', '3.5GB', ...) into bytes, or None."""
    if s is None:
        return None

    # Build the unit table programmatically. Insertion order matters:
    # lookup_unit_table joins the keys into a regex alternation.
    # The lower-case forms are of course incorrect and unofficial,
    # but we support those too.
    unit_table = {'B': 1, 'b': 1, 'bytes': 1}
    prefixes = [
        ('K', 'kilo', 'kibi'), ('M', 'mega', 'mebi'), ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'), ('P', 'peta', 'pebi'), ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'), ('Y', 'yotta', 'yobi'),
    ]
    for exp, (sym, dec_name, bin_name) in enumerate(prefixes, start=1):
        binary, decimal = 1024 ** exp, 1000 ** exp
        unit_table[f'{sym}iB'] = binary          # e.g. 'MiB'
        unit_table[f'{sym}B'] = decimal          # e.g. 'MB'
        unit_table[f'{sym.lower()}B'] = binary   # e.g. 'mB' (treated as binary)
        unit_table[f'{sym}b'] = decimal          # e.g. 'Mb'
        unit_table[f'{sym.lower()}b'] = decimal  # e.g. 'mb'
        unit_table[f'{dec_name}bytes'] = decimal
        unit_table[f'{bin_name}bytes'] = binary

    return lookup_unit_table(unit_table, s)
1797
1798
def parse_count(s):
    """Parse a (possibly suffixed) count like '1.2M views' into an int, or None."""
    if s is None:
        return None

    # Drop a leading non-numeric word (e.g. 'Views 1,234' -> '1,234')
    s = re.sub(r'^[^\d]+\s', '', s).strip()

    if re.match(r'^[\d,.]+$', s):
        return str_to_int(s)

    multipliers = {
        'k': 1000,
        'K': 1000,
        'm': 1000 ** 2,
        'M': 1000 ** 2,
        'kk': 1000 ** 2,
        'KK': 1000 ** 2,
        'b': 1000 ** 3,
        'B': 1000 ** 3,
    }
    parsed = lookup_unit_table(multipliers, s)
    if parsed is not None:
        return parsed

    # Fall back to the leading number, if any
    mobj = re.match(r'([\d,.]+)(?:$|\s)', s)
    if mobj:
        return str_to_int(mobj.group(1))
1826
1827
def parse_resolution(s, *, lenient=False):
    """Extract {'width', 'height'} (or just 'height') from a resolution string."""
    if s is None:
        return {}

    # WxH forms; strict mode rejects adjoining alphanumerics to avoid false hits
    pattern = (r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)' if lenient
               else r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])')
    mobj = re.search(pattern, s)
    if mobj:
        return {'width': int(mobj.group('w')), 'height': int(mobj.group('h'))}

    # '720p' / '1080i' style labels
    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    # '4k' / '8k' marketing labels (4k == 2160 lines)
    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        return {'height': int(mobj.group(1)) * 540}

    return {}
1851
1852
def parse_bitrate(s):
    """Extract a bitrate in kbps from a string, or None."""
    if not isinstance(s, str):
        return None
    mobj = re.search(r'\b(\d+)\s*kbps', s)
    return int(mobj.group(1)) if mobj else None
1859
1860
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name """
    month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
    # First matching month, 1-based; None if unknown
    return next((number for number, month in enumerate(month_names, 1) if month == name), None)
1870
1871
def month_by_abbreviation(abbrev):
    """ Return the number of a month by (locale-independently) English
    abbreviations """
    abbreviations = [name[:3] for name in ENGLISH_MONTH_NAMES]
    try:
        return abbreviations.index(abbrev) + 1
    except ValueError:
        return None
1880
1881
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML"""
    # Leave already-escaped entities (named or numeric) untouched
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;', xml_str)
1888
1889
def setproctitle(title):
    """Set the process title via libc prctl (Linux only); silently no-ops elsewhere."""
    assert isinstance(title, str)

    # Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
    try:
        import ctypes
    except ImportError:
        return

    try:
        libc = ctypes.cdll.LoadLibrary('libc.so.6')
    except OSError:
        return
    except TypeError:
        # LoadLibrary in Windows Python 2.7.13 only expects
        # a bytestring, but since unicode_literals turns
        # every string into a unicode string, it fails.
        return
    title_bytes = title.encode()
    buf = ctypes.create_string_buffer(len(title_bytes))
    buf.value = title_bytes
    try:
        # PR_SET_NAME = 15      Ref: /usr/include/linux/prctl.h
        libc.prctl(15, buf, 0, 0, 0)
    except AttributeError:
        return  # Strange libc, just skip this
1916
1917
def remove_start(s, start):
    """Strip `start` from the beginning of `s` if present (None-safe)."""
    if s is not None and s.startswith(start):
        return s[len(start):]
    return s
1920
1921
def remove_end(s, end):
    """Strip `end` from the end of `s` if present (None-safe).

    NOTE: an empty `end` yields '' (s[:-0] == s[:0]) — preserved for
    backward compatibility; do not replace with str.removesuffix.
    """
    if s is not None and s.endswith(end):
        return s[:-len(end)]
    return s
1924
1925
def remove_quotes(s):
    """Strip one matching pair of surrounding single or double quotes."""
    if s is None or len(s) < 2:
        return s
    if s[0] == s[-1] and s[0] in ('"', "'"):
        return s[1:-1]
    return s
1933
1934
def get_domain(url):
    """
    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"
    """
    netloc = urllib.parse.urlparse(url).netloc
    return remove_start(netloc, 'www.') or None
1941
1942
def url_basename(url):
    """Return the last path component of a URL (query/fragment excluded)."""
    path = urllib.parse.urlparse(url).path
    return path.strip('/').rpartition('/')[2]
1946
1947
def base_url(url):
    """Return the URL up to and including the last '/' before any query/fragment."""
    mobj = re.match(r'https?://[^?#]+/', url)
    return mobj.group()
1950
1951
def urljoin(base, path):
    """Join `path` onto `base` (None-safe); absolute paths pass through unchanged."""
    if isinstance(path, bytes):
        path = path.decode()
    if not path or not isinstance(path, str):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path  # already absolute (scheme- or protocol-relative)
    if isinstance(base, bytes):
        base = base.decode()
    if not isinstance(base, str) or not re.match(
            r'^(?:https?:)?//', base):
        return None
    return urllib.parse.urljoin(base, path)
1965
1966
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert `v` (or `getattr(v, get_attr)`) to int, rescaled by invscale/scale.

    Returns `default` when the conversion is impossible.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    try:
        return int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
1974
1975
def str_or_none(v, default=None):
    """Convert `v` to str, returning `default` when `v` is None."""
    if v is None:
        return default
    return str(v)
1978
1979
def str_to_int(int_str):
    """ A more relaxed version of int_or_none """
    if isinstance(int_str, int):
        return int_str
    if isinstance(int_str, str):
        # Strip thousands separators ('.', ',') and '+' before parsing
        int_str = re.sub(r'[,\.\+]', '', int_str)
    # NOTE: non-int/non-str values fall through to int_or_none unchanged
    return int_or_none(int_str)
1987
1988
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert `v` to float, rescaled by invscale/scale; `default` on failure or None."""
    if v is None:
        return default
    try:
        return float(v) * invscale / scale
    except (ValueError, TypeError):
        return default
1996
1997
def bool_or_none(v, default=None):
    """Return `v` only when it is an actual bool (not merely truthy); else `default`."""
    if isinstance(v, bool):
        return v
    return default
2000
2001
def strip_or_none(v, default=None):
    """Strip whitespace when `v` is a str; otherwise return `default`."""
    if isinstance(v, str):
        return v.strip()
    return default
2004
2005
def url_or_none(url):
    """Return the stripped URL when it has a supported scheme (or is protocol-relative); else None."""
    if not url or not isinstance(url, str):
        return None
    url = url.strip()
    if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url):
        return url
    return None
2011
2012
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
    """Format a unix timestamp or 'YYYYMMDD' string with `date_format`; `default` on failure."""
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            # Build from an aware epoch + timedelta: naive datetimes break
            # .timestamp() on Windows (yt-dlp#5185, cpython#94414), and
            # fromtimestamp rejects negative values there (yt-dlp#6706)
            datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc)
                               + dt.timedelta(seconds=timestamp))
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d')
        else:
            datetime_object = None  # triggers AttributeError -> default below
        # Emulate %s (epoch seconds) for platforms without it (Windows)
        date_format = re.sub(  # Support %s on windows
            r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
        return datetime_object.strftime(date_format)
    except (ValueError, TypeError, AttributeError):
        return default
2030
2031
def parse_duration(s):
    """Parse a duration string into seconds (float), or None.

    Handles clock style ('1:23:45.6'), verbose/ISO-8601-like
    ('P1DT2H3M4S', '2 hrs 30 min') and fractional ('1.5 hours') forms.
    """
    if not isinstance(s, str):
        return None
    s = s.strip()
    if not s:
        return None

    days, hours, mins, secs, ms = [None] * 5
    # [[[DD:]HH:]MM:]SS[.ms] clock-style durations
    m = re.match(r'''(?x)
            (?P<before_secs>
                (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
            (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
            (?P<ms>[.:][0-9]+)?Z?$
        ''', s)
    if m:
        days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
    else:
        # ISO-8601-ish / verbose forms; years/months/weeks are matched
        # but intentionally not captured (their length is ambiguous)
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?,?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?,?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?,?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Fractional '1.5 hours' / '90.5 mins' forms
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    if ms:
        # Clock style may use ':' for the fractional part
        ms = ms.replace(':', '.')
    return sum(float(part or 0) * mult for part, mult in (
        (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
2086
2087
def prepend_extension(filename, ext, expected_real_ext=None):
    """Insert *ext* before the real extension of *filename*.

    If *expected_real_ext* is given and the actual extension differs,
    *ext* is appended to the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if not expected_real_ext or real_ext[1:] == expected_real_ext:
        return f'{name}.{ext}{real_ext}'
    # Extension mismatch: keep the original filename and append ext.
    # (Previously this returned a literal '(unknown)' placeholder, discarding
    # the target path entirely and producing a bogus output filename.)
    return f'{filename}.{ext}'
2094
2095
def replace_extension(filename, ext, expected_real_ext=None):
    """Replace the extension of *filename* with *ext*.

    If *expected_real_ext* is given and the real extension differs,
    *ext* is appended to the full filename instead.
    """
    name, real_ext = os.path.splitext(filename)
    if expected_real_ext and real_ext[1:] != expected_real_ext:
        name = filename
    return f'{name}.{ext}'
2101
2102
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version) """
    try:
        Popen.run([exe, *args], stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        # Not found / not startable
        return False
    return exe
2111
2112
def _get_exe_version_output(exe, args):
    """Run *exe* with *args*; return its combined output, None on non-zero exit,
    or False if it could not be started at all."""
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        stdout, _, returncode = Popen.run(
            [encodeArgument(exe), *args], text=True,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError:
        return False
    return None if returncode else stdout
2125
2126
def detect_exe_version(output, version_re=None, unrecognized='present'):
    """Extract a version string from *output* with *version_re*; *unrecognized* if absent."""
    assert isinstance(output, str)
    if version_re is None:
        version_re = r'version\s+([-0-9._a-zA-Z]+)'
    mobj = re.search(version_re, output)
    return mobj.group(1) if mobj else unrecognized
2136
2137
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized=('present', 'broken')):
    """ Returns the version of the specified executable,
    or False if the executable is not present """
    unrecognized = variadic(unrecognized)
    assert len(unrecognized) in (1, 2)
    out = _get_exe_version_output(exe, args)
    if out is None:  # executable ran but exited with an error
        return unrecognized[-1]
    if not out:  # executable could not be started (or produced no output)
        return out
    return detect_exe_version(out, version_re, unrecognized[0])
2148
2149
def frange(start=0, stop=None, step=1):
    """Float range"""
    if stop is None:
        start, stop = 0, start
    if not step:
        # Zero step yields nothing (mirrors the sign-trick in the original)
        return
    direction = 1 if step > 0 else -1
    current = start
    while direction * current < direction * stop:
        yield current
        current += step
2158
2159
class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):
        # Raised instead of the builtin so callers can tell LazyList
        # exhaustion apart from other IndexErrors
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        # _cache may be shared with a sibling instance (see __reversed__/__copy__)
        self._iterable = iter(iterable)
        self._cache = [] if _cache is None else _cache
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        yield from self._cache
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        # Pull every remaining item into the cache
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        # Map a forward index to the equivalent index from the end (~x == -x - 1)
        return None if x is None else ~x

    def __getitem__(self, idx):
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
            try:
                return self._cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Consume only as many items as the requested indices need
        n = max(start or 0, stop or 0) - len(self._cache) + 1
        if n > 0:
            self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        try:
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        # The reversed view shares this instance's cache and iterable
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
2247
2248
class PagedList:
    """Base class for lazily fetched, page-backed lists of entries."""

    class IndexError(IndexError):
        pass

    def __init__(self, pagefunc, pagesize, use_cache=True):
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._pagecount = float('inf')
        self._use_cache = use_cache
        self._cache = {}

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getpage(self, pagenum):
        """Return the (possibly cached) list of results for page *pagenum*."""
        results = self._cache.get(pagenum)
        if results is None:
            results = list(self._pagefunc(pagenum)) if pagenum <= self._pagecount else []
            if self._use_cache:
                self._cache[pagenum] = results
        return results

    def getslice(self, start=0, end=None):
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        assert self._use_cache, 'Indexing PagedList requires cache'
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if entries:
            return entries[0]
        raise self.IndexError()

    def __bool__(self):
        return bool(self.getslice(0, 1))
2290
2291
class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            firstid = pagenum * self._pagesize
            nextfirstid = firstid + self._pagesize
            if start >= nextfirstid:
                continue

            # Offsets of the requested slice within this page
            startv = start % self._pagesize if firstid <= start < nextfirstid else 0
            endv = ((end - 1) % self._pagesize) + 1 if (
                end is not None and firstid <= end <= nextfirstid) else None

            try:
                page_results = self.getpage(pagenum)
            except Exception:
                # Remember that pages past the previous one are unavailable
                self._pagecount = pagenum - 1
                raise
            if startv != 0 or endv is not None:
                page_results = page_results[startv:endv]
            yield from page_results

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            if len(page_results) + startv < self._pagesize:
                break

            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if end == nextfirstid:
                break
2331
2332
class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        # Caching is always enabled since the page count is fixed
        PagedList.__init__(self, pagefunc, pagesize, True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        # First page to fetch, and last page (exclusive) clamped to the known count
        start_page = start // self._pagesize
        end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
        # Leading entries of the first page that fall before `start`
        skip_elems = start - start_page * self._pagesize
        # Number of entries still wanted (None = unbounded)
        only_more = None if end is None else end - start
        for pagenum in range(start_page, end_page):
            page_results = self.getpage(pagenum)
            if skip_elems:
                page_results = page_results[skip_elems:]
                skip_elems = None
            if only_more is not None:
                if len(page_results) < only_more:
                    only_more -= len(page_results)
                else:
                    # This page contains the final wanted entries
                    yield from page_results[:only_more]
                    break
            yield from page_results
2357
2358
class PlaylistEntries:
    """Resolves and iterates the entries of a playlist info_dict,
    honoring the user's requested playlist items."""

    # Sentinel for an entry that was not extracted (incomplete playlists)
    MissingEntry = object()
    # Whether all entries are already materialized in self._entries
    is_exhausted = False

    def __init__(self, ydl, info_dict):
        self.ydl = ydl

        # _entries must be assigned now since infodict can change during iteration
        entries = info_dict.get('entries')
        if entries is None:
            raise EntryNotInPlaylist('There are no entries')
        elif isinstance(entries, list):
            self.is_exhausted = True

        requested_entries = info_dict.get('requested_entries')
        self.is_incomplete = requested_entries is not None
        if self.is_incomplete:
            assert self.is_exhausted
            # Scatter the extracted entries into their 1-based requested positions
            self._entries = [self.MissingEntry] * max(requested_entries or [0])
            for i, entry in zip(requested_entries, entries):
                self._entries[i - 1] = entry
        elif isinstance(entries, (list, PagedList, LazyList)):
            self._entries = entries
        else:
            self._entries = LazyList(entries)

    # Matches a single --playlist-items segment: START[:END[:STEP]] (or START-END)
    PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
        (?P<start>[+-]?\d+)?
        (?P<range>[:-]
            (?P<end>[+-]?\d+|inf(?:inite)?)?
            (?::(?P<step>[+-]?\d+))?
        )?''')

    @classmethod
    def parse_playlist_items(cls, string):
        """Yield an int (single item) or slice (range) per comma-separated segment."""
        for segment in string.split(','):
            if not segment:
                raise ValueError('There is two or more consecutive commas')
            mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
            if not mobj:
                raise ValueError(f'{segment!r} is not a valid specification')
            start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
            if int_or_none(step) == 0:
                raise ValueError(f'Step in {segment!r} cannot be zero')
            yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)

    def get_requested_items(self):
        """Yield (index, entry) pairs for the items requested via ydl params."""
        playlist_items = self.ydl.params.get('playlist_items')
        playlist_start = self.ydl.params.get('playliststart', 1)
        playlist_end = self.ydl.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlist_end in (-1, None):
            playlist_end = ''
        if not playlist_items:
            playlist_items = f'{playlist_start}:{playlist_end}'
        elif playlist_start != 1 or playlist_end:
            self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)

        for index in self.parse_playlist_items(playlist_items):
            for i, entry in self[index]:
                yield i, entry
                if not entry:
                    continue
                try:
                    # The item may have just been added to archive. Don't break due to it
                    if not self.ydl.params.get('lazy_playlist'):
                        # TODO: Add auto-generated fields
                        self.ydl._match_entry(entry, incomplete=True, silent=True)
                except (ExistingVideoReached, RejectedVideoReached):
                    return

    def get_full_count(self):
        """Total number of entries, if it can be determined without extraction."""
        if self.is_exhausted and not self.is_incomplete:
            return len(self)
        elif isinstance(self._entries, InAdvancePagedList):
            if self._entries._pagesize == 1:
                return self._entries._pagecount

    @functools.cached_property
    def _getter(self):
        # Returns a function mapping a 0-based index to an entry,
        # raising self.IndexError when past the end
        if isinstance(self._entries, list):
            def get_entry(i):
                try:
                    entry = self._entries[i]
                except IndexError:
                    entry = self.MissingEntry
                    if not self.is_incomplete:
                        raise self.IndexError()
                if entry is self.MissingEntry:
                    raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
                return entry
        else:
            def get_entry(i):
                try:
                    return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
                except (LazyList.IndexError, PagedList.IndexError):
                    raise self.IndexError()
        return get_entry

    def __getitem__(self, idx):
        # Generator yielding (1-based index, entry); accepts an int or a 1-based slice
        if isinstance(idx, int):
            idx = slice(idx, idx)

        # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
        step = 1 if idx.step is None else idx.step
        if idx.start is None:
            start = 0 if step > 0 else len(self) - 1
        else:
            start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start

        # NB: Do not call len(self) when idx == [:]
        if idx.stop is None:
            stop = 0 if step < 0 else float('inf')
        else:
            stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
        stop += [-1, 1][step > 0]

        for i in frange(start, stop, step):
            if i < 0:
                continue
            try:
                entry = self._getter(i)
            except self.IndexError:
                self.is_exhausted = True
                if step > 0:
                    break
                continue
            yield i + 1, entry

    def __len__(self):
        return len(tuple(self[:]))

    class IndexError(IndexError):
        pass
2493
2494
def uppercase_escape(s):
    """Decode \\UXXXXXXXX escape sequences in *s* into their characters."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(r'\\U[0-9a-fA-F]{8}', lambda mobj: decode(mobj.group(0))[0], s)
2501
2502
def lowercase_escape(s):
    """Decode \\uXXXX escape sequences in *s* into their characters."""
    decode = codecs.getdecoder('unicode_escape')
    return re.sub(r'\\u[0-9a-fA-F]{4}', lambda mobj: decode(mobj.group(0))[0], s)
2509
2510
def parse_qs(url, **kwargs):
    """Parse the query string of *url* into a dict of value lists."""
    query = urllib.parse.urlparse(url).query
    return urllib.parse.parse_qs(query, **kwargs)
2513
2514
def read_batch_urls(batch_fd):
    """Read URLs from a file-like object, skipping BOMs, blanks and comment lines."""
    def fixup(url):
        if not isinstance(url, str):
            url = url.decode('utf-8', 'replace')
        # Strip a real BOM as well as a UTF-8 BOM mis-decoded as latin-1
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if url.startswith(bom):
                url = url[len(bom):]
        url = url.lstrip()
        if not url or url.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        return re.split(r'\s#', url, maxsplit=1)[0].rstrip()

    with contextlib.closing(batch_fd) as fd:
        return list(filter(None, map(fixup, fd)))
2532
2533
def urlencode_postdata(*args, **kargs):
    """URL-encode POST data and return it as ASCII bytes."""
    encoded = urllib.parse.urlencode(*args, **kargs)
    return encoded.encode('ascii')
2536
2537
def update_url(url, *, query_update=None, **kwargs):
    """Replace URL components specified by kwargs
    @param url      str or parse url tuple
    @param query_update  update query
    @returns        str
    """
    if isinstance(url, str):
        if not kwargs and not query_update:
            # Nothing to change; avoid a pointless parse/unparse round-trip
            return url
        url = urllib.parse.urlparse(url)
    if query_update:
        assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
        merged_query = {**urllib.parse.parse_qs(url.query), **query_update}
        kwargs['query'] = urllib.parse.urlencode(merged_query, True)
    return urllib.parse.urlunparse(url._replace(**kwargs))
2556
2557
def update_url_query(url, query):
    """Add or replace the given query parameters in *url*."""
    return update_url(url, query_update=query)
2560
2561
2562def _multipart_encode_impl(data, boundary):
2563 content_type = 'multipart/form-data; boundary=%s' % boundary
2564
2565 out = b''
2566 for k, v in data.items():
2567 out += b'--' + boundary.encode('ascii') + b'\r\n'
2568 if isinstance(k, str):
2569 k = k.encode()
2570 if isinstance(v, str):
2571 v = v.encode()
2572 # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
2573 # suggests sending UTF-8 directly. Firefox sends UTF-8, too
2574 content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
2575 if boundary.encode('ascii') in content:
2576 raise ValueError('Boundary overlaps with data')
2577 out += content
2578
2579 out += b'--' + boundary.encode('ascii') + b'--\r\n'
2580
2581 return out, content_type
2582
2583
def multipart_encode(data, boundary=None):
    '''
    Encode a dict to RFC 7578-compliant form-data

    data:
        A dict where keys and values can be either Unicode or bytes-like
        objects.
    boundary:
        If specified a Unicode object, it's used as the boundary. Otherwise
        a random boundary is generated.

    Reference: https://tools.ietf.org/html/rfc7578
    '''
    has_specified_boundary = boundary is not None

    while True:
        if boundary is None:
            boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
        try:
            # Retry with a fresh random boundary if it collides with the data
            return _multipart_encode_impl(data, boundary)
        except ValueError:
            if has_specified_boundary:
                raise
            boundary = None
2612
2613
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """Whether *x* is iterable, excluding str/bytes/mappings by default."""
    if blocked_types is NO_DEFAULT:
        blocked_types = (str, bytes, collections.abc.Mapping)
    if not isinstance(x, allowed_types):
        return False
    return not isinstance(x, blocked_types)
2618
2619
def variadic(x, allowed_types=NO_DEFAULT):
    """Return *x* unchanged if it is iterable-like, else wrap it in a 1-tuple."""
    if not isinstance(allowed_types, (tuple, type)):
        deprecation_warning('allowed_types should be a tuple or a type')
        allowed_types = tuple(allowed_types)
    if is_iterable_like(x, blocked_types=allowed_types):
        return x
    return (x,)
2625
2626
def try_call(*funcs, expected_type=None, args=[], kwargs={}):
    """Call each function in turn, returning the first result that does not
    raise a common lookup/arithmetic error and matches *expected_type*."""
    for func in funcs:
        try:
            result = func(*args, **kwargs)
        except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
            continue
        if expected_type is None or isinstance(result, expected_type):
            return result
2636
2637
def try_get(src, getter, expected_type=None):
    """Apply getter(s) to *src*, returning the first successful result of *expected_type*."""
    return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
2640
2641
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a copy of *dct* containing only items for which cndn(key, value) holds."""
    return {key: value for key, value in dct.items() if cndn(key, value)}
2644
2645
def merge_dicts(*dicts):
    """Merge dicts left-to-right: a later non-None value fills a missing key,
    and a later string value replaces an earlier empty-string value."""
    merged = {}
    for current in dicts:
        for key, value in current.items():
            if value is None:
                continue
            if key not in merged or (isinstance(value, str) and merged[key] == ''):
                merged[key] = value
    return merged
2654
2655
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return *string* as str, decoding bytes-like input with *encoding*."""
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
2658
2659
# MPAA film ratings mapped to the minimum viewer age they imply
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# TV Parental Guidelines ratings mapped to the minimum viewer age they imply
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}
2677
2678
def parse_age_limit(s):
    """Parse an age limit from an int, an 'NN+' string, a US movie rating
    or a TV parental guideline; None if unrecognized."""
    # isinstance(False, int) is True. So type() must be used instead
    if type(s) is int:  # noqa: E721
        return s if 0 <= s <= 21 else None
    if not isinstance(s, str):
        return None
    mobj = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if mobj:
        return int(mobj.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    mobj = re.match(r'^TV[_-]?(%s)$' % '|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
    if mobj:
        return TV_PARENTAL_GUIDELINES['TV-' + mobj.group(1)]
    return None
2695
2696
def strip_jsonp(code):
    """Remove a JSONP callback wrapper, leaving only the JSON payload."""
    JSONP_RE = r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$'''
    return re.sub(JSONP_RE, r'\g<callback_data>', code)
2705
2706
def js_to_json(code, vars={}, *, strict=False):
    """Best-effort conversion of a JavaScript value/object literal to JSON text."""
    # vars is a dict of var, val pairs to substitute
    STRING_QUOTES = '\'"`'
    STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
    # Hex and octal integer literals (optionally used as object keys)
    INTEGER_TABLE = (
        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
    )

    def process_escape(match):
        # Keep escapes JSON understands; rewrite or drop the rest
        JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
        escape = match.group(1) or match.group(2)

        return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
                else R'\u00' if escape == 'x'
                else '' if escape == '\n'
                else escape)

    def template_substitute(match):
        # Evaluate a `${...}` template-literal interpolation recursively
        evaluated = js_to_json(match.group(1), vars, strict=strict)
        if evaluated[0] == '"':
            return json.loads(evaluated)
        return evaluated

    def fix_kv(m):
        # Normalize one matched token (string, comment, identifier, number, ...)
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
            return ''

        if v[0] in STRING_QUOTES:
            v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
            return f'"{escaped}"'

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # Integer object keys must become JSON strings
                return f'"{i}":' if v.endswith(':') else str(i)

        if v in vars:
            try:
                if not strict:
                    json.loads(vars[v])
            except json.JSONDecodeError:
                return json.dumps(vars[v])
            else:
                return vars[v]

        if not strict:
            return f'"{v}"'

        raise ValueError(f'Unknown value: {v}')

    def create_map(mobj):
        # Turn `new Map([[k, v], ...])` into a JSON object
        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))

    code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
    if not strict:
        # Lenient mode: unwrap some common constructor/IIFE patterns
        code = re.sub(rf'new Date\(({STRING_RE})\)', r'\g<1>', code)
        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
        code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
        code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)

    return re.sub(rf'''(?sx)
        {STRING_RE}|
        {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
        [0-9]+(?={SKIP_RE}:)|
        !+
        ''', fix_kv, code)
2786
2787
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values """
    def q(qid):
        if qid in quality_ids:
            return quality_ids.index(qid)
        # Unknown qualities sort below all known ones
        return -1
    return q
2796
2797
# Recognized 'when' stages for postprocessors
POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')


# Default output filename templates
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}
# Recognized output template types.
# NOTE(review): non-None values appear to be default filename suffixes for
# the corresponding file kind — confirm against callers
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_video': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}

# As of [1] format syntax is:
# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])? # unused in python
        {1} # conversion type
    )
'''


# Conversion types accepted by %-style formatting (used with STR_FORMAT_RE_TMPL)
STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
2837
2838
def limit_length(s, length):
    """ Add ellipses to overly long strings """
    if s is None:
        return None
    ellipses = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ellipses)] + ellipses
2847
2848
def version_tuple(v):
    """Parse a dotted/dashed version string into a tuple of ints."""
    return tuple(map(int, re.split(r'[-.]', v)))
2851
2852
def is_outdated_version(version, limit, assume_new=True):
    """Whether *version* is older than *limit*; unparseable input yields `not assume_new`."""
    fallback = not assume_new
    if not version:
        return fallback
    try:
        return version_tuple(version) < version_tuple(limit)
    except ValueError:
        return fallback
2860
2861
def ytdl_is_updateable():
    """ Returns if yt-dlp can be updated with -U """

    # Imported lazily here, presumably to avoid a circular import — confirm
    from ..update import is_non_updateable

    return not is_non_updateable()
2868
2869
def args_to_str(args):
    """Return a short printable representation of a subprocess command line."""
    # Get a short string representation for a subprocess command
    return shell_quote(args)
2873
2874
def error_to_str(err):
    """Format an exception as 'TypeName: message'."""
    return '{}: {}'.format(type(err).__name__, err)
2877
2878
def mimetype2ext(mt, default=NO_DEFAULT):
    """Map a MIME type string to a file extension; *default* (if given) for
    non-string input, else a best-effort guess from the subtype."""
    if not isinstance(mt, str):
        if default is not NO_DEFAULT:
            return default
        return None

    MAP = {
        # video
        '3gpp': '3gp',
        'mp2t': 'ts',
        'mp4': 'mp4',
        'mpeg': 'mpeg',
        'mpegurl': 'm3u8',
        'quicktime': 'mov',
        'webm': 'webm',
        'vp9': 'vp9',
        'video/ogg': 'ogv',
        'x-flv': 'flv',
        'x-m4v': 'm4v',
        'x-matroska': 'mkv',
        'x-mng': 'mng',
        'x-mp4-fragmented': 'mp4',
        'x-ms-asf': 'asf',
        'x-ms-wmv': 'wmv',
        'x-msvideo': 'avi',

        # application (streaming playlists)
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.apple.mpegurl': 'm3u8',
        'vnd.ms-sstr+xml': 'ism',
        'x-mpegurl': 'm3u8',

        # audio
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
        # Using .mp3 as it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/webm': 'webm',
        'audio/x-matroska': 'mka',
        'audio/x-mpegurl': 'm3u',
        'midi': 'mid',
        'ogg': 'ogg',
        'wav': 'wav',
        'wave': 'wav',
        'x-aac': 'aac',
        'x-flac': 'flac',
        'x-m4a': 'm4a',
        'x-realaudio': 'ra',
        'x-wav': 'wav',

        # image
        'avif': 'avif',
        'bmp': 'bmp',
        'gif': 'gif',
        'jpeg': 'jpg',
        'png': 'png',
        'svg+xml': 'svg',
        'tiff': 'tif',
        'vnd.wap.wbmp': 'wbmp',
        'webp': 'webp',
        'x-icon': 'ico',
        'x-jng': 'jng',
        'x-ms-bmp': 'bmp',

        # caption
        'filmstrip+json': 'fs',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-ms-sami': 'sami',

        # misc
        'gzip': 'gz',
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
    }

    # Drop any parameters (e.g. "; charset=...") and normalize case
    mimetype = mt.partition(';')[0].strip().lower()
    _, _, subtype = mimetype.rpartition('/')

    # Try the full MIME type, then the subtype, then the subtype's "+suffix"
    ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
    if ext:
        return ext
    elif default is not NO_DEFAULT:
        return default
    return subtype.replace('+', '.')
2968
2969
def ext2mimetype(ext_or_url):
    """Guess the MIME type for a file extension or URL; None if unknown/empty."""
    if not ext_or_url:
        return None
    name = ext_or_url if '.' in ext_or_url else f'file.{ext_or_url}'
    return mimetypes.guess_type(name)[0]
2976
2977
def parse_codecs(codecs_str):
    """Parse an RFC 6381 codecs string into vcodec/acodec/scodec/dynamic_range info."""
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}
    split_codecs = list(filter(None, map(
        str.strip, codecs_str.strip().strip(',').split(','))))
    vcodec, acodec, scodec, hdr = None, None, None, None
    for full_codec in split_codecs:
        # Strip leading zeroes from each dotted part (e.g. avc1.000042 -> avc1.42)
        parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
        if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                        'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
            if vcodec:
                # Only the first video codec counts
                continue
            vcodec = full_codec
            if parts[0] in ('dvh1', 'dvhe'):
                hdr = 'DV'
            elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
                hdr = 'HDR10'
            elif parts[:2] == ['vp9', '2']:
                hdr = 'HDR10'
        elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
                          'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
            acodec = acodec or full_codec
        elif parts[0] in ('stpp', 'wvtt'):
            scodec = scodec or full_codec
        else:
            write_string(f'WARNING: Unknown codec {full_codec}\n')
    if vcodec or acodec or scodec:
        return {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
            **({'scodec': scodec} if scodec is not None else {}),
        }
    elif len(split_codecs) == 2:
        # Two unrecognized codecs: assume video + audio in that order
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
3018
3019
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Pick a container extension that can hold the given codecs/extensions,
    optionally restricted/ordered by *preferences*."""
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    allow_mkv = not preferences or 'mkv' in preferences

    if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: All codecs supported by parse_codecs isn't handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a', 'ac-4',  # fourcc (m3u8, mpd)
            'h264', 'aacl', 'ec-3',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    # Reduce e.g. 'avc1.640028' to its bare fourcc ('avc1') for the lookup
    sanitize_codec = functools.partial(
        try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
    vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)

    for ext in preferences or COMPATIBLE_CODECS.keys():
        codec_set = COMPATIBLE_CODECS.get(ext, set())
        if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
            return ext

    # Fall back to matching by extension family instead of codec
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm', 'weba'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext} or any(
                ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
            return ext
    return 'mkv' if allow_mkv else preferences[-1]
3059
3060
def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
    """Determine a file extension from a response's headers."""
    headers = url_handle.headers

    # Prefer an explicit attachment filename, if one is present
    content_disposition = headers.get('Content-Disposition')
    if content_disposition:
        mobj = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', content_disposition)
        if mobj:
            ext = determine_ext(mobj.group('filename'), default_ext=None)
            if ext:
                return ext

    # Next, an S3-style metadata filename
    meta_name = headers.get('x-amz-meta-name')
    if meta_name:
        ext = meta_name.rpartition('.')[2]
        if ext:
            return ext

    # Finally, fall back to the Content-Type mapping
    return mimetype2ext(headers.get('Content-Type'), default=default)
3079
3080
def encode_data_uri(data, mime_type):
    """Encode *data* (bytes) as a base64 data: URI with the given MIME type."""
    # f-string instead of dated %-formatting, consistent with the rest of the file
    payload = base64.b64encode(data).decode('ascii')
    return f'data:{mime_type};base64,{payload}'
3083
3084
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked """
    if age_limit is None or content_limit is None:
        # No viewer limit set, or content is available for everyone
        return False
    return age_limit < content_limit
3093
3094
# List of known byte-order-marks (BOM)
# NOTE: the 4-byte UTF-32 BOMs must be listed before the UTF-16 ones,
# because the UTF-16 BOMs are byte prefixes of the UTF-32 ones
# (b'\xff\xfe' is a prefix of b'\xff\xfe\x00\x00'); is_html() relies on
# this ordering when stripping BOMs
BOMS = [
    (b'\xef\xbb\xbf', 'utf-8'),
    (b'\x00\x00\xfe\xff', 'utf-32-be'),
    (b'\xff\xfe\x00\x00', 'utf-32-le'),
    (b'\xff\xfe', 'utf-16-le'),
    (b'\xfe\xff', 'utf-16-be'),
]
3103
3104
def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes. """
    # Strip any leading byte-order-marks (repeatedly, in case several are
    # stacked) and remember the encoding indicated by the last one seen
    encoding = 'utf-8'
    for bom, bom_encoding in BOMS:
        while first_bytes.startswith(bom):
            encoding = bom_encoding
            first_bytes = first_bytes[len(bom):]

    return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
3114
3115
def determine_protocol(info_dict):
    """Determine the download protocol for *info_dict*.

    An explicit 'protocol' field wins; otherwise the protocol is inferred
    from the URL prefix, its extension, and finally its scheme.
    """
    explicit = info_dict.get('protocol')
    if explicit is not None:
        return explicit

    url = sanitize_url(info_dict['url'])
    for proto in ('rtmp', 'mms', 'rtsp'):
        if url.startswith(proto):
            return proto

    ext = determine_ext(url)
    if ext == 'm3u8':
        # Live HLS must use the native ffmpeg-backed downloader
        return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
    if ext == 'f4m':
        return 'f4m'

    return urllib.parse.urlparse(url).scheme
3136
3137
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a \t will be right aligned """
    def width(string):
        # Visible width: ignore terminal escape sequences and the alignment tab
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(table):
        # Widest cell of each column
        return [max(width(str(v)) for v in col) for col in zip(*table)]

    def filter_using_list(row, filterArray):
        # Keep only columns whose filter entry is truthy; columns beyond
        # the filter's length are always kept (fillvalue=True)
        return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]

    # hide_empty: drop columns whose data cells are all empty (max width 0)
    max_lens = get_max_lens(data) if hide_empty else []
    header_row = filter_using_list(header_row, max_lens)
    data = [filter_using_list(row, max_lens) for row in data]

    table = [header_row] + data
    # Recompute widths over header + data after any column filtering
    max_lens = get_max_lens(table)
    extra_gap += 1
    if delim:
        # Insert a separator row built from `delim` repeated to column width
        table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
        table[1][-1] = table[1][-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
    for row in table:
        for pos, text in enumerate(map(str, row)):
            if '\t' in text:
                # Right-align: the tab becomes exactly the padding needed
                row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
            else:
                # Left-align: pad on the right up to the column width
                row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
    ret = '\n'.join(''.join(row).rstrip() for row in table)
    return ret
3168
3169
3170def _match_one(filter_part, dct, incomplete):
3171 # TODO: Generalize code with YoutubeDL._build_format_filter
3172 STRING_OPERATORS = {
3173 '*=': operator.contains,
3174 '^=': lambda attr, value: attr.startswith(value),
3175 '$=': lambda attr, value: attr.endswith(value),
3176 '~=': lambda attr, value: re.search(value, attr),
3177 }
3178 COMPARISON_OPERATORS = {
3179 **STRING_OPERATORS,
3180 '<=': operator.le, # "<=" must be defined above "<"
3181 '<': operator.lt,
3182 '>=': operator.ge,
3183 '>': operator.gt,
3184 '=': operator.eq,
3185 }
3186
3187 if isinstance(incomplete, bool):
3188 is_incomplete = lambda _: incomplete
3189 else:
3190 is_incomplete = lambda k: k in incomplete
3191
3192 operator_rex = re.compile(r'''(?x)
3193 (?P<key>[a-z_]+)
3194 \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
3195 (?:
3196 (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
3197 (?P<strval>.+?)
3198 )
3199 ''' % '|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
3200 m = operator_rex.fullmatch(filter_part.strip())
3201 if m:
3202 m = m.groupdict()
3203 unnegated_op = COMPARISON_OPERATORS[m['op']]
3204 if m['negation']:
3205 op = lambda attr, value: not unnegated_op(attr, value)
3206 else:
3207 op = unnegated_op
3208 comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
3209 if m['quote']:
3210 comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
3211 actual_value = dct.get(m['key'])
3212 numeric_comparison = None
3213 if isinstance(actual_value, (int, float)):
3214 # If the original field is a string and matching comparisonvalue is
3215 # a number we should respect the origin of the original field
3216 # and process comparison value as a string (see
3217 # https://github.com/ytdl-org/youtube-dl/issues/11082)
3218 try:
3219 numeric_comparison = int(comparison_value)
3220 except ValueError:
3221 numeric_comparison = parse_filesize(comparison_value)
3222 if numeric_comparison is None:
3223 numeric_comparison = parse_filesize(f'{comparison_value}B')
3224 if numeric_comparison is None:
3225 numeric_comparison = parse_duration(comparison_value)
3226 if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
3227 raise ValueError('Operator %s only supports string values!' % m['op'])
3228 if actual_value is None:
3229 return is_incomplete(m['key']) or m['none_inclusive']
3230 return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
3231
3232 UNARY_OPERATORS = {
3233 '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
3234 '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
3235 }
3236 operator_rex = re.compile(r'''(?x)
3237 (?P<op>%s)\s*(?P<key>[a-z_]+)
3238 ''' % '|'.join(map(re.escape, UNARY_OPERATORS.keys())))
3239 m = operator_rex.fullmatch(filter_part.strip())
3240 if m:
3241 op = UNARY_OPERATORS[m.group('op')]
3242 actual_value = dct.get(m.group('key'))
3243 if is_incomplete(m.group('key')) and actual_value is None:
3244 return True
3245 return op(actual_value)
3246
3247 raise ValueError('Invalid filter part %r' % filter_part)
3248
3249
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax.
    @returns Whether the filter passes
    @param incomplete  Set of keys that is expected to be missing from dct.
                       Can be True/False to indicate all/none of the keys may be missing.
                       All conditions on incomplete keys pass if the key is missing
    """
    # Conditions are separated by '&'; a literal '&' is escaped as '\&'
    for filter_part in re.split(r'(?<!\\)&', filter_str):
        if not _match_one(filter_part.replace(r'\&', '&'), dct, incomplete):
            return False
    return True
3260
3261
def match_filter_func(filters, breaking_filters=None):
    """Build a callable usable as a match filter.

    @param filters           Filter string(s) for match_str; the special
                             filter '-' enables interactive mode
    @param breaking_filters  Filter string(s) which, when not passed,
                             raise RejectedVideoReached (aborting further
                             processing) instead of merely skipping
    @returns None when no filters are given; otherwise a callable
             (info_dict, incomplete=False) -> skip-reason string or None
    """
    if not filters and not breaking_filters:
        return None
    repr_ = f'{match_filter_func.__module__}.{match_filter_func.__qualname__}({filters}, {breaking_filters})'

    # Reuse this builder for the breaking filters; fall back to a no-op
    breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
    filters = set(variadic(filters or []))

    # '-' is a pseudo-filter requesting interactive confirmation
    interactive = '-' in filters
    if interactive:
        filters.remove('-')

    @function_with_repr.set_repr(repr_)
    def _match_func(info_dict, incomplete=False):
        # A breaking filter returning a message means: stop processing
        ret = breaking_filters(info_dict, incomplete)
        if ret is not None:
            raise RejectedVideoReached(ret)

        if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
            # NOTE(review): NO_DEFAULT presumably signals the caller to
            # prompt the user interactively — confirm against the caller
            return NO_DEFAULT if interactive and not incomplete else None
        else:
            video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
            filter_str = ') | ('.join(map(str.strip, filters))
            return f'{video_title} does not pass filter ({filter_str}), skipping ..'
    return _match_func
3287
3288
class download_range_func:
    """Callable yielding the sections of a video to download.

    Sections are selected by chapter-title regexes (`chapters`),
    explicit (start, end) time ranges (`ranges`), and/or the
    start_time/end_time already present in the info dict (`from_info`).
    """

    def __init__(self, chapters, ranges, from_info=False):
        self.chapters, self.ranges, self.from_info = chapters, ranges, from_info

    def __call__(self, info_dict, ydl):
        # Generator: yields one dict per section to download

        warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
                   else 'Cannot match chapters since chapter information is unavailable')
        for regex in self.chapters or []:
            for i, chapter in enumerate(info_dict.get('chapters') or []):
                if re.search(regex, chapter['title']):
                    warning = None
                    yield {**chapter, 'index': i}
        # Warn once if chapter regexes were given but none matched
        if self.chapters and warning:
            ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')

        for start, end in self.ranges or []:
            yield {
                'start_time': self._handle_negative_timestamp(start, info_dict),
                'end_time': self._handle_negative_timestamp(end, info_dict),
            }

        if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
            yield {
                'start_time': info_dict.get('start_time') or 0,
                'end_time': info_dict.get('end_time') or float('inf'),
            }
        elif not self.ranges and not self.chapters:
            # No selection criteria at all: download everything
            yield {}

    @staticmethod
    def _handle_negative_timestamp(time, info):
        # Negative timestamps count backwards from the end of the video
        # (requires a known 'duration'; otherwise passed through unchanged)
        return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time

    def __eq__(self, other):
        # NOTE(review): from_info is not part of equality (nor of __repr__)
        # — confirm this is intentional
        return (isinstance(other, download_range_func)
                and self.chapters == other.chapters and self.ranges == other.ranges)

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.chapters}, {self.ranges})'
3329
3330
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds (float), or None."""
    if not time_expr:
        return None

    # Plain offset, optionally suffixed with 's' (e.g. "12.345s")
    offset_match = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
    if offset_match:
        return float(offset_match.group('time_offset'))

    # Clock time "HH:MM:SS.fff" (some files separate the fraction with ':')
    clock_match = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if clock_match:
        hours, minutes, seconds = clock_match.groups()
        return 3600 * int(hours) + 60 * int(minutes) + float(seconds.replace(':', '.'))
3342
3343
def srt_subtitles_timecode(seconds):
    """Format *seconds* as an SRT timecode (HH:MM:SS,mmm)."""
    timetuple = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timetuple
3346
3347
def ass_subtitles_timecode(seconds):
    """Format *seconds* as an ASS timecode (H:MM:SS.cc, centisecond precision)."""
    hours, minutes, secs, msec = timetuple_from_msec(seconds * 1000)
    return '%01d:%02d:%02d.%02d' % (hours, minutes, secs, msec / 10)
3351
3352
def dfxp2srt(dfxp_data):
    '''
    @param dfxp_data A bytes-like object containing DFXP data
    @returns A unicode object containing converted SRT data
    '''
    # Legacy TTAF namespaces (and the old styling namespace) are rewritten
    # to their modern TTML equivalents before parsing
    LEGACY_NAMESPACES = (
        (b'http://www.w3.org/ns/ttml', [
            b'http://www.w3.org/2004/11/ttaf1',
            b'http://www.w3.org/2006/04/ttaf1',
            b'http://www.w3.org/2006/10/ttaf1',
        ]),
        (b'http://www.w3.org/ns/ttml#styling', [
            b'http://www.w3.org/ns/ttml#style',
        ]),
    )

    # TTML style attributes that get translated into SRT markup below
    SUPPORTED_STYLING = [
        'color',
        'fontFamily',
        'fontSize',
        'fontStyle',
        'fontWeight',
        'textDecoration'
    ]

    _x = functools.partial(xpath_with_ns, ns_map={
        'xml': 'http://www.w3.org/XML/1998/namespace',
        'ttml': 'http://www.w3.org/ns/ttml',
        'tts': 'http://www.w3.org/ns/ttml#styling',
    })

    styles = {}  # resolved <style> definitions, keyed by style id
    default_style = {}  # style inherited from the body/div element

    class TTMLPElementParser:
        """XMLParser target that renders one <p> element as SRT-style text."""
        # NOTE(review): these are class-level attributes; the two mutable
        # lists are shared across instances, although balanced start()/end()
        # calls leave them empty again after each parse — consider moving
        # them into an __init__
        _out = ''
        _unclosed_elements = []
        _applied_styles = []

        def start(self, tag, attrib):
            if tag in (_x('ttml:br'), 'br'):
                self._out += '\n'
            else:
                unclosed_elements = []
                style = {}
                element_style_id = attrib.get('style')
                # Cascade: document default style, then the referenced
                # <style>, then inline tts:* attributes
                if default_style:
                    style.update(default_style)
                if element_style_id:
                    style.update(styles.get(element_style_id, {}))
                for prop in SUPPORTED_STYLING:
                    prop_val = attrib.get(_x('tts:' + prop))
                    if prop_val:
                        style[prop] = prop_val
                if style:
                    font = ''
                    for k, v in sorted(style.items()):
                        # Skip properties already in effect from the parent
                        if self._applied_styles and self._applied_styles[-1].get(k) == v:
                            continue
                        if k == 'color':
                            font += ' color="%s"' % v
                        elif k == 'fontSize':
                            font += ' size="%s"' % v
                        elif k == 'fontFamily':
                            font += ' face="%s"' % v
                        elif k == 'fontWeight' and v == 'bold':
                            self._out += '<b>'
                            unclosed_elements.append('b')
                        elif k == 'fontStyle' and v == 'italic':
                            self._out += '<i>'
                            unclosed_elements.append('i')
                        elif k == 'textDecoration' and v == 'underline':
                            self._out += '<u>'
                            unclosed_elements.append('u')
                    if font:
                        self._out += '<font' + font + '>'
                        unclosed_elements.append('font')
                    applied_style = {}
                    if self._applied_styles:
                        applied_style.update(self._applied_styles[-1])
                    applied_style.update(style)
                    self._applied_styles.append(applied_style)
                self._unclosed_elements.append(unclosed_elements)

        def end(self, tag):
            if tag not in (_x('ttml:br'), 'br'):
                # Close any tags we opened for this element, innermost first
                unclosed_elements = self._unclosed_elements.pop()
                for element in reversed(unclosed_elements):
                    self._out += '</%s>' % element
                if unclosed_elements and self._applied_styles:
                    self._applied_styles.pop()

        def data(self, data):
            self._out += data

        def close(self):
            return self._out.strip()

    # Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
    # This will not trigger false positives since only UTF-8 text is being replaced
    dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')

    def parse_node(node):
        # Re-serialize the element and run it through TTMLPElementParser
        target = TTMLPElementParser()
        parser = xml.etree.ElementTree.XMLParser(target=target)
        parser.feed(xml.etree.ElementTree.tostring(node))
        return parser.close()

    for k, v in LEGACY_NAMESPACES:
        for ns in v:
            dfxp_data = dfxp_data.replace(ns, k)

    dfxp = compat_etree_fromstring(dfxp_data)
    out = []
    paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')

    if not paras:
        raise ValueError('Invalid dfxp/TTML subtitle')

    # Resolve style inheritance; a style may reference a parent style that
    # is only defined later in the document, so keep re-scanning until no
    # forward references remain.
    # NOTE(review): a style referencing a parent id that never appears
    # would loop here forever — confirm inputs rule that out
    repeat = False
    while True:
        for style in dfxp.findall(_x('.//ttml:style')):
            style_id = style.get('id') or style.get(_x('xml:id'))
            if not style_id:
                continue
            parent_style_id = style.get('style')
            if parent_style_id:
                if parent_style_id not in styles:
                    repeat = True
                    continue
                styles[style_id] = styles[parent_style_id].copy()
            for prop in SUPPORTED_STYLING:
                prop_val = style.get(_x('tts:' + prop))
                if prop_val:
                    styles.setdefault(style_id, {})[prop] = prop_val
        if repeat:
            repeat = False
        else:
            break

    # The style set on the body/div element becomes the default for all paragraphs
    for p in ('body', 'div'):
        ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
        if ele is None:
            continue
        style = styles.get(ele.get('style'))
        if not style:
            continue
        default_style.update(style)

    for para, index in zip(paras, itertools.count(1)):
        begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
        end_time = parse_dfxp_time_expr(para.attrib.get('end'))
        dur = parse_dfxp_time_expr(para.attrib.get('dur'))
        if begin_time is None:
            continue
        if not end_time:
            # Fall back to begin + dur when no explicit end time is given
            if not dur:
                continue
            end_time = begin_time + dur
        out.append('%d\n%s --> %s\n%s\n\n' % (
            index,
            srt_subtitles_timecode(begin_time),
            srt_subtitles_timecode(end_time),
            parse_node(para)))

    return ''.join(out)
3519
3520
def cli_option(params, command_option, param, separator=None):
    """Build the CLI arguments for an option whose value is params[param].

    Returns [] when the value is missing; otherwise either
    [command_option, value] or [f'{command_option}{separator}{value}'].
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
3526
3527
def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
    """Build CLI arguments for a boolean option, rendered as true_value/false_value."""
    flag = params.get(param)
    assert flag in (True, False, None)
    # Delegate to cli_option with a {True: ..., False: ...} mapping so the
    # boolean selects the corresponding string (None still yields [])
    return cli_option({True: true_value, False: false_value}, command_option, flag, separator)
3532
3533
def cli_valueless_option(params, command_option, param, expected_value=True):
    """Return [command_option] iff params[param] equals expected_value."""
    if params.get(param) == expected_value:
        return [command_option]
    return []
3536
3537
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Resolve configuration args from *argdict* for the first key (or
    tuple of keys) in *keys* that has any entries."""
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_list in keys:
        # Collect the argument lists for every key in this group
        matches = [argdict.get(key.lower()) for key in variadic(key_list)]
        matches = [args for args in matches if args is not None]
        if matches:
            # Flatten the per-key argument lists into a single list
            return [arg for args in matches for arg in args]
    return default
3556
3557
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Build the key lookup order and delegate to cli_configuration_args."""
    main_key = main_key.lower()
    exe = exe.lower()
    # e.g. 'ffmpeg' when the keys coincide, else 'mainkey+exe'
    root_key = exe if main_key == exe else f'{main_key}+{exe}'
    lookup_keys = [f'{root_key}{suffix}' for suffix in (keys or [''])]
    if root_key in lookup_keys:
        if main_key != exe:
            lookup_keys.append((main_key, exe))
        lookup_keys.append('default')
    else:
        use_compat = False
    return cli_configuration_args(argdict, lookup_keys, default, use_compat)
3569
3570
class ISO639Utils:
    """Conversion helpers between ISO 639-1 (two-letter) and ISO 639-2/T
    (three-letter) language codes."""
    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pe': 'per',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T"""
        # Only the first two characters are considered, so regional
        # variants such as 'en-US' also resolve; returns None if unknown
        return cls._lang_map.get(code[:2])

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1"""
        # Linear scan; returns the first match in definition order, so
        # e.g. 'heb' yields 'he' rather than the deprecated 'iw'.
        # Implicitly returns None when the code is unknown.
        for short_name, long_name in cls._lang_map.items():
            if long_name == code:
                return short_name
3775
3776
class ISO3166Utils:
    """Mapping from ISO 3166-1 alpha-2 country codes to full country names."""
    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
        # Not ISO 3166 codes, but used for IP blocks
        'AP': 'Asia/Pacific Region',
        'EU': 'Europe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert an ISO 3166-1 alpha-2 country code to the corresponding full name"""
        # Case-insensitive lookup; returns None when the code is unknown
        return cls._country_map.get(code.upper())
4038
4039
4040class GeoUtils:
4041 # Major IPv4 address blocks per country
4042 _country_ip_map = {
4043 'AD': '46.172.224.0/19',
4044 'AE': '94.200.0.0/13',
4045 'AF': '149.54.0.0/17',
4046 'AG': '209.59.64.0/18',
4047 'AI': '204.14.248.0/21',
4048 'AL': '46.99.0.0/16',
4049 'AM': '46.70.0.0/15',
4050 'AO': '105.168.0.0/13',
4051 'AP': '182.50.184.0/21',
4052 'AQ': '23.154.160.0/24',
4053 'AR': '181.0.0.0/12',
4054 'AS': '202.70.112.0/20',
4055 'AT': '77.116.0.0/14',
4056 'AU': '1.128.0.0/11',
4057 'AW': '181.41.0.0/18',
4058 'AX': '185.217.4.0/22',
4059 'AZ': '5.197.0.0/16',
4060 'BA': '31.176.128.0/17',
4061 'BB': '65.48.128.0/17',
4062 'BD': '114.130.0.0/16',
4063 'BE': '57.0.0.0/8',
4064 'BF': '102.178.0.0/15',
4065 'BG': '95.42.0.0/15',
4066 'BH': '37.131.0.0/17',
4067 'BI': '154.117.192.0/18',
4068 'BJ': '137.255.0.0/16',
4069 'BL': '185.212.72.0/23',
4070 'BM': '196.12.64.0/18',
4071 'BN': '156.31.0.0/16',
4072 'BO': '161.56.0.0/16',
4073 'BQ': '161.0.80.0/20',
4074 'BR': '191.128.0.0/12',
4075 'BS': '24.51.64.0/18',
4076 'BT': '119.2.96.0/19',
4077 'BW': '168.167.0.0/16',
4078 'BY': '178.120.0.0/13',
4079 'BZ': '179.42.192.0/18',
4080 'CA': '99.224.0.0/11',
4081 'CD': '41.243.0.0/16',
4082 'CF': '197.242.176.0/21',
4083 'CG': '160.113.0.0/16',
4084 'CH': '85.0.0.0/13',
4085 'CI': '102.136.0.0/14',
4086 'CK': '202.65.32.0/19',
4087 'CL': '152.172.0.0/14',
4088 'CM': '102.244.0.0/14',
4089 'CN': '36.128.0.0/10',
4090 'CO': '181.240.0.0/12',
4091 'CR': '201.192.0.0/12',
4092 'CU': '152.206.0.0/15',
4093 'CV': '165.90.96.0/19',
4094 'CW': '190.88.128.0/17',
4095 'CY': '31.153.0.0/16',
4096 'CZ': '88.100.0.0/14',
4097 'DE': '53.0.0.0/8',
4098 'DJ': '197.241.0.0/17',
4099 'DK': '87.48.0.0/12',
4100 'DM': '192.243.48.0/20',
4101 'DO': '152.166.0.0/15',
4102 'DZ': '41.96.0.0/12',
4103 'EC': '186.68.0.0/15',
4104 'EE': '90.190.0.0/15',
4105 'EG': '156.160.0.0/11',
4106 'ER': '196.200.96.0/20',
4107 'ES': '88.0.0.0/11',
4108 'ET': '196.188.0.0/14',
4109 'EU': '2.16.0.0/13',
4110 'FI': '91.152.0.0/13',
4111 'FJ': '144.120.0.0/16',
4112 'FK': '80.73.208.0/21',
4113 'FM': '119.252.112.0/20',
4114 'FO': '88.85.32.0/19',
4115 'FR': '90.0.0.0/9',
4116 'GA': '41.158.0.0/15',
4117 'GB': '25.0.0.0/8',
4118 'GD': '74.122.88.0/21',
4119 'GE': '31.146.0.0/16',
4120 'GF': '161.22.64.0/18',
4121 'GG': '62.68.160.0/19',
4122 'GH': '154.160.0.0/12',
4123 'GI': '95.164.0.0/16',
4124 'GL': '88.83.0.0/19',
4125 'GM': '160.182.0.0/15',
4126 'GN': '197.149.192.0/18',
4127 'GP': '104.250.0.0/19',
4128 'GQ': '105.235.224.0/20',
4129 'GR': '94.64.0.0/13',
4130 'GT': '168.234.0.0/16',
4131 'GU': '168.123.0.0/16',
4132 'GW': '197.214.80.0/20',
4133 'GY': '181.41.64.0/18',
4134 'HK': '113.252.0.0/14',
4135 'HN': '181.210.0.0/16',
4136 'HR': '93.136.0.0/13',
4137 'HT': '148.102.128.0/17',
4138 'HU': '84.0.0.0/14',
4139 'ID': '39.192.0.0/10',
4140 'IE': '87.32.0.0/12',
4141 'IL': '79.176.0.0/13',
4142 'IM': '5.62.80.0/20',
4143 'IN': '117.192.0.0/10',
4144 'IO': '203.83.48.0/21',
4145 'IQ': '37.236.0.0/14',
4146 'IR': '2.176.0.0/12',
4147 'IS': '82.221.0.0/16',
4148 'IT': '79.0.0.0/10',
4149 'JE': '87.244.64.0/18',
4150 'JM': '72.27.0.0/17',
4151 'JO': '176.29.0.0/16',
4152 'JP': '133.0.0.0/8',
4153 'KE': '105.48.0.0/12',
4154 'KG': '158.181.128.0/17',
4155 'KH': '36.37.128.0/17',
4156 'KI': '103.25.140.0/22',
4157 'KM': '197.255.224.0/20',
4158 'KN': '198.167.192.0/19',
4159 'KP': '175.45.176.0/22',
4160 'KR': '175.192.0.0/10',
4161 'KW': '37.36.0.0/14',
4162 'KY': '64.96.0.0/15',
4163 'KZ': '2.72.0.0/13',
4164 'LA': '115.84.64.0/18',
4165 'LB': '178.135.0.0/16',
4166 'LC': '24.92.144.0/20',
4167 'LI': '82.117.0.0/19',
4168 'LK': '112.134.0.0/15',
4169 'LR': '102.183.0.0/16',
4170 'LS': '129.232.0.0/17',
4171 'LT': '78.56.0.0/13',
4172 'LU': '188.42.0.0/16',
4173 'LV': '46.109.0.0/16',
4174 'LY': '41.252.0.0/14',
4175 'MA': '105.128.0.0/11',
4176 'MC': '88.209.64.0/18',
4177 'MD': '37.246.0.0/16',
4178 'ME': '178.175.0.0/17',
4179 'MF': '74.112.232.0/21',
4180 'MG': '154.126.0.0/17',
4181 'MH': '117.103.88.0/21',
4182 'MK': '77.28.0.0/15',
4183 'ML': '154.118.128.0/18',
4184 'MM': '37.111.0.0/17',
4185 'MN': '49.0.128.0/17',
4186 'MO': '60.246.0.0/16',
4187 'MP': '202.88.64.0/20',
4188 'MQ': '109.203.224.0/19',
4189 'MR': '41.188.64.0/18',
4190 'MS': '208.90.112.0/22',
4191 'MT': '46.11.0.0/16',
4192 'MU': '105.16.0.0/12',
4193 'MV': '27.114.128.0/18',
4194 'MW': '102.70.0.0/15',
4195 'MX': '187.192.0.0/11',
4196 'MY': '175.136.0.0/13',
4197 'MZ': '197.218.0.0/15',
4198 'NA': '41.182.0.0/16',
4199 'NC': '101.101.0.0/18',
4200 'NE': '197.214.0.0/18',
4201 'NF': '203.17.240.0/22',
4202 'NG': '105.112.0.0/12',
4203 'NI': '186.76.0.0/15',
4204 'NL': '145.96.0.0/11',
4205 'NO': '84.208.0.0/13',
4206 'NP': '36.252.0.0/15',
4207 'NR': '203.98.224.0/19',
4208 'NU': '49.156.48.0/22',
4209 'NZ': '49.224.0.0/14',
4210 'OM': '5.36.0.0/15',
4211 'PA': '186.72.0.0/15',
4212 'PE': '186.160.0.0/14',
4213 'PF': '123.50.64.0/18',
4214 'PG': '124.240.192.0/19',
4215 'PH': '49.144.0.0/13',
4216 'PK': '39.32.0.0/11',
4217 'PL': '83.0.0.0/11',
4218 'PM': '70.36.0.0/20',
4219 'PR': '66.50.0.0/16',
4220 'PS': '188.161.0.0/16',
4221 'PT': '85.240.0.0/13',
4222 'PW': '202.124.224.0/20',
4223 'PY': '181.120.0.0/14',
4224 'QA': '37.210.0.0/15',
4225 'RE': '102.35.0.0/16',
4226 'RO': '79.112.0.0/13',
4227 'RS': '93.86.0.0/15',
4228 'RU': '5.136.0.0/13',
4229 'RW': '41.186.0.0/16',
4230 'SA': '188.48.0.0/13',
4231 'SB': '202.1.160.0/19',
4232 'SC': '154.192.0.0/11',
4233 'SD': '102.120.0.0/13',
4234 'SE': '78.64.0.0/12',
4235 'SG': '8.128.0.0/10',
4236 'SI': '188.196.0.0/14',
4237 'SK': '78.98.0.0/15',
4238 'SL': '102.143.0.0/17',
4239 'SM': '89.186.32.0/19',
4240 'SN': '41.82.0.0/15',
4241 'SO': '154.115.192.0/18',
4242 'SR': '186.179.128.0/17',
4243 'SS': '105.235.208.0/21',
4244 'ST': '197.159.160.0/19',
4245 'SV': '168.243.0.0/16',
4246 'SX': '190.102.0.0/20',
4247 'SY': '5.0.0.0/16',
4248 'SZ': '41.84.224.0/19',
4249 'TC': '65.255.48.0/20',
4250 'TD': '154.68.128.0/19',
4251 'TG': '196.168.0.0/14',
4252 'TH': '171.96.0.0/13',
4253 'TJ': '85.9.128.0/18',
4254 'TK': '27.96.24.0/21',
4255 'TL': '180.189.160.0/20',
4256 'TM': '95.85.96.0/19',
4257 'TN': '197.0.0.0/11',
4258 'TO': '175.176.144.0/21',
4259 'TR': '78.160.0.0/11',
4260 'TT': '186.44.0.0/15',
4261 'TV': '202.2.96.0/19',
4262 'TW': '120.96.0.0/11',
4263 'TZ': '156.156.0.0/14',
4264 'UA': '37.52.0.0/14',
4265 'UG': '102.80.0.0/13',
4266 'US': '6.0.0.0/8',
4267 'UY': '167.56.0.0/13',
4268 'UZ': '84.54.64.0/18',
4269 'VA': '212.77.0.0/19',
4270 'VC': '207.191.240.0/21',
4271 'VE': '186.88.0.0/13',
4272 'VG': '66.81.192.0/20',
4273 'VI': '146.226.0.0/16',
4274 'VN': '14.160.0.0/11',
4275 'VU': '202.80.32.0/20',
4276 'WF': '117.20.32.0/21',
4277 'WS': '202.4.32.0/19',
4278 'YE': '134.35.0.0/16',
4279 'YT': '41.242.116.0/22',
4280 'ZA': '41.0.0.0/11',
4281 'ZM': '102.144.0.0/13',
4282 'ZW': '102.177.192.0/18',
4283 }
4284
4285 @classmethod
4286 def random_ipv4(cls, code_or_block):
4287 if len(code_or_block) == 2:
4288 block = cls._country_ip_map.get(code_or_block.upper())
4289 if not block:
4290 return None
4291 else:
4292 block = code_or_block
4293 addr, preflen = block.split('/')
4294 addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
4295 addr_max = addr_min | (0xffffffff >> int(preflen))
4296 return str(socket.inet_ntoa(
4297 struct.pack('!L', random.randint(addr_min, addr_max))))
4298
4299
4300# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
4301# released into Public Domain
4302# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
4303
def long_to_bytes(n, blocksize=0):
    """long_to_bytes(n:long, blocksize:int) : string
    Convert a long integer to a byte string.

    If optional blocksize is given and greater than zero, pad the front of the
    byte string with binary zeros so that the length is a multiple of
    blocksize.
    """
    n = int(n)
    if n <= 0:
        # Preserve the historical behavior: 0 (and negatives) yield a single NUL
        s = b'\x00'
    else:
        # Minimal big-endian representation, no leading zero bytes
        s = n.to_bytes((n.bit_length() + 7) // 8, 'big')
    # Pad the front so that len(s) is a multiple of blocksize
    if blocksize > 0 and len(s) % blocksize:
        s = b'\x00' * (blocksize - len(s) % blocksize) + s
    return s
4332
4333
def bytes_to_long(s):
    """bytes_to_long(string) : long
    Convert a byte string to a long integer.

    This is (essentially) the inverse of long_to_bytes().
    """
    # int.from_bytes handles any length (including empty -> 0) in C
    return int.from_bytes(s, 'big')
4349
4350
def ohdave_rsa_encrypt(data, exponent, modulus):
    '''
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    '''
    # The payload is interpreted little-endian (hence the byte reversal)
    payload = int(binascii.hexlify(data[::-1]), 16)
    return format(pow(payload, exponent, modulus), 'x')
4366
4367
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    @param {int[]} data input data
    @param {int} length target length
    @returns {int[]} padded data
    @raises ValueError if data cannot fit (needs at least 11 bytes of overhead)
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # RFC 8017 §7.2.1: EM = 0x00 || 0x02 || PS || 0x00 || M, where PS is made
    # of *non-zero* pseudo-random octets (a zero would be taken as the
    # separator during decryption, truncating the message)
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    return [0, 2] + pseudo_random + [0] + data
4381
4382
4383def _base_n_table(n, table):
4384 if not table and not n:
4385 raise ValueError('Either table or n must be specified')
4386 table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
4387
4388 if n and n != len(table):
4389 raise ValueError(f'base {n} exceeds table length {len(table)}')
4390 return table
4391
4392
def encode_base_n(num, n=None, table=None):
    """Convert given int to a base-n string"""
    table = _base_n_table(n, table)
    if not num:
        return table[0]

    base = len(table)
    digits = []
    while num:
        num, remainder = divmod(num, base)
        digits.append(table[remainder])
    return ''.join(reversed(digits))
4404
4405
def decode_base_n(string, n=None, table=None):
    """Convert given base-n string to int"""
    digit_values = {char: idx for idx, char in enumerate(_base_n_table(n, table))}
    base = len(digit_values)
    value = 0
    for char in string:
        value = value * base + digit_values[char]
    return value
4413
4414
def decode_packed_codes(code):
    """Deobfuscate 'packed' JavaScript (as matched by PACKED_CODES_RE)."""
    mobj = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = mobj.groups()
    base, count = int(base), int(count)
    symbols = symbols.split('|')

    # Map each base-n encoded index to its symbol (or to itself if empty)
    symbol_table = {}
    for index in range(count):
        key = encode_base_n(index, base)
        symbol_table[key] = symbols[index] or key

    return re.sub(
        r'\b(\w+)\b', lambda match: symbol_table[match.group(0)],
        obfuscated_code)
4431
4432
def caesar(s, alphabet, shift):
    """Rotate every character of s that appears in alphabet by shift positions."""
    if not shift:
        return s
    size = len(alphabet)

    def rotate(ch):
        pos = alphabet.find(ch)
        return ch if pos < 0 else alphabet[(pos + shift) % size]

    return ''.join(map(rotate, s))
4440
4441
def rot47(s):
    """Apply the ROT47 substitution cipher (rotate printable ASCII by 47)."""
    printable_ascii = r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~'''
    return caesar(s, printable_ascii, 47)
4444
4445
def parse_m3u8_attributes(attrib):
    """Parse an M3U8 attribute list (KEY=value,KEY="quoted value",...) into a dict."""
    info = {}
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        # Strip surrounding quotes from quoted values
        info[key] = val[1:-1] if val.startswith('"') else val
    return info
4453
4454
def urshift(val, n):
    """Unsigned (logical) right shift, treating val as a 32-bit value."""
    if val < 0:
        val += 0x100000000
    return val >> n
4457
4458
def write_xattr(path, key, value):
    """Write the extended attribute `key` with bytes `value` on file `path`.

    Tries, in order: NTFS Alternate Data Streams (Windows), os.setxattr or the
    xattr/pyxattr Python modules, then the `setfattr`/`xattr` command-line
    tools. Raises XAttrMetadataError on write failure and XAttrUnavailableError
    when no implementation is available.
    """
    # Windows: Write xattrs to NTFS Alternate Data Streams:
    # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
    if compat_os_name == 'nt':
        assert ':' not in key
        assert os.path.exists(path)

        try:
            with open(f'{path}:{key}', 'wb') as f:
                f.write(value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules

    setxattr = None
    if callable(getattr(os, 'setxattr', None)):
        setxattr = os.setxattr
    elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        if version_tuple(xattr.__version__) >= (0, 5, 0):
            setxattr = xattr.set
    elif xattr:
        setxattr = xattr.setxattr

    if setxattr:
        try:
            setxattr(path, key, value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 2. Use setfattr/xattr executables
    exe = ('setfattr' if check_executable('setfattr', ['--version'])
           else 'xattr' if check_executable('xattr', ['-h']) else None)
    if not exe:
        raise XAttrUnavailableError(
            'Couldn\'t find a tool to set the xattrs. Install either the "xattr" or "pyxattr" Python modules or the '
            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))

    # The CLI tools take the value as a (decoded) string argument
    value = value.decode()
    try:
        _, stderr, returncode = Popen.run(
            [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except OSError as e:
        raise XAttrMetadataError(e.errno, e.strerror)
    if returncode:
        raise XAttrMetadataError(returncode, stderr)
4510
4511
def random_birthday(year_field, month_field, day_field):
    """Pick a random date between 1950-01-01 and 1995-12-31 and return it as a
    dict of stringified year/month/day under the given field names."""
    first, last = dt.date(1950, 1, 1), dt.date(1995, 12, 31)
    chosen = first + dt.timedelta(days=random.randint(0, (last - first).days))
    return {
        year_field: str(chosen.year),
        month_field: str(chosen.month),
        day_field: str(chosen.day),
    }
4522
4523
def find_available_port(interface=''):
    """Ask the OS for an unused TCP port on `interface`; None if binding fails."""
    try:
        with socket.socket() as sock:
            sock.bind((interface, 0))
            _, port = sock.getsockname()[:2]
            return port
    except OSError:
        return None
4531
4532
# Templates for internet shortcut files, which are plain text files.
# Windows .url shortcut (INI-style)
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# macOS .webloc shortcut (XML property list)
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# freedesktop.org .desktop link entry (Linux desktops)
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''

# Maps the --write-link format name to its template
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
4564
4565
def iri_to_uri(iri):
    """
    Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).

    Already percent-escaped sequences (e.g. `%3C`) are left untouched rather
    than double-escaped; only characters outside the allowed sets get
    percent-encoded with an underlying UTF-8 encoding.
    """
    parts = urllib.parse.urlparse(iri)

    if '[' in parts.netloc:
        raise ValueError('IPv6 URIs are not, yet, supported.')
        # Querying `.netloc`, when there's only one bracket, also raises a ValueError.

    # The `safe` sets below list characters that must NOT be percent-encoded
    # (source: https://url.spec.whatwg.org/#percent-encoded-bytes); everything
    # else but letters, digits and '_.-' is UTF-8 percent-encoded.
    netloc = ''
    if parts.username:
        netloc += urllib.parse.quote(parts.username, safe=r"!$%&'()*+,~")
        if parts.password is not None:
            netloc += ':' + urllib.parse.quote(parts.password, safe=r"!$%&'()*+,~")
        netloc += '@'

    # The 'idna' codec (Punycode) always produces an ASCII hostname
    netloc += parts.hostname.encode('idna').decode()
    # NOTE(review): port 80 is dropped regardless of scheme — preserved as-is
    if parts.port is not None and parts.port != 80:
        netloc += f':{parts.port}'

    return urllib.parse.urlunparse((
        parts.scheme,
        netloc,
        urllib.parse.quote_plus(parts.path, safe=r"!$%&'()*+,/:;=@|~"),
        # Legacy `params` component; same safe set as the path
        urllib.parse.quote_plus(parts.params, safe=r"!$%&'()*+,/:;=@|~"),
        urllib.parse.quote_plus(parts.query, safe=r"!$%&'()*+,/:;=?@{|}~"),
        urllib.parse.quote_plus(parts.fragment, safe=r"!#$%&'()*+,/:;=?@{|}~"),
    ))
4608
4609
def to_high_limit_path(path):
    """On Windows, prefix the absolute path with \\\\?\\ to work around the
    MAX_PATH limitation; on other platforms return the path unchanged.
    (Individual path segment length limits may still apply.)"""
    if sys.platform not in ('win32', 'cygwin'):
        return path
    # The \\?\ prefix is only valid for absolute paths
    return '\\\\?\\' + os.path.abspath(path)
4616
4617
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
    """Traverse `field` in `obj`, then format the value with `template` after
    applying `func`; return `default` when the value is falsy (or in `ignore`)."""
    val = traversal.traverse_obj(obj, *variadic(field))
    if ignore is NO_DEFAULT:
        skip = not val
    else:
        skip = val in variadic(ignore)
    return default if skip else template % func(val)
4623
4624
def clean_podcast_url(url):
    """Strip known podcast tracking/analytics redirector prefixes from `url`."""
    url = re.sub(r'''(?x)
        (?:
            (?:
                chtbl\.com/track|
                media\.blubrry\.com| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
                play\.podtrac\.com|
                chrt\.fm/track|
                mgln\.ai/e
            )(?:/[^/.]+)?|
            (?:dts|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}| # http://analytics.podtrac.com/how-to-measure
            flex\.acast\.com|
            pd(?:
                cn\.co| # https://podcorn.com/analytics-prefix/
                st\.fm # https://podsights.com/docs/
            )/e|
            [0-9]\.gum\.fm|
            pscrb\.fm/rss/p
        )/''', '', url)
    # Collapse a doubled scheme ('https://https://...') left after stripping
    return re.sub(r'^\w+://(\w+://)', r'\1', url)
4645
4646
_HEX_TABLE = '0123456789abcdef'


def random_uuidv4():
    """Generate a random string in UUIDv4 format (version nibble fixed to 4)."""
    template = 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx'
    return re.sub(r'[xy]', lambda _: _HEX_TABLE[random.randint(0, 15)], template)
4652
4653
def make_dir(path, to_screen=None):
    """Create the parent directory of `path` (if any) and return True.

    On failure, return False; if `to_screen` is callable it is invoked with an
    error message first.
    """
    try:
        dn = os.path.dirname(path)
        if dn:
            os.makedirs(dn, exist_ok=True)
        return True
    except OSError as err:
        # Previously `callable(to_screen) is not None`, which is always True
        # and crashed with `None(...)` when no callback was supplied
        if callable(to_screen):
            to_screen(f'unable to create directory {err}')
        return False
4664
4665
def get_executable_path():
    """Return the directory containing the running yt-dlp executable/script."""
    from ..update import _get_variant_and_executable_path

    _, exe_path = _get_variant_and_executable_path()
    return os.path.dirname(os.path.abspath(exe_path))
4670
4671
def get_user_config_dirs(package_name):
    """Yield candidate per-user configuration directories for `package_name`."""
    # XDG-style: $XDG_CONFIG_HOME/package_name (defaults to ~/.config)
    xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
    yield os.path.join(xdg_config_home, package_name)

    # Windows-style: %APPDATA%/package_name
    appdata = os.getenv('appdata')
    if appdata:
        yield os.path.join(appdata, package_name)

    # Dotted home directory: ~/.package_name
    yield os.path.join(compat_expanduser('~'), f'.{package_name}')
4684
4685
def get_system_config_dirs(package_name):
    """Yield system-wide configuration directories for `package_name`."""
    # Currently only /etc/package_name
    yield os.path.join('/etc', package_name)
4689
4690
def time_seconds(**kwargs):
    """
    Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)

    Keyword arguments are passed to datetime.timedelta and added as an offset.
    """
    offset = dt.timedelta(**kwargs).total_seconds()
    return time.time() + offset
4696
4697
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Create an HS256-signed JWS token (header.payload.signature) as bytes.

    @param payload_data  JSON-serializable payload (claims)
    @param key           shared secret (str) for the HMAC-SHA256 signature
    @param headers       optional extra/overriding JOSE header fields
    """
    # NB: uses standard (not URL-safe) base64 with padding, unlike strict JWS;
    # kept as-is since consumers expect this historical behavior
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    if headers:  # `headers={}` was a mutable default argument; now None
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode())
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
    h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    return header_b64 + b'.' + payload_b64 + b'.' + signature_b64
4715
4716
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Decode and return the payload of a JWT; the signature is NOT verified."""
    _header_b64, payload_b64, _signature_b64 = jwt.split('.')
    # Re-append padding that may have been stripped; extra '=' are ignored
    return json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
4723
4724
# On Windows, VT processing starts disabled (enabled via windows_enable_vt_mode);
# elsewhere it is not applicable
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None


@functools.cache
def supports_terminal_sequences(stream):
    """Whether ANSI terminal (escape) sequences can be written to `stream`."""
    if compat_os_name == 'nt':
        terminal_ok = bool(WINDOWS_VT_MODE)
    else:
        terminal_ok = bool(os.getenv('TERM'))
    if not terminal_ok:
        return False
    try:
        return stream.isatty()
    except BaseException:
        return False
4739
4740
def windows_enable_vt_mode():
    """Ref: https://bugs.python.org/issue30075

    Enable virtual-terminal (ANSI escape) processing on the Windows console
    and record success in the module-global WINDOWS_VT_MODE.
    """
    # VT processing is only available from Windows 10 TH2 (build 10586)
    if get_windows_version() < (10, 0, 10586):
        return

    import ctypes
    import ctypes.wintypes
    import msvcrt

    ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004

    dll = ctypes.WinDLL('kernel32', use_last_error=False)
    # Open the console output device directly, independent of stdout redirection
    handle = os.open('CONOUT$', os.O_RDWR)
    try:
        h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle))
        dw_original_mode = ctypes.wintypes.DWORD()
        success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode))
        if not success:
            raise Exception('GetConsoleMode failed')

        # Preserve the existing mode flags, only adding VT processing
        success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD(
            dw_original_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING))
        if not success:
            raise Exception('SetConsoleMode failed')
    finally:
        os.close(handle)

    global WINDOWS_VT_MODE
    WINDOWS_VT_MODE = True
    # Cached results may now be stale, as terminal sequences became available
    supports_terminal_sequences.cache_clear()
4771
4772
# Matches ANSI SGR escape sequences (ESC [ ... m)
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with all ANSI SGR escape sequences stripped."""
    return _terminal_sequences_re.sub('', string)
4778
4779
def number_of_digits(number):
    """Count the characters in the integer ('%d') representation of `number`
    (includes the minus sign for negative values)."""
    as_int_text = '%d' % number
    return len(as_int_text)
4782
4783
def join_nonempty(*values, delim='-', from_dict=None):
    """Join the string form of all truthy values with `delim`; when `from_dict`
    is given, each value is first resolved as a traversal path into it."""
    if from_dict is not None:
        values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
    return delim.join(str(v) for v in values if v)
4788
4789
def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Find the largest format dimensions in terms of video width and, for each thumbnail:
    * Modify the URL: Match the width with the provided regex and replace with the former width
    * Update dimensions

    This function is useful with video services that scale the provided thumbnails on demand
    """
    dims = ('width', 'height')
    largest = max(
        (tuple(fmt.get(key) or 0 for key in dims) for fmt in formats),
        default=(0, 0))
    if not largest[0]:
        # No format declares a width; leave thumbnails untouched
        return thumbnails
    width_str = str(largest[0])
    return [
        merge_dicts(
            {'url': re.sub(url_width_re, width_str, thumb['url'])},
            dict(zip(dims, largest)), thumb)
        for thumb in thumbnails
    ]
4810
4811
def parse_http_range(range):
    """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
    if not range:
        return None, None, None
    match = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
    if not match:
        return None, None, None
    start, end, total = match.groups()
    # end and total are optional in the header
    return int(start), int_or_none(end), int_or_none(total)
4820
4821
def read_stdin(what):
    """Return sys.stdin, announcing (when `what` is truthy) how to end input."""
    if what:
        eof_key = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
        write_string(f'Reading {what} from STDIN - EOF ({eof_key}) to end:\n')
    return sys.stdin
4827
4828
def determine_file_encoding(data):
    """
    Detect the text encoding used
    @returns (encoding, bytes to skip)
    """
    # A BOM takes priority over any in-file coding declaration
    for bom, encoding in BOMS:
        if data.startswith(bom):
            return encoding, len(bom)

    # Drop NUL bytes so a `# coding: ...` declaration still matches when the
    # file is UTF-16/32 encoded, regardless of endianness
    stripped = data.replace(b'\0', b'')
    declaration = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', stripped)
    if declaration:
        return declaration.group(1).decode(), 0
    return None, 0
4845
4846
class Config:
    """A (possibly nested) collection of command-line/config-file options.

    Wraps an option parser together with its own argument list; any
    `--config-locations` encountered while parsing spawn child Configs so
    that `all_args` can later flatten everything in priority order.
    """

    own_args = None      # raw args this config was initialized with
    parsed_args = None   # set to own_args once load_configs() has run
    filename = None      # config file these args came from, if any
    __initialized = False  # guards against calling init() twice

    def __init__(self, parser, label=None):
        self.parser, self.label = parser, label
        self._loaded_paths, self.configs = set(), []

    def init(self, args=None, filename=None):
        """Bind args/filename and load any referenced child configs."""
        assert not self.__initialized
        self.own_args, self.filename = args, filename
        return self.load_configs()

    def load_configs(self):
        """Parse own args and follow `--config-locations` recursively.

        Returns False (and loads nothing) if this file was already loaded,
        protecting against duplicate/cyclic includes.
        """
        directory = ''
        if self.filename:
            location = os.path.realpath(self.filename)
            directory = os.path.dirname(location)
            if location in self._loaded_paths:
                return False
            self._loaded_paths.add(location)

        self.__initialized = True
        opts, _ = self.parser.parse_known_args(self.own_args)
        self.parsed_args = self.own_args
        for location in opts.config_locations or []:
            if location == '-':
                # '-' means: read additional options from stdin (only once)
                if location in self._loaded_paths:
                    continue
                self._loaded_paths.add(location)
                self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
                continue
            # Relative locations are resolved against this config's directory
            location = os.path.join(directory, expand_path(location))
            if os.path.isdir(location):
                location = os.path.join(location, 'yt-dlp.conf')
            if not os.path.exists(location):
                self.parser.error(f'config location {location} does not exist')
            self.append_config(self.read_file(location), location)
        return True

    def __str__(self):
        label = join_nonempty(
            self.label, 'config', f'"{self.filename}"' if self.filename else '',
            delim=' ')
        return join_nonempty(
            self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
            *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
            delim='\n')

    @staticmethod
    def read_file(filename, default=[]):
        """Read a config file and shlex-split it into an args list.

        Returns `default` when the file cannot be opened; raises ValueError
        when its contents cannot be decoded/parsed.
        """
        try:
            optionf = open(filename, 'rb')
        except OSError:
            return default  # silently skip if file is not present
        try:
            enc, skip = determine_file_encoding(optionf.read(512))
            optionf.seek(skip, io.SEEK_SET)
        except OSError:
            enc = None  # silently skip read errors
        try:
            # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
            contents = optionf.read().decode(enc or preferredencoding())
            res = shlex.split(contents, comments=True)
        except Exception as err:
            # Fixed: the message previously contained the literal text
            # "(unknown)" instead of interpolating the actual filename
            raise ValueError(f'Unable to parse "{filename}": {err}')
        finally:
            optionf.close()
        return res

    @staticmethod
    def hide_login_info(opts):
        """Return a copy of `opts` with values of credential options redacted."""
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
        eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')

        def _scrub_eq(o):
            m = eqre.match(o)
            if m:
                return m.group('key') + '=PRIVATE'
            else:
                return o

        opts = list(map(_scrub_eq, opts))
        for idx, opt in enumerate(opts):
            if opt in PRIVATE_OPTS and idx + 1 < len(opts):
                opts[idx + 1] = 'PRIVATE'
        return opts

    def append_config(self, *args, label=None):
        """Create a child Config (sharing loaded-path tracking) and keep it."""
        config = type(self)(self.parser, label)
        config._loaded_paths = self._loaded_paths
        if config.init(*args):
            self.configs.append(config)

    @property
    def all_args(self):
        # Child configs first (lowest priority), own args last
        for config in reversed(self.configs):
            yield from config.all_args
        yield from self.parsed_args or []

    def parse_known_args(self, **kwargs):
        return self.parser.parse_known_args(self.all_args, **kwargs)

    def parse_args(self):
        return self.parser.parse_args(self.all_args)
4954
4955
def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
    merged = {}
    for headers in dicts:
        for name, value in headers.items():
            # Title-casing normalizes the key, so later dicts overwrite earlier
            merged[name.title()] = value
    return merged
4959
4960
def cached_method(f):
    """Cache a method"""
    signature = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        # Normalize the call so equivalent invocations share a cache key
        bound = signature.bind(self, *args, **kwargs)
        bound.apply_defaults()
        key = tuple(bound.arguments.values())[1:]  # drop `self`

        # Per-instance cache, partitioned by method name
        caches = vars(self).setdefault('_cached_method__cache', {})
        cache = caches.setdefault(f.__name__, {})
        if key not in cache:
            cache[key] = f(self, *args, **kwargs)
        return cache[key]
    return wrapper
4976
4977
class classproperty:
    """property access for class methods with optional caching"""

    def __new__(cls, func=None, *args, **kwargs):
        # Support both `@classproperty` and `@classproperty(cache=True)`
        if not func:
            return functools.partial(cls, *args, **kwargs)
        return super().__new__(cls)

    def __init__(self, func, *, cache=False):
        functools.update_wrapper(self, func)
        self.func = func
        self._cache = {} if cache else None

    def __get__(self, _, cls):
        if self._cache is None:
            return self.func(cls)
        # Cache per accessing class, so subclasses get their own value
        if cls not in self._cache:
            self._cache[cls] = self.func(cls)
        return self._cache[cls]
4996
4997
class function_with_repr:
    """Wrap a callable, giving it a stable, readable repr()."""

    def __init__(self, func, repr_=None):
        functools.update_wrapper(self, func)
        self.func = func
        self.__repr = repr_

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    @classmethod
    def set_repr(cls, repr_):
        """Return a decorator that attaches the given repr to a callable."""
        return functools.partial(cls, repr_=repr_)

    def __repr__(self):
        # Fall back to the wrapped function's qualified name
        return self.__repr or f'{self.func.__module__}.{self.func.__qualname__}'
5014
5015
class Namespace(types.SimpleNamespace):
    """Immutable namespace"""

    def __iter__(self):
        # Iterating a Namespace yields its values (not names)
        yield from self.__dict__.values()

    @property
    def items_(self):
        # (name, value) pairs; trailing underscore avoids clashing with members
        return self.__dict__.items()
5025
5026
# Known media file extensions grouped by kind; the `common_*` groups hold the
# most frequently seen ones and are folded into the full lists below
MEDIA_EXTENSIONS = Namespace(
    common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
    video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
    common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
    audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
    thumbnails=('jpg', 'png', 'webp'),
    storyboards=('mhtml', ),
    subtitles=('srt', 'vtt', 'ass', 'lrc'),
    manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
# NB: mutates the namespace in place — `video`/`audio` end up including the common ones
MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio

# Every extension considered downloadable media
KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
5041
5042
class RetryManager:
    """Usage:
        for retry in RetryManager(...):
            try:
                ...
            except SomeException as err:
                retry.error = err
                continue
    """
    # attempt counts iterations so far; _error holds the error reported for the
    # current attempt (the NO_DEFAULT sentinel means "no error reported yet")
    attempt, _error = 0, None

    def __init__(self, _retries, _error_callback, **kwargs):
        # _error_callback is invoked as (error, attempt, retries, **kwargs)
        # after every attempt that set an error
        self.retries = _retries or 0
        self.error_callback = functools.partial(_error_callback, **kwargs)

    def _should_retry(self):
        # Continue while the last attempt errored (or none ran yet) and
        # attempts remain
        return self._error is not NO_DEFAULT and self.attempt <= self.retries

    @property
    def error(self):
        # Reading maps the NO_DEFAULT sentinel back to None
        if self._error is NO_DEFAULT:
            return None
        return self._error

    @error.setter
    def error(self, value):
        self._error = value

    def __iter__(self):
        while self._should_retry():
            self.error = NO_DEFAULT  # reset before each attempt
            self.attempt += 1
            yield self
            if self.error:
                # An error was set for this attempt — report it (the callback
                # decides whether to warn and retry, or give up)
                self.error_callback(self.error, self.attempt, self.retries)

    @staticmethod
    def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
        """Utility function for reporting retries"""
        if count > retries:
            # Retries exhausted: delegate to `error` or re-raise
            if error:
                return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
            raise e

        if not count:
            return warn(e)
        elif isinstance(e, ExtractorError):
            # Prefer the underlying cause/original message, without trailing dot
            e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
        warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')

        # sleep_func may be a callable (backoff schedule) or a fixed delay
        delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
        if delay:
            info(f'Sleeping {delay:.2f} seconds ...')
            time.sleep(delay)
5097
5098
def make_archive_id(ie, video_id):
    """Build a download-archive entry id from an extractor (or its key string)
    and a video id."""
    key = ie if isinstance(ie, str) else ie.ie_key()
    return f'{key.lower()} {video_id}'
5102
5103
def truncate_string(s, left, right=0):
    """Shorten `s` to at most left+right characters, replacing the removed
    middle/end with '...'; None passes through unchanged."""
    assert left > 3 and right >= 0
    if s is None or len(s) <= left + right:
        return s
    head = s[:left - 3]
    tail = s[-right:] if right else ''
    return f'{head}...{tail}'
5109
5110
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
    """Expand option values (supporting aliases and '-' negation) into an
    ordered, de-duplicated selection of the values in alias_dict['all'].

    @param options    requested values; a leading '-' discards that value
    @param alias_dict maps alias -> list of values; must contain key 'all'
    @param use_regex  treat non-alias values as regexes over alias_dict['all']
    @param start      initial selection to build upon
    @raises ValueError when a value matches nothing in alias_dict['all']
    """
    assert 'all' in alias_dict, '"all" alias is required'
    requested = list(start or [])
    for val in options:
        discard = val.startswith('-')
        if discard:
            val = val[1:]

        if val in alias_dict:
            # Discarding an alias negates each of its members (flipping any
            # '-' they already carry), then recurses to expand them
            val = alias_dict[val] if not discard else [
                i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
            # NB: Do not allow regex in aliases for performance
            requested = orderedSet_from_options(val, alias_dict, start=requested)
            continue

        current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
                   else [val] if val in alias_dict['all'] else None)
        if current is None:
            raise ValueError(val)

        if discard:
            # Remove every prior occurrence of each discarded item
            for item in current:
                while item in requested:
                    requested.remove(item)
        else:
            requested.extend(current)

    return orderedSet(requested)
5139
5140
5141# TODO: Rewrite
class FormatSorter:
    """Orders media formats according to a list of sort criteria ("fields").

    Each criterion is one token matched by :attr:`regex`, e.g. ``res``,
    ``+res`` (reversed), ``res:1080`` (limit) or ``br~2000`` (prefer the
    value closest to the limit).  ``calculate_preference`` maps a format
    dict to a tuple of per-field sub-tuples usable as a ``sort`` key.
    """

    # One sort token: optional "+" (reverse), a field name, and an optional
    # ":limit" (cap) or "~limit" (prefer-closest) suffix
    regex = r' *((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.*?))?)? *$'

    # Fields always appended after user/extractor preferences (see
    # evaluate_params); 'forced'/'priority' entries may also be prepended
    default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
               'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
               'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id')  # These must not be aliases

    # NOTE(review): judging by the name, youtube-dl's traditional sort order;
    # not referenced anywhere within this class
    ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
                    'height', 'width', 'proto', 'vext', 'abr', 'aext',
                    'fps', 'fs_approx', 'source', 'id')

    # Per-field configuration. Recognised keys (all optional; missing keys get
    # defaults computed in _get_field_setting):
    #   type:       'ordered' | 'boolean' | 'extractor' | 'combined'
    #               | 'multiple' | 'alias' | (default) 'field'
    #   field:      the format-dict key(s) the value is read from
    #   order:      ranked list for 'ordered' fields (earlier = better);
    #               'order_free' replaces it when prefer_free_formats is set
    #   regex:      entries of 'order' are regexes, not literal values
    #   convert:    value conversion, see _resolve_field_value
    #   function:   reducer combining several sub-field values ('multiple')
    #   in_list / not_in_list:  membership tests for 'boolean' fields
    #   forced / priority / visible / deprecated / max / default:  see usages
    settings = {
        'vcodec': {'type': 'ordered', 'regex': True,
                   'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265|he?vc?', '[hx]264|avc', 'vp0?8', 'mp4v|h263', 'theora', '', None, 'none']},
        'acodec': {'type': 'ordered', 'regex': True,
                   'order': ['[af]lac', 'wav|aiff', 'opus', 'vorbis|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
        'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
                'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
        'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
                  'order': ['(ht|f)tps', '(ht|f)tp$', 'm3u8.*', '.*dash', 'websocket_frag', 'rtmpe?', '', 'mms|rtsp', 'ws|websocket', 'f4']},
        'vext': {'type': 'ordered', 'field': 'video_ext',
                 'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
                 'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
        'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
                 'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
                 'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
        'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
        'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
                       'field': ('vcodec', 'acodec'),
                       'function': lambda it: int(any(v != 'none' for v in it))},
        'ie_pref': {'priority': True, 'type': 'extractor'},
        'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
        'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
        'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
        'quality': {'convert': 'float', 'default': -1},
        'filesize': {'convert': 'bytes'},
        'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
        'id': {'convert': 'string', 'field': 'format_id'},
        'height': {'convert': 'float_none'},
        'width': {'convert': 'float_none'},
        'fps': {'convert': 'float_none'},
        'channels': {'convert': 'float_none', 'field': 'audio_channels'},
        'tbr': {'convert': 'float_none'},
        'vbr': {'convert': 'float_none'},
        'abr': {'convert': 'float_none'},
        'asr': {'convert': 'float_none'},
        'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},

        'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
        'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
               'function': lambda it: next(filter(None, it), None)},
        'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
                 'function': lambda it: next(filter(None, it), None)},
        'ext': {'type': 'combined', 'field': ('vext', 'aext')},
        'res': {'type': 'multiple', 'field': ('height', 'width'),
                'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},

        # Actual field names
        'format_id': {'type': 'alias', 'field': 'id'},
        'preference': {'type': 'alias', 'field': 'ie_pref'},
        'language_preference': {'type': 'alias', 'field': 'lang'},
        'source_preference': {'type': 'alias', 'field': 'source'},
        'protocol': {'type': 'alias', 'field': 'proto'},
        'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
        'audio_channels': {'type': 'alias', 'field': 'channels'},

        # Deprecated
        'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
        'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
        'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
        'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
        'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
        'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
        'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
        'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
        'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
        'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
        'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
        'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
        'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
        'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
        'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
        'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
        'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
        'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
        'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
        'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
    }

    def __init__(self, ydl, field_preference):
        """
        @param ydl               YoutubeDL instance; used for params and logging
        @param field_preference  sort fields requested by the extractor
        """
        self.ydl = ydl
        self._order = []
        self.evaluate_params(self.ydl.params, field_preference)
        if ydl.params.get('verbose'):
            self.print_verbose_info(self.ydl.write_debug)

    def _get_field_setting(self, field, key):
        """Look up `key` for `field` in self.settings, computing and caching a
        default when the key is absent. Unknown fields are tolerated (with a
        deprecation notice) and get a fresh, empty settings entry."""
        if field not in self.settings:
            if key in ('forced', 'priority'):
                return False
            self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
                                        'deprecated and may be removed in a future version')
            self.settings[field] = {}
        propObj = self.settings[field]
        if key not in propObj:
            # Defaults depend on the field's declared type; computed lazily and
            # cached back into the settings entry
            type = propObj.get('type')
            if key == 'field':
                default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
            elif key == 'convert':
                default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
            else:
                default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
            propObj[key] = default
        return propObj[key]

    def _resolve_field_value(self, field, value, convertNone=False):
        """Convert a raw value according to the field's 'convert' setting.

        For 'order' conversion the result is the value's reversed rank in the
        field's order list (earlier entries yield larger numbers). For other
        conversions, numbers/bytes are parsed; on failure the field is demoted
        to string comparison.
        @param convertNone  when true, rank a None value like any other;
                            otherwise None is passed through unchanged
        """
        if value is None:
            if not convertNone:
                return None
        else:
            value = value.lower()
        conversion = self._get_field_setting(field, 'convert')
        if conversion == 'ignore':
            return None
        if conversion == 'string':
            return value
        elif conversion == 'float_none':
            return float_or_none(value)
        elif conversion == 'bytes':
            return parse_bytes(value)
        elif conversion == 'order':
            order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
            use_regex = self._get_field_setting(field, 'regex')
            list_length = len(order_list)
            empty_pos = order_list.index('') if '' in order_list else list_length + 1
            if use_regex and value is not None:
                for i, regex in enumerate(order_list):
                    if regex and re.match(regex, value):
                        return list_length - i
                return list_length - empty_pos  # not in list
            else:  # not regex or value = None
                return list_length - (order_list.index(value) if value in order_list else empty_pos)
        else:
            if value.isnumeric():
                return float(value)
            else:
                # Mixed non-numeric data: fall back to string comparison for
                # every subsequent value of this field
                self.settings[field]['convert'] = 'string'
                return value

    def evaluate_params(self, params, sort_extractor):
        """Parse user params and the extractor's preference into self._order
        and per-field reverse/limit settings."""
        self._use_free_order = params.get('prefer_free_formats', False)
        self._sort_user = params.get('format_sort', [])
        self._sort_extractor = sort_extractor

        def add_item(field, reverse, closest, limit_text):
            # Register one parsed sort criterion; the first occurrence of a
            # field wins, later duplicates are ignored
            field = field.lower()
            if field in self._order:
                return
            self._order.append(field)
            limit = self._resolve_field_value(field, limit_text)
            data = {
                'reverse': reverse,
                'closest': False if limit is None else closest,
                'limit_text': limit_text,
                'limit': limit}
            if field in self.settings:
                self.settings[field].update(data)
            else:
                self.settings[field] = data

        # 'forced' fields always come first; 'priority' fields are skipped
        # (letting user/extractor order win) when format_sort_force is set
        sort_list = (
            tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
            + (tuple() if params.get('format_sort_force', False)
               else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
            + tuple(self._sort_user) + tuple(sort_extractor) + self.default)

        for item in sort_list:
            match = re.match(self.regex, item)
            if match is None:
                raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
            field = match.group('field')
            if field is None:
                continue
            if self._get_field_setting(field, 'type') == 'alias':
                alias, field = field, self._get_field_setting(field, 'field')
                if self._get_field_setting(alias, 'deprecated'):
                    self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
                                                f'be removed in a future version. Please use {field} instead')
            reverse = match.group('reverse') is not None
            closest = match.group('separator') == '~'
            limit_text = match.group('limit')

            has_limit = limit_text is not None
            has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
            has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')

            # 'combined' fields fan out into their sub-fields, distributing
            # colon-separated limits across them (or reusing a single limit)
            fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
            limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
            limit_count = len(limits)
            for (i, f) in enumerate(fields):
                add_item(f, reverse, closest,
                         limits[i] if i < limit_count
                         else limits[0] if has_limit and not has_multiple_limits
                         else None)

    def print_verbose_info(self, write_debug):
        """Write the effective sort order (user, extractor and final) to the
        debug log."""
        if self._sort_user:
            write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
        if self._sort_extractor:
            write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
        write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
            '+' if self._get_field_setting(field, 'reverse') else '', field,
            '%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
                          self._get_field_setting(field, 'limit_text'),
                          self._get_field_setting(field, 'limit'))
            if self._get_field_setting(field, 'limit_text') is not None else '')
            for field in self._order if self._get_field_setting(field, 'visible')]))

    def _calculate_field_preference_from_value(self, format, field, type, value):
        """Map one field's (possibly converted) value to its sort sub-tuple.

        The leading element groups values: -10 missing, 1 strings (sorted
        above numbers), 0/-1 numbers relative to the field's limit; the
        remaining elements order values within each group.
        """
        reverse = self._get_field_setting(field, 'reverse')
        closest = self._get_field_setting(field, 'closest')
        limit = self._get_field_setting(field, 'limit')

        if type == 'extractor':
            maximum = self._get_field_setting(field, 'max')
            if value is None or (maximum is not None and value >= maximum):
                value = -1
        elif type == 'boolean':
            in_list = self._get_field_setting(field, 'in_list')
            not_in_list = self._get_field_setting(field, 'not_in_list')
            value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
        elif type == 'ordered':
            value = self._resolve_field_value(field, value, True)

        # try to convert to number
        val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
        is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
        if is_num:
            value = val_num

        return ((-10, 0) if value is None
                else (1, value, 0) if not is_num  # if a field has mixed strings and numbers, strings are sorted higher
                else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
                else (0, value, 0) if not reverse and (limit is None or value <= limit)
                else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
                else (-1, value, 0))

    def _calculate_field_preference(self, format, field):
        """Read the relevant value(s) from `format` and compute the sort
        sub-tuple for `field`."""
        type = self._get_field_setting(field, 'type')  # extractor, boolean, ordered, field, multiple
        get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
        if type == 'multiple':
            type = 'field'  # Only 'field' is allowed in multiple for now
            actual_fields = self._get_field_setting(field, 'field')

            value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
        else:
            value = get_value(field)
        return self._calculate_field_preference_from_value(format, field, type, value)

    def calculate_preference(self, format):
        """Return the full sort key for `format` (one sub-tuple per field in
        self._order). NOTE: mutates `format` in place, filling in missing
        protocol, ext, video/audio ext and bitrate fields."""
        # Determine missing protocol
        if not format.get('protocol'):
            format['protocol'] = determine_protocol(format)

        # Determine missing ext
        if not format.get('ext') and 'url' in format:
            format['ext'] = determine_ext(format['url'])
        if format.get('vcodec') == 'none':
            format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
            format['video_ext'] = 'none'
        else:
            format['video_ext'] = format['ext']
            format['audio_ext'] = 'none'
        # if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'):  # Not supported?
        #    format['preference'] = -1000

        if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265|he?vc?', format.get('vcodec') or ''):
            # HEVC-over-FLV is out-of-spec by FLV's original spec
            # ref. https://trac.ffmpeg.org/ticket/6389
            # ref. https://github.com/yt-dlp/yt-dlp/pull/5821
            format['preference'] = -100

        # Determine missing bitrates
        if format.get('vcodec') == 'none':
            format['vbr'] = 0
        if format.get('acodec') == 'none':
            format['abr'] = 0
        if not format.get('vbr') and format.get('vcodec') != 'none':
            format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
        if not format.get('abr') and format.get('acodec') != 'none':
            format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
        if not format.get('tbr'):
            format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None

        return tuple(self._calculate_field_preference(format, field) for field in self._order)
5436
5437
def filesize_from_tbr(tbr, duration):
    """
    @param tbr: Total bitrate in kbps (1000 bits/sec)
    @param duration: Duration in seconds
    @returns Filesize in bytes, or None if either input is missing
    """
    if tbr is None or duration is None:
        return None
    # kbps -> bytes/sec: 1000 bits per kbit, 8 bits per byte
    kbps_to_bytes_per_sec = 1000 / 8
    return int(tbr * duration * kbps_to_bytes_per_sec)
5447
5448
5449# XXX: Temporary
class _YDLLogger:
    """Logger-style adapter delegating to a YoutubeDL instance.

    Every method silently does nothing when no ``ydl`` was supplied.
    """

    def __init__(self, ydl=None):
        self._ydl = ydl

    def debug(self, message):
        if not self._ydl:
            return
        self._ydl.write_debug(message)

    def info(self, message):
        if not self._ydl:
            return
        self._ydl.to_screen(message)

    def warning(self, message, *, once=False):
        if not self._ydl:
            return
        self._ydl.report_warning(message, once)

    def error(self, message, *, is_error=True):
        if not self._ydl:
            return
        self._ydl.report_error(message, is_error=is_error)

    def stdout(self, message):
        if not self._ydl:
            return
        self._ydl.to_stdout(message)

    def stderr(self, message):
        if not self._ydl:
            return
        self._ydl.to_stderr(message)