jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import base64
	2	import binascii
	3	import calendar
	4	import codecs
	5	import collections
	6	import collections.abc
	7	import contextlib
	8	import datetime as dt
	9	import email.header
	10	import email.utils
	11	import errno
	12	import hashlib
	13	import hmac
	14	import html.entities
	15	import html.parser
	16	import inspect
	17	import io
	18	import itertools
	19	import json
	20	import locale
	21	import math
	22	import mimetypes
	23	import netrc
	24	import operator
	25	import os
	26	import platform
	27	import random
	28	import re
	29	import shlex
	30	import socket
	31	import ssl
	32	import struct
	33	import subprocess
	34	import sys
	35	import tempfile
	36	import time
	37	import traceback
	38	import types
	39	import unicodedata
	40	import urllib.error
	41	import urllib.parse
	42	import urllib.request
	43	import xml.etree.ElementTree
	44
	45	from . import traversal
	46
	47	from ..compat import functools # isort: split
	48	from ..compat import (
	49	compat_etree_fromstring,
	50	compat_expanduser,
	51	compat_HTMLParseError,
	52	compat_os_name,
	53	)
	54	from ..dependencies import xattr
	55
	56	__name__ = __name__.rsplit('.', 1)[0] # Pretend to be the parent module
	57
	58	# This is not clearly defined otherwise
	59	compiled_regex_type = type(re.compile(''))
	60
	61
	62	class NO_DEFAULT:
	63	pass
	64
	65
	66	def IDENTITY(x):
	67	return x
	68
	69
	70	ENGLISH_MONTH_NAMES = [
	71	'January', 'February', 'March', 'April', 'May', 'June',
	72	'July', 'August', 'September', 'October', 'November', 'December']
	73
	74	MONTH_NAMES = {
	75	'en': ENGLISH_MONTH_NAMES,
	76	'fr': [
	77	'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
	78	'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
	79	# these follow the genitive grammatical case (dopełniacz)
	80	# some websites might be using nominative, which will require another month list
	81	# https://en.wikibooks.org/wiki/Polish/Noun_cases
	82	'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
	83	'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
	84	}
	85
	86	# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
	87	TIMEZONE_NAMES = {
	88	'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
	89	'AST': -4, 'ADT': -3, # Atlantic (used in Canada)
	90	'EST': -5, 'EDT': -4, # Eastern
	91	'CST': -6, 'CDT': -5, # Central
	92	'MST': -7, 'MDT': -6, # Mountain
	93	'PST': -8, 'PDT': -7 # Pacific
	94	}
	95
	96	# needed for sanitizing filenames in restricted mode
	97	ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
	98	itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
	99	'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
	100
	101	DATE_FORMATS = (
	102	'%d %B %Y',
	103	'%d %b %Y',
	104	'%B %d %Y',
	105	'%B %dst %Y',
	106	'%B %dnd %Y',
	107	'%B %drd %Y',
	108	'%B %dth %Y',
	109	'%b %d %Y',
	110	'%b %dst %Y',
	111	'%b %dnd %Y',
	112	'%b %drd %Y',
	113	'%b %dth %Y',
	114	'%b %dst %Y %I:%M',
	115	'%b %dnd %Y %I:%M',
	116	'%b %drd %Y %I:%M',
	117	'%b %dth %Y %I:%M',
	118	'%Y %m %d',
	119	'%Y-%m-%d',
	120	'%Y.%m.%d.',
	121	'%Y/%m/%d',
	122	'%Y/%m/%d %H:%M',
	123	'%Y/%m/%d %H:%M:%S',
	124	'%Y%m%d%H%M',
	125	'%Y%m%d%H%M%S',
	126	'%Y%m%d',
	127	'%Y-%m-%d %H:%M',
	128	'%Y-%m-%d %H:%M:%S',
	129	'%Y-%m-%d %H:%M:%S.%f',
	130	'%Y-%m-%d %H:%M:%S:%f',
	131	'%d.%m.%Y %H:%M',
	132	'%d.%m.%Y %H.%M',
	133	'%Y-%m-%dT%H:%M:%SZ',
	134	'%Y-%m-%dT%H:%M:%S.%fZ',
	135	'%Y-%m-%dT%H:%M:%S.%f0Z',
	136	'%Y-%m-%dT%H:%M:%S',
	137	'%Y-%m-%dT%H:%M:%S.%f',
	138	'%Y-%m-%dT%H:%M',
	139	'%b %d %Y at %H:%M',
	140	'%b %d %Y at %H:%M:%S',
	141	'%B %d %Y at %H:%M',
	142	'%B %d %Y at %H:%M:%S',
	143	'%H:%M %d-%b-%Y',
	144	)
	145
	146	DATE_FORMATS_DAY_FIRST = list(DATE_FORMATS)
	147	DATE_FORMATS_DAY_FIRST.extend([
	148	'%d-%m-%Y',
	149	'%d.%m.%Y',
	150	'%d.%m.%y',
	151	'%d/%m/%Y',
	152	'%d/%m/%y',
	153	'%d/%m/%Y %H:%M:%S',
	154	'%d-%m-%Y %H:%M',
	155	'%H:%M %d/%m/%Y',
	156	])
	157
	158	DATE_FORMATS_MONTH_FIRST = list(DATE_FORMATS)
	159	DATE_FORMATS_MONTH_FIRST.extend([
	160	'%m-%d-%Y',
	161	'%m.%d.%Y',
	162	'%m/%d/%Y',
	163	'%m/%d/%y',
	164	'%m/%d/%Y %H:%M:%S',
	165	])
	166
	167	PACKED_CODES_RE = r"}$'(.+)',(\d+),(\d+),'([^']+)'\.split\('\\|'$"
	168	JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]>\s(?P<json_ld>{.+?}\|\[.+?\])\s*</script>'
	169
	170	NUMBER_RE = r'\d+(?:\.\d+)?'
	171
	172
	173	@functools.cache
	174	def preferredencoding():
	175	"""Get preferred encoding.
	176
	177	Returns the best encoding scheme for the system, based on
	178	locale.getpreferredencoding() and some further tweaks.
	179	"""
	180	try:
	181	pref = locale.getpreferredencoding()
	182	'TEST'.encode(pref)
	183	except Exception:
	184	pref = 'UTF-8'
	185
	186	return pref
	187
	188
	189	def write_json_file(obj, fn):
	190	""" Encode obj as JSON and write it to fn, atomically if possible """
	191
	192	tf = tempfile.NamedTemporaryFile(
	193	prefix=f'{os.path.basename(fn)}.', dir=os.path.dirname(fn),
	194	suffix='.tmp', delete=False, mode='w', encoding='utf-8')
	195
	196	try:
	197	with tf:
	198	json.dump(obj, tf, ensure_ascii=False)
	199	if sys.platform == 'win32':
	200	# Need to remove existing file on Windows, else os.rename raises
	201	# WindowsError or FileExistsError.
	202	with contextlib.suppress(OSError):
	203	os.unlink(fn)
	204	with contextlib.suppress(OSError):
	205	mask = os.umask(0)
	206	os.umask(mask)
	207	os.chmod(tf.name, 0o666 & ~mask)
	208	os.rename(tf.name, fn)
	209	except Exception:
	210	with contextlib.suppress(OSError):
	211	os.remove(tf.name)
	212	raise
	213
	214
	215	def find_xpath_attr(node, xpath, key, val=None):
	216	""" Find the xpath xpath[@key=val] """
	217	assert re.match(r'^[a-zA-Z_-]+$', key)
	218	expr = xpath + ('[@%s]' % key if val is None else f"[@{key}='{val}']")
	219	return node.find(expr)
	220
	221	# On python2.6 the xml.etree.ElementTree.Element methods don't support
	222	# the namespace parameter
	223
	224
	225	def xpath_with_ns(path, ns_map):
	226	components = [c.split(':') for c in path.split('/')]
	227	replaced = []
	228	for c in components:
	229	if len(c) == 1:
	230	replaced.append(c[0])
	231	else:
	232	ns, tag = c
	233	replaced.append('{%s}%s' % (ns_map[ns], tag))
	234	return '/'.join(replaced)
	235
	236
	237	def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
	238	def _find_xpath(xpath):
	239	return node.find(xpath)
	240
	241	if isinstance(xpath, str):
	242	n = _find_xpath(xpath)
	243	else:
	244	for xp in xpath:
	245	n = _find_xpath(xp)
	246	if n is not None:
	247	break
	248
	249	if n is None:
	250	if default is not NO_DEFAULT:
	251	return default
	252	elif fatal:
	253	name = xpath if name is None else name
	254	raise ExtractorError('Could not find XML element %s' % name)
	255	else:
	256	return None
	257	return n
	258
	259
	260	def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
	261	n = xpath_element(node, xpath, name, fatal=fatal, default=default)
	262	if n is None or n == default:
	263	return n
	264	if n.text is None:
	265	if default is not NO_DEFAULT:
	266	return default
	267	elif fatal:
	268	name = xpath if name is None else name
	269	raise ExtractorError('Could not find XML element\'s text %s' % name)
	270	else:
	271	return None
	272	return n.text
	273
	274
	275	def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
	276	n = find_xpath_attr(node, xpath, key)
	277	if n is None:
	278	if default is not NO_DEFAULT:
	279	return default
	280	elif fatal:
	281	name = f'{xpath}[@{key}]' if name is None else name
	282	raise ExtractorError('Could not find XML attribute %s' % name)
	283	else:
	284	return None
	285	return n.attrib[key]
	286
	287
	288	def get_element_by_id(id, html, **kwargs):
	289	"""Return the content of the tag with the specified ID in the passed HTML document"""
	290	return get_element_by_attribute('id', id, html, **kwargs)
	291
	292
	293	def get_element_html_by_id(id, html, **kwargs):
	294	"""Return the html of the tag with the specified ID in the passed HTML document"""
	295	return get_element_html_by_attribute('id', id, html, **kwargs)
	296
	297
	298	def get_element_by_class(class_name, html):
	299	"""Return the content of the first tag with the specified class in the passed HTML document"""
	300	retval = get_elements_by_class(class_name, html)
	301	return retval[0] if retval else None
	302
	303
	304	def get_element_html_by_class(class_name, html):
	305	"""Return the html of the first tag with the specified class in the passed HTML document"""
	306	retval = get_elements_html_by_class(class_name, html)
	307	return retval[0] if retval else None
	308
	309
	310	def get_element_by_attribute(attribute, value, html, **kwargs):
	311	retval = get_elements_by_attribute(attribute, value, html, **kwargs)
	312	return retval[0] if retval else None
	313
	314
	315	def get_element_html_by_attribute(attribute, value, html, **kargs):
	316	retval = get_elements_html_by_attribute(attribute, value, html, **kargs)
	317	return retval[0] if retval else None
	318
	319
	320	def get_elements_by_class(class_name, html, **kargs):
	321	"""Return the content of all tags with the specified class in the passed HTML document as a list"""
	322	return get_elements_by_attribute(
	323	'class', r'[^\'"](?<=[\'"\s])%s(?=[\'"\s])[^\'"]' % re.escape(class_name),
	324	html, escape_value=False)
	325
	326
	327	def get_elements_html_by_class(class_name, html):
	328	"""Return the html of all tags with the specified class in the passed HTML document as a list"""
	329	return get_elements_html_by_attribute(
	330	'class', r'[^\'"](?<=[\'"\s])%s(?=[\'"\s])[^\'"]' % re.escape(class_name),
	331	html, escape_value=False)
	332
	333
	334	def get_elements_by_attribute(args, *kwargs):
	335	"""Return the content of the tag with the specified attribute in the passed HTML document"""
	336	return [content for content, _ in get_elements_text_and_html_by_attribute(args, *kwargs)]
	337
	338
	339	def get_elements_html_by_attribute(args, *kwargs):
	340	"""Return the html of the tag with the specified attribute in the passed HTML document"""
	341	return [whole for _, whole in get_elements_text_and_html_by_attribute(args, *kwargs)]
	342
	343
	344	def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
	345	"""
	346	Return the text (content) and the html (whole) of the tag with the specified
	347	attribute in the passed HTML document
	348	"""
	349	if not value:
	350	return
	351
	352	quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'
	353
	354	value = re.escape(value) if escape_value else value
	355
	356	partial_element_re = rf'''(?x)
	357	<(?P<tag>{tag})
	358	(?:\s(?:[^>"']\|"[^"]"\|'[^']')*)?
	359	\s{re.escape(attribute)}\s=\s(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
	360	'''
	361
	362	for m in re.finditer(partial_element_re, html):
	363	content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])
	364
	365	yield (
	366	unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
	367	whole
	368	)
	369
	370
	371	class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
	372	"""
	373	HTML parser which raises HTMLBreakOnClosingTagException upon reaching the
	374	closing tag for the first opening tag it has encountered, and can be used
	375	as a context manager
	376	"""
	377
	378	class HTMLBreakOnClosingTagException(Exception):
	379	pass
	380
	381	def __init__(self):
	382	self.tagstack = collections.deque()
	383	html.parser.HTMLParser.__init__(self)
	384
	385	def __enter__(self):
	386	return self
	387
	388	def __exit__(self, *_):
	389	self.close()
	390
	391	def close(self):
	392	# handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
	393	# so data remains buffered; we no longer have any interest in it, thus
	394	# override this method to discard it
	395	pass
	396
	397	def handle_starttag(self, tag, _):
	398	self.tagstack.append(tag)
	399
	400	def handle_endtag(self, tag):
	401	if not self.tagstack:
	402	raise compat_HTMLParseError('no tags in the stack')
	403	while self.tagstack:
	404	inner_tag = self.tagstack.pop()
	405	if inner_tag == tag:
	406	break
	407	else:
	408	raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')
	409	if not self.tagstack:
	410	raise self.HTMLBreakOnClosingTagException()
	411
	412
	413	# XXX: This should be far less strict
	414	def get_element_text_and_html_by_tag(tag, html):
	415	"""
	416	For the first element with the specified tag in the passed HTML document
	417	return its' content (text) and the whole element (html)
	418	"""
	419	def find_or_raise(haystack, needle, exc):
	420	try:
	421	return haystack.index(needle)
	422	except ValueError:
	423	raise exc
	424	closing_tag = f'</{tag}>'
	425	whole_start = find_or_raise(
	426	html, f'<{tag}', compat_HTMLParseError(f'opening {tag} tag not found'))
	427	content_start = find_or_raise(
	428	html[whole_start:], '>', compat_HTMLParseError(f'malformed opening {tag} tag'))
	429	content_start += whole_start + 1
	430	with HTMLBreakOnClosingTagParser() as parser:
	431	parser.feed(html[whole_start:content_start])
	432	if not parser.tagstack or parser.tagstack[0] != tag:
	433	raise compat_HTMLParseError(f'parser did not match opening {tag} tag')
	434	offset = content_start
	435	while offset < len(html):
	436	next_closing_tag_start = find_or_raise(
	437	html[offset:], closing_tag,
	438	compat_HTMLParseError(f'closing {tag} tag not found'))
	439	next_closing_tag_end = next_closing_tag_start + len(closing_tag)
	440	try:
	441	parser.feed(html[offset:offset + next_closing_tag_end])
	442	offset += next_closing_tag_end
	443	except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
	444	return html[content_start:offset + next_closing_tag_start], \
	445	html[whole_start:offset + next_closing_tag_end]
	446	raise compat_HTMLParseError('unexpected end of html')
	447
	448
	449	class HTMLAttributeParser(html.parser.HTMLParser):
	450	"""Trivial HTML parser to gather the attributes for a single element"""
	451
	452	def __init__(self):
	453	self.attrs = {}
	454	html.parser.HTMLParser.__init__(self)
	455
	456	def handle_starttag(self, tag, attrs):
	457	self.attrs = dict(attrs)
	458	raise compat_HTMLParseError('done')
	459
	460
	461	class HTMLListAttrsParser(html.parser.HTMLParser):
	462	"""HTML parser to gather the attributes for the elements of a list"""
	463
	464	def __init__(self):
	465	html.parser.HTMLParser.__init__(self)
	466	self.items = []
	467	self._level = 0
	468
	469	def handle_starttag(self, tag, attrs):
	470	if tag == 'li' and self._level == 0:
	471	self.items.append(dict(attrs))
	472	self._level += 1
	473
	474	def handle_endtag(self, tag):
	475	self._level -= 1
	476
	477
	478	def extract_attributes(html_element):
	479	"""Given a string for an HTML element such as
	480	<el
	481	a="foo" B="bar" c="&98;az" d=boz
	482	empty= noval entity="&"
	483	sq='"' dq="'"
	484	>
	485	Decode and return a dictionary of attributes.
	486	{
	487	'a': 'foo', 'b': 'bar', c: 'baz', d: 'boz',
	488	'empty': '', 'noval': None, 'entity': '&',
	489	'sq': '"', 'dq': '\''
	490	}.
	491	"""
	492	parser = HTMLAttributeParser()
	493	with contextlib.suppress(compat_HTMLParseError):
	494	parser.feed(html_element)
	495	parser.close()
	496	return parser.attrs
	497
	498
	499	def parse_list(webpage):
	500	"""Given a string for an series of HTML <li> elements,
	501	return a dictionary of their attributes"""
	502	parser = HTMLListAttrsParser()
	503	parser.feed(webpage)
	504	parser.close()
	505	return parser.items
	506
	507
	508	def clean_html(html):
	509	"""Clean an HTML snippet into a readable string"""
	510
	511	if html is None: # Convenience for sanitizing descriptions etc.
	512	return html
	513
	514	html = re.sub(r'\s+', ' ', html)
	515	html = re.sub(r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n', html)
	516	html = re.sub(r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n', html)
	517	# Strip html tags
	518	html = re.sub('<.*?>', '', html)
	519	# Replace html entities
	520	html = unescapeHTML(html)
	521	return html.strip()
	522
	523
	524	class LenientJSONDecoder(json.JSONDecoder):
	525	# TODO: Write tests
	526	def __init__(self, args, transform_source=None, ignore_extra=False, close_objects=0, *kwargs):
	527	self.transform_source, self.ignore_extra = transform_source, ignore_extra
	528	self._close_attempts = 2 * close_objects
	529	super().__init__(args, *kwargs)
	530
	531	@staticmethod
	532	def _close_object(err):
	533	doc = err.doc[:err.pos]
	534	# We need to add comma first to get the correct error message
	535	if err.msg.startswith('Expecting \',\''):
	536	return doc + ','
	537	elif not doc.endswith(','):
	538	return
	539
	540	if err.msg.startswith('Expecting property name'):
	541	return doc[:-1] + '}'
	542	elif err.msg.startswith('Expecting value'):
	543	return doc[:-1] + ']'
	544
	545	def decode(self, s):
	546	if self.transform_source:
	547	s = self.transform_source(s)
	548	for attempt in range(self._close_attempts + 1):
	549	try:
	550	if self.ignore_extra:
	551	return self.raw_decode(s.lstrip())[0]
	552	return super().decode(s)
	553	except json.JSONDecodeError as e:
	554	if e.pos is None:
	555	raise
	556	elif attempt < self._close_attempts:
	557	s = self._close_object(e)
	558	if s is not None:
	559	continue
	560	raise type(e)(f'{e.msg} in {s[e.pos - 10:e.pos + 10]!r}', s, e.pos)
	561	assert False, 'Too many attempts to decode JSON'
	562
	563
	564	def sanitize_open(filename, open_mode):
	565	"""Try to open the given filename, and slightly tweak it if this fails.
	566
	567	Attempts to open the given filename. If this fails, it tries to change
	568	the filename slightly, step by step, until it's either able to open it
	569	or it fails and raises a final exception, like the standard open()
	570	function.
	571
	572	It returns the tuple (stream, definitive_file_name).
	573	"""
	574	if filename == '-':
	575	if sys.platform == 'win32':
	576	import msvcrt
	577
	578	# stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
	579	with contextlib.suppress(io.UnsupportedOperation):
	580	msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
	581	return (sys.stdout.buffer if hasattr(sys.stdout, 'buffer') else sys.stdout, filename)
	582
	583	for attempt in range(2):
	584	try:
	585	try:
	586	if sys.platform == 'win32':
	587	# FIXME: An exclusive lock also locks the file from being read.
	588	# Since windows locks are mandatory, don't lock the file on windows (for now).
	589	# Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
	590	raise LockingUnsupportedError()
	591	stream = locked_file(filename, open_mode, block=False).__enter__()
	592	except OSError:
	593	stream = open(filename, open_mode)
	594	return stream, filename
	595	except OSError as err:
	596	if attempt or err.errno in (errno.EACCES,):
	597	raise
	598	old_filename, filename = filename, sanitize_path(filename)
	599	if old_filename == filename:
	600	raise
	601
	602
	603	def timeconvert(timestr):
	604	"""Convert RFC 2822 defined time string into system timestamp"""
	605	timestamp = None
	606	timetuple = email.utils.parsedate_tz(timestr)
	607	if timetuple is not None:
	608	timestamp = email.utils.mktime_tz(timetuple)
	609	return timestamp
	610
	611
	612	def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
	613	"""Sanitizes a string so it could be used as part of a filename.
	614	@param restricted Use a stricter subset of allowed characters
	615	@param is_id Whether this is an ID that should be kept unchanged if possible.
	616	If unset, yt-dlp's new sanitization rules are in effect
	617	"""
	618	if s == '':
	619	return ''
	620
	621	def replace_insane(char):
	622	if restricted and char in ACCENT_CHARS:
	623	return ACCENT_CHARS[char]
	624	elif not restricted and char == '\n':
	625	return '\0 '
	626	elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?\|/\\':
	627	# Replace with their full-width unicode counterparts
	628	return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
	629	elif char == '?' or ord(char) < 32 or ord(char) == 127:
	630	return ''
	631	elif char == '"':
	632	return '' if restricted else '\''
	633	elif char == ':':
	634	return '\0_\0-' if restricted else '\0 \0-'
	635	elif char in '\\/\|*<>':
	636	return '\0_'
	637	if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
	638	return '' if unicodedata.category(char)[0] in 'CM' else '\0_'
	639	return char
	640
	641	# Replace look-alike Unicode glyphs
	642	if restricted and (is_id is NO_DEFAULT or not is_id):
	643	s = unicodedata.normalize('NFKC', s)
	644	s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s) # Handle timestamps
	645	result = ''.join(map(replace_insane, s))
	646	if is_id is NO_DEFAULT:
	647	result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result) # Remove repeated substitute chars
	648	STRIP_RE = r'(?:\0.\|[ _-])*'
	649	result = re.sub(f'^\0.{STRIP_RE}\|{STRIP_RE}\0.$', '', result) # Remove substitute chars from start/end
	650	result = result.replace('\0', '') or '_'
	651
	652	if not is_id:
	653	while '__' in result:
	654	result = result.replace('__', '_')
	655	result = result.strip('_')
	656	# Common case of "Foreign band name - English song title"
	657	if restricted and result.startswith('-_'):
	658	result = result[2:]
	659	if result.startswith('-'):
	660	result = '_' + result[len('-'):]
	661	result = result.lstrip('.')
	662	if not result:
	663	result = '_'
	664	return result
	665
	666
	667	def sanitize_path(s, force=False):
	668	"""Sanitizes and normalizes path on Windows"""
	669	# XXX: this handles drive relative paths (c:sth) incorrectly
	670	if sys.platform == 'win32':
	671	force = False
	672	drive_or_unc, _ = os.path.splitdrive(s)
	673	elif force:
	674	drive_or_unc = ''
	675	else:
	676	return s
	677
	678	norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
	679	if drive_or_unc:
	680	norm_path.pop(0)
	681	sanitized_path = [
	682	path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\\|\\?\*]\|[\s.]$)', '#', path_part)
	683	for path_part in norm_path]
	684	if drive_or_unc:
	685	sanitized_path.insert(0, drive_or_unc + os.path.sep)
	686	elif force and s and s[0] == os.path.sep:
	687	sanitized_path.insert(0, os.path.sep)
	688	# TODO: Fix behavioral differences <3.12
	689	# The workaround using `normpath` only superficially passes tests
	690	# Ref: https://github.com/python/cpython/pull/100351
	691	return os.path.normpath(os.path.join(*sanitized_path))
	692
	693
	694	def sanitize_url(url, *, scheme='http'):
	695	# Prepend protocol-less URLs with `http:` scheme in order to mitigate
	696	# the number of unwanted failures due to missing protocol
	697	if url is None:
	698	return
	699	elif url.startswith('//'):
	700	return f'{scheme}:{url}'
	701	# Fix some common typos seen so far
	702	COMMON_TYPOS = (
	703	# https://github.com/ytdl-org/youtube-dl/issues/15649
	704	(r'^httpss://', r'https://'),
	705	# https://bx1.be/lives/direct-tv/
	706	(r'^rmtp([es]?)://', r'rtmp\1://'),
	707	)
	708	for mistake, fixup in COMMON_TYPOS:
	709	if re.match(mistake, url):
	710	return re.sub(mistake, fixup, url)
	711	return url
	712
	713
	714	def extract_basic_auth(url):
	715	parts = urllib.parse.urlsplit(url)
	716	if parts.username is None:
	717	return url, None
	718	url = urllib.parse.urlunsplit(parts._replace(netloc=(
	719	parts.hostname if parts.port is None
	720	else '%s:%d' % (parts.hostname, parts.port))))
	721	auth_payload = base64.b64encode(
	722	('%s:%s' % (parts.username, parts.password or '')).encode())
	723	return url, f'Basic {auth_payload.decode()}'
	724
	725
	726	def expand_path(s):
	727	"""Expand shell variables and ~"""
	728	return os.path.expandvars(compat_expanduser(s))
	729
	730
	731	def orderedSet(iterable, *, lazy=False):
	732	"""Remove all duplicates from the input iterable"""
	733	def _iter():
	734	seen = [] # Do not use set since the items can be unhashable
	735	for x in iterable:
	736	if x not in seen:
	737	seen.append(x)
	738	yield x
	739
	740	return _iter() if lazy else list(_iter())
	741
	742
	743	def _htmlentity_transform(entity_with_semicolon):
	744	"""Transforms an HTML entity to a character."""
	745	entity = entity_with_semicolon[:-1]
	746
	747	# Known non-numeric HTML entity
	748	if entity in html.entities.name2codepoint:
	749	return chr(html.entities.name2codepoint[entity])
	750
	751	# TODO: HTML5 allows entities without a semicolon.
	752	# E.g. '&Eacuteric' should be decoded as 'Éric'.
	753	if entity_with_semicolon in html.entities.html5:
	754	return html.entities.html5[entity_with_semicolon]
	755
	756	mobj = re.match(r'#(x[0-9a-fA-F]+\|[0-9]+)', entity)
	757	if mobj is not None:
	758	numstr = mobj.group(1)
	759	if numstr.startswith('x'):
	760	base = 16
	761	numstr = '0%s' % numstr
	762	else:
	763	base = 10
	764	# See https://github.com/ytdl-org/youtube-dl/issues/7518
	765	with contextlib.suppress(ValueError):
	766	return chr(int(numstr, base))
	767
	768	# Unknown entity in name, return its literal representation
	769	return '&%s;' % entity
	770
	771
	772	def unescapeHTML(s):
	773	if s is None:
	774	return None
	775	assert isinstance(s, str)
	776
	777	return re.sub(
	778	r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
	779
	780
	781	def escapeHTML(text):
	782	return (
	783	text
	784	.replace('&', '&')
	785	.replace('<', '<')
	786	.replace('>', '>')
	787	.replace('"', '"')
	788	.replace("'", ''')
	789	)
	790
	791
	792	class netrc_from_content(netrc.netrc):
	793	def __init__(self, content):
	794	self.hosts, self.macros = {}, {}
	795	with io.StringIO(content) as stream:
	796	self._parse('-', stream, False)
	797
	798
	799	class Popen(subprocess.Popen):
	800	if sys.platform == 'win32':
	801	_startupinfo = subprocess.STARTUPINFO()
	802	_startupinfo.dwFlags \|= subprocess.STARTF_USESHOWWINDOW
	803	else:
	804	_startupinfo = None
	805
	806	@staticmethod
	807	def _fix_pyinstaller_ld_path(env):
	808	"""Restore LD_LIBRARY_PATH when using PyInstaller
	809	Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
	810	https://github.com/yt-dlp/yt-dlp/issues/4573
	811	"""
	812	if not hasattr(sys, '_MEIPASS'):
	813	return
	814
	815	def _fix(key):
	816	orig = env.get(f'{key}_ORIG')
	817	if orig is None:
	818	env.pop(key, None)
	819	else:
	820	env[key] = orig
	821
	822	_fix('LD_LIBRARY_PATH') # Linux
	823	_fix('DYLD_LIBRARY_PATH') # macOS
	824
	825	def __init__(self, args, remaining, env=None, text=False, shell=False, *kwargs):
	826	if env is None:
	827	env = os.environ.copy()
	828	self._fix_pyinstaller_ld_path(env)
	829
	830	self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
	831	if text is True:
	832	kwargs['universal_newlines'] = True # For 3.6 compatibility
	833	kwargs.setdefault('encoding', 'utf-8')
	834	kwargs.setdefault('errors', 'replace')
	835
	836	if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
	837	if not isinstance(args, str):
	838	args = shell_quote(args, shell=True)
	839	shell = False
	840	# Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
	841	env['='] = '"^\n\n"'
	842	args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'
	843
	844	super().__init__(args, remaining, env=env, shell=shell, *kwargs, startupinfo=self._startupinfo)
	845
	846	def __comspec(self):
	847	comspec = os.environ.get('ComSpec') or os.path.join(
	848	os.environ.get('SystemRoot', ''), 'System32', 'cmd.exe')
	849	if os.path.isabs(comspec):
	850	return comspec
	851	raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')
	852
	853	def communicate_or_kill(self, args, *kwargs):
	854	try:
	855	return self.communicate(args, *kwargs)
	856	except BaseException: # Including KeyboardInterrupt
	857	self.kill(timeout=None)
	858	raise
	859
	860	def kill(self, *, timeout=0):
	861	super().kill()
	862	if timeout != 0:
	863	self.wait(timeout=timeout)
	864
	865	@classmethod
	866	def run(cls, args, timeout=None, *kwargs):
	867	with cls(args, *kwargs) as proc:
	868	default = '' if proc.__text_mode else b''
	869	stdout, stderr = proc.communicate_or_kill(timeout=timeout)
	870	return stdout or default, stderr or default, proc.returncode
	871
	872
	873	def encodeArgument(s):
	874	# Legacy code that uses byte strings
	875	# Uncomment the following line after fixing all post processors
	876	# assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
	877	return s if isinstance(s, str) else s.decode('ascii')
	878
	879
	880	_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))
	881
	882
	883	def timetuple_from_msec(msec):
	884	secs, msec = divmod(msec, 1000)
	885	mins, secs = divmod(secs, 60)
	886	hrs, mins = divmod(mins, 60)
	887	return _timetuple(hrs, mins, secs, msec)
	888
	889
	890	def formatSeconds(secs, delim=':', msec=False):
	891	time = timetuple_from_msec(secs * 1000)
	892	if time.hours:
	893	ret = '%d%s%02d%s%02d' % (time.hours, delim, time.minutes, delim, time.seconds)
	894	elif time.minutes:
	895	ret = '%d%s%02d' % (time.minutes, delim, time.seconds)
	896	else:
	897	ret = '%d' % time.seconds
	898	return '%s.%03d' % (ret, time.milliseconds) if msec else ret
	899
	900
	901	def bug_reports_message(before=';'):
	902	from ..update import REPOSITORY
	903
	904	msg = (f'please report this issue on https://github.com/{REPOSITORY}/issues?q= , '
	905	'filling out the appropriate issue template. Confirm you are on the latest version using yt-dlp -U')
	906
	907	before = before.rstrip()
	908	if not before or before.endswith(('.', '!', '?')):
	909	msg = msg[0].title() + msg[1:]
	910
	911	return (before + ' ' if before else '') + msg
	912
	913
	914	class YoutubeDLError(Exception):
	915	"""Base exception for YoutubeDL errors."""
	916	msg = None
	917
	918	def __init__(self, msg=None):
	919	if msg is not None:
	920	self.msg = msg
	921	elif self.msg is None:
	922	self.msg = type(self).__name__
	923	super().__init__(self.msg)
	924
	925
	926	class ExtractorError(YoutubeDLError):
	927	"""Error during info extraction."""
	928
	929	def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
	930	""" tb, if given, is the original traceback (so that it can be printed out).
	931	If expected is set, this is a normal error message and most likely not a bug in yt-dlp.
	932	"""
	933	from ..networking.exceptions import network_exceptions
	934	if sys.exc_info()[0] in network_exceptions:
	935	expected = True
	936
	937	self.orig_msg = str(msg)
	938	self.traceback = tb
	939	self.expected = expected
	940	self.cause = cause
	941	self.video_id = video_id
	942	self.ie = ie
	943	self.exc_info = sys.exc_info() # preserve original exception
	944	if isinstance(self.exc_info[1], ExtractorError):
	945	self.exc_info = self.exc_info[1].exc_info
	946	super().__init__(self.__msg)
	947
	948	@property
	949	def __msg(self):
	950	return ''.join((
	951	format_field(self.ie, None, '[%s] '),
	952	format_field(self.video_id, None, '%s: '),
	953	self.orig_msg,
	954	format_field(self.cause, None, ' (caused by %r)'),
	955	'' if self.expected else bug_reports_message()))
	956
	957	def format_traceback(self):
	958	return join_nonempty(
	959	self.traceback and ''.join(traceback.format_tb(self.traceback)),
	960	self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
	961	delim='\n') or None
	962
	963	def __setattr__(self, name, value):
	964	super().__setattr__(name, value)
	965	if getattr(self, 'msg', None) and name not in ('msg', 'args'):
	966	self.msg = self.__msg or type(self).__name__
	967	self.args = (self.msg, ) # Cannot be property
	968
	969
	970	class UnsupportedError(ExtractorError):
	971	def __init__(self, url):
	972	super().__init__(
	973	'Unsupported URL: %s' % url, expected=True)
	974	self.url = url
	975
	976
	977	class RegexNotFoundError(ExtractorError):
	978	"""Error when a regex didn't match"""
	979	pass
	980
	981
	982	class GeoRestrictedError(ExtractorError):
	983	"""Geographic restriction Error exception.
	984
	985	This exception may be thrown when a video is not available from your
	986	geographic location due to geographic restrictions imposed by a website.
	987	"""
	988
	989	def __init__(self, msg, countries=None, **kwargs):
	990	kwargs['expected'] = True
	991	super().__init__(msg, **kwargs)
	992	self.countries = countries
	993
	994
	995	class UserNotLive(ExtractorError):
	996	"""Error when a channel/user is not live"""
	997
	998	def __init__(self, msg=None, **kwargs):
	999	kwargs['expected'] = True
	1000	super().__init__(msg or 'The channel is not currently live', **kwargs)
	1001
	1002
	1003	class DownloadError(YoutubeDLError):
	1004	"""Download Error exception.
	1005
	1006	This exception may be thrown by FileDownloader objects if they are not
	1007	configured to continue on errors. They will contain the appropriate
	1008	error message.
	1009	"""
	1010
	1011	def __init__(self, msg, exc_info=None):
	1012	""" exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
	1013	super().__init__(msg)
	1014	self.exc_info = exc_info
	1015
	1016
	1017	class EntryNotInPlaylist(YoutubeDLError):
	1018	"""Entry not in playlist exception.
	1019
	1020	This exception will be thrown by YoutubeDL when a requested entry
	1021	is not found in the playlist info_dict
	1022	"""
	1023	msg = 'Entry not found in info'
	1024
	1025
	1026	class SameFileError(YoutubeDLError):
	1027	"""Same File exception.
	1028
	1029	This exception will be thrown by FileDownloader objects if they detect
	1030	multiple files would have to be downloaded to the same file on disk.
	1031	"""
	1032	msg = 'Fixed output name but more than one file to download'
	1033
	1034	def __init__(self, filename=None):
	1035	if filename is not None:
	1036	self.msg += f': {filename}'
	1037	super().__init__(self.msg)
	1038
	1039
	1040	class PostProcessingError(YoutubeDLError):
	1041	"""Post Processing exception.
	1042
	1043	This exception may be raised by PostProcessor's .run() method to
	1044	indicate an error in the postprocessing task.
	1045	"""
	1046
	1047
	1048	class DownloadCancelled(YoutubeDLError):
	1049	""" Exception raised when the download queue should be interrupted """
	1050	msg = 'The download was cancelled'
	1051
	1052
	1053	class ExistingVideoReached(DownloadCancelled):
	1054	""" --break-on-existing triggered """
	1055	msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
	1056
	1057
	1058	class RejectedVideoReached(DownloadCancelled):
	1059	""" --break-match-filter triggered """
	1060	msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
	1061
	1062
	1063	class MaxDownloadsReached(DownloadCancelled):
	1064	""" --max-downloads limit has been reached. """
	1065	msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
	1066
	1067
	1068	class ReExtractInfo(YoutubeDLError):
	1069	""" Video info needs to be re-extracted. """
	1070
	1071	def __init__(self, msg, expected=False):
	1072	super().__init__(msg)
	1073	self.expected = expected
	1074
	1075
	1076	class ThrottledDownload(ReExtractInfo):
	1077	""" Download speed below --throttled-rate. """
	1078	msg = 'The download speed is below throttle limit'
	1079
	1080	def __init__(self):
	1081	super().__init__(self.msg, expected=False)
	1082
	1083
	1084	class UnavailableVideoError(YoutubeDLError):
	1085	"""Unavailable Format exception.
	1086
	1087	This exception will be thrown when a video is requested
	1088	in a format that is not available for that video.
	1089	"""
	1090	msg = 'Unable to download video'
	1091
	1092	def __init__(self, err=None):
	1093	if err is not None:
	1094	self.msg += f': {err}'
	1095	super().__init__(self.msg)
	1096
	1097
	1098	class ContentTooShortError(YoutubeDLError):
	1099	"""Content Too Short exception.
	1100
	1101	This exception may be raised by FileDownloader objects when a file they
	1102	download is too small for what the server announced first, indicating
	1103	the connection was probably interrupted.
	1104	"""
	1105
	1106	def __init__(self, downloaded, expected):
	1107	super().__init__(f'Downloaded {downloaded} bytes, expected {expected} bytes')
	1108	# Both in bytes
	1109	self.downloaded = downloaded
	1110	self.expected = expected
	1111
	1112
	1113	class XAttrMetadataError(YoutubeDLError):
	1114	def __init__(self, code=None, msg='Unknown error'):
	1115	super().__init__(msg)
	1116	self.code = code
	1117	self.msg = msg
	1118
	1119	# Parsing code and msg
	1120	if (self.code in (errno.ENOSPC, errno.EDQUOT)
	1121	or 'No space left' in self.msg or 'Disk quota exceeded' in self.msg):
	1122	self.reason = 'NO_SPACE'
	1123	elif self.code == errno.E2BIG or 'Argument list too long' in self.msg:
	1124	self.reason = 'VALUE_TOO_LONG'
	1125	else:
	1126	self.reason = 'NOT_SUPPORTED'
	1127
	1128
	1129	class XAttrUnavailableError(YoutubeDLError):
	1130	pass
	1131
	1132
	1133	def is_path_like(f):
	1134	return isinstance(f, (str, bytes, os.PathLike))
	1135
	1136
	1137	def extract_timezone(date_str):
	1138	m = re.search(
	1139	r'''(?x)
	1140	^.{8,}? # >=8 char non-TZ prefix, if present
	1141	(?P<tz>Z\| # just the UTC Z, or
	1142	(?:(?<=.\b\d{4}\|\b\d{2}:\d\d)\| # preceded by 4 digits or hh:mm or
	1143	(?<!.\b[a-zA-Z]{3}\|[a-zA-Z]{4}\|..\b\d\d)) # not preceded by 3 alpha word or >= 4 alpha or 2 digits
	1144	[ ]? # optional space
	1145	(?P<sign>\+\|-) # +/-
	1146	(?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2}) # hh[:]mm
	1147	$)
	1148	''', date_str)
	1149	if not m:
	1150	m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
	1151	timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
	1152	if timezone is not None:
	1153	date_str = date_str[:-len(m.group('tz'))]
	1154	timezone = dt.timedelta(hours=timezone or 0)
	1155	else:
	1156	date_str = date_str[:-len(m.group('tz'))]
	1157	if not m.group('sign'):
	1158	timezone = dt.timedelta()
	1159	else:
	1160	sign = 1 if m.group('sign') == '+' else -1
	1161	timezone = dt.timedelta(
	1162	hours=sign * int(m.group('hours')),
	1163	minutes=sign * int(m.group('minutes')))
	1164	return timezone, date_str
	1165
	1166
	1167	def parse_iso8601(date_str, delimiter='T', timezone=None):
	1168	""" Return a UNIX timestamp from the given date """
	1169
	1170	if date_str is None:
	1171	return None
	1172
	1173	date_str = re.sub(r'\.[0-9]+', '', date_str)
	1174
	1175	if timezone is None:
	1176	timezone, date_str = extract_timezone(date_str)
	1177
	1178	with contextlib.suppress(ValueError):
	1179	date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
	1180	dt_ = dt.datetime.strptime(date_str, date_format) - timezone
	1181	return calendar.timegm(dt_.timetuple())
	1182
	1183
	1184	def date_formats(day_first=True):
	1185	return DATE_FORMATS_DAY_FIRST if day_first else DATE_FORMATS_MONTH_FIRST
	1186
	1187
	1188	def unified_strdate(date_str, day_first=True):
	1189	"""Return a string with the date in the format YYYYMMDD"""
	1190
	1191	if date_str is None:
	1192	return None
	1193	upload_date = None
	1194	# Replace commas
	1195	date_str = date_str.replace(',', ' ')
	1196	# Remove AM/PM + timezone
	1197	date_str = re.sub(r'(?i)\s*(?:AM\|PM)(?:\s+[A-Z]+)?', '', date_str)
	1198	_, date_str = extract_timezone(date_str)
	1199
	1200	for expression in date_formats(day_first):
	1201	with contextlib.suppress(ValueError):
	1202	upload_date = dt.datetime.strptime(date_str, expression).strftime('%Y%m%d')
	1203	if upload_date is None:
	1204	timetuple = email.utils.parsedate_tz(date_str)
	1205	if timetuple:
	1206	with contextlib.suppress(ValueError):
	1207	upload_date = dt.datetime(*timetuple[:6]).strftime('%Y%m%d')
	1208	if upload_date is not None:
	1209	return str(upload_date)
	1210
	1211
	1212	def unified_timestamp(date_str, day_first=True):
	1213	if not isinstance(date_str, str):
	1214	return None
	1215
	1216	date_str = re.sub(r'\s+', ' ', re.sub(
	1217	r'(?i)[,\|]\|(mon\|tues?\|wed(nes)?\|thu(rs)?\|fri\|sat(ur)?)(day)?', '', date_str))
	1218
	1219	pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
	1220	timezone, date_str = extract_timezone(date_str)
	1221
	1222	# Remove AM/PM + timezone
	1223	date_str = re.sub(r'(?i)\s*(?:AM\|PM)(?:\s+[A-Z]+)?', '', date_str)
	1224
	1225	# Remove unrecognized timezones from ISO 8601 alike timestamps
	1226	m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
	1227	if m:
	1228	date_str = date_str[:-len(m.group('tz'))]
	1229
	1230	# Python only supports microseconds, so remove nanoseconds
	1231	m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
	1232	if m:
	1233	date_str = m.group(1)
	1234
	1235	for expression in date_formats(day_first):
	1236	with contextlib.suppress(ValueError):
	1237	dt_ = dt.datetime.strptime(date_str, expression) - timezone + dt.timedelta(hours=pm_delta)
	1238	return calendar.timegm(dt_.timetuple())
	1239
	1240	timetuple = email.utils.parsedate_tz(date_str)
	1241	if timetuple:
	1242	return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
	1243
	1244
	1245	def determine_ext(url, default_ext='unknown_video'):
	1246	if url is None or '.' not in url:
	1247	return default_ext
	1248	guess = url.partition('?')[0].rpartition('.')[2]
	1249	if re.match(r'^[A-Za-z0-9]+$', guess):
	1250	return guess
	1251	# Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
	1252	elif guess.rstrip('/') in KNOWN_EXTENSIONS:
	1253	return guess.rstrip('/')
	1254	else:
	1255	return default_ext
	1256
	1257
	1258	def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
	1259	return replace_extension(filename, sub_lang + '.' + sub_format, expected_real_ext)
	1260
	1261
	1262	def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
	1263	R"""
	1264	Return a datetime object from a string.
	1265	Supported format:
	1266	(now\|today\|yesterday\|DATE)([+-]\d+(microsecond\|second\|minute\|hour\|day\|week\|month\|year)s?)?
	1267
	1268	@param format strftime format of DATE
	1269	@param precision Round the datetime object: auto\|microsecond\|second\|minute\|hour\|day
	1270	auto: round to the unit provided in date_str (if applicable).
	1271	"""
	1272	auto_precision = False
	1273	if precision == 'auto':
	1274	auto_precision = True
	1275	precision = 'microsecond'
	1276	today = datetime_round(dt.datetime.now(dt.timezone.utc), precision)
	1277	if date_str in ('now', 'today'):
	1278	return today
	1279	if date_str == 'yesterday':
	1280	return today - dt.timedelta(days=1)
	1281	match = re.match(
	1282	r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond\|second\|minute\|hour\|day\|week\|month\|year)s?',
	1283	date_str)
	1284	if match is not None:
	1285	start_time = datetime_from_str(match.group('start'), precision, format)
	1286	time = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
	1287	unit = match.group('unit')
	1288	if unit == 'month' or unit == 'year':
	1289	new_date = datetime_add_months(start_time, time * 12 if unit == 'year' else time)
	1290	unit = 'day'
	1291	else:
	1292	if unit == 'week':
	1293	unit = 'day'
	1294	time *= 7
	1295	delta = dt.timedelta(**{unit + 's': time})
	1296	new_date = start_time + delta
	1297	if auto_precision:
	1298	return datetime_round(new_date, unit)
	1299	return new_date
	1300
	1301	return datetime_round(dt.datetime.strptime(date_str, format), precision)
	1302
	1303
	1304	def date_from_str(date_str, format='%Y%m%d', strict=False):
	1305	R"""
	1306	Return a date object from a string using datetime_from_str
	1307
	1308	@param strict Restrict allowed patterns to "YYYYMMDD" and
	1309	(now\|today\|yesterday)(-\d+(day\|week\|month\|year)s?)?
	1310	"""
	1311	if strict and not re.fullmatch(r'\d{8}\|(now\|today\|yesterday)(-\d+(day\|week\|month\|year)s?)?', date_str):
	1312	raise ValueError(f'Invalid date format "{date_str}"')
	1313	return datetime_from_str(date_str, precision='microsecond', format=format).date()
	1314
	1315
	1316	def datetime_add_months(dt_, months):
	1317	"""Increment/Decrement a datetime object by months."""
	1318	month = dt_.month + months - 1
	1319	year = dt_.year + month // 12
	1320	month = month % 12 + 1
	1321	day = min(dt_.day, calendar.monthrange(year, month)[1])
	1322	return dt_.replace(year, month, day)
	1323
	1324
	1325	def datetime_round(dt_, precision='day'):
	1326	"""
	1327	Round a datetime object's time to a specific precision
	1328	"""
	1329	if precision == 'microsecond':
	1330	return dt_
	1331
	1332	unit_seconds = {
	1333	'day': 86400,
	1334	'hour': 3600,
	1335	'minute': 60,
	1336	'second': 1,
	1337	}
	1338	roundto = lambda x, n: ((x + n / 2) // n) * n
	1339	timestamp = roundto(calendar.timegm(dt_.timetuple()), unit_seconds[precision])
	1340	return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc)
	1341
	1342
	1343	def hyphenate_date(date_str):
	1344	"""
	1345	Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format"""
	1346	match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
	1347	if match is not None:
	1348	return '-'.join(match.groups())
	1349	else:
	1350	return date_str
	1351
	1352
	1353	class DateRange:
	1354	"""Represents a time interval between two dates"""
	1355
	1356	def __init__(self, start=None, end=None):
	1357	"""start and end must be strings in the format accepted by date"""
	1358	if start is not None:
	1359	self.start = date_from_str(start, strict=True)
	1360	else:
	1361	self.start = dt.datetime.min.date()
	1362	if end is not None:
	1363	self.end = date_from_str(end, strict=True)
	1364	else:
	1365	self.end = dt.datetime.max.date()
	1366	if self.start > self.end:
	1367	raise ValueError('Date range: "%s" , the start date must be before the end date' % self)
	1368
	1369	@classmethod
	1370	def day(cls, day):
	1371	"""Returns a range that only contains the given day"""
	1372	return cls(day, day)
	1373
	1374	def __contains__(self, date):
	1375	"""Check if the date is in the range"""
	1376	if not isinstance(date, dt.date):
	1377	date = date_from_str(date)
	1378	return self.start <= date <= self.end
	1379
	1380	def __repr__(self):
	1381	return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'
	1382
	1383	def __str__(self):
	1384	return f'{self.start} to {self.end}'
	1385
	1386	def __eq__(self, other):
	1387	return (isinstance(other, DateRange)
	1388	and self.start == other.start and self.end == other.end)
	1389
	1390
	1391	@functools.cache
	1392	def system_identifier():
	1393	python_implementation = platform.python_implementation()
	1394	if python_implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
	1395	python_implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]
	1396	libc_ver = []
	1397	with contextlib.suppress(OSError): # We may not have access to the executable
	1398	libc_ver = platform.libc_ver()
	1399
	1400	return 'Python %s (%s %s %s) - %s (%s%s)' % (
	1401	platform.python_version(),
	1402	python_implementation,
	1403	platform.machine(),
	1404	platform.architecture()[0],
	1405	platform.platform(),
	1406	ssl.OPENSSL_VERSION,
	1407	format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s'),
	1408	)
	1409
	1410
	1411	@functools.cache
	1412	def get_windows_version():
	1413	''' Get Windows version. returns () if it's not running on Windows '''
	1414	if compat_os_name == 'nt':
	1415	return version_tuple(platform.win32_ver()[1])
	1416	else:
	1417	return ()
	1418
	1419
	1420	def write_string(s, out=None, encoding=None):
	1421	assert isinstance(s, str)
	1422	out = out or sys.stderr
	1423	# `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
	1424	if not out:
	1425	return
	1426
	1427	if compat_os_name == 'nt' and supports_terminal_sequences(out):
	1428	s = re.sub(r'([\r\n]+)', r' \1', s)
	1429
	1430	enc, buffer = None, out
	1431	# `mode` might be `None` (Ref: https://github.com/yt-dlp/yt-dlp/issues/8816)
	1432	if 'b' in (getattr(out, 'mode', None) or ''):
	1433	enc = encoding or preferredencoding()
	1434	elif hasattr(out, 'buffer'):
	1435	buffer = out.buffer
	1436	enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
	1437
	1438	buffer.write(s.encode(enc, 'ignore') if enc else s)
	1439	out.flush()
	1440
	1441
	1442	# TODO: Use global logger
	1443	def deprecation_warning(msg, , printer=None, stacklevel=0, *kwargs):
	1444	from .. import _IN_CLI
	1445	if _IN_CLI:
	1446	if msg in deprecation_warning._cache:
	1447	return
	1448	deprecation_warning._cache.add(msg)
	1449	if printer:
	1450	return printer(f'{msg}{bug_reports_message()}', **kwargs)
	1451	return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
	1452	else:
	1453	import warnings
	1454	warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)
	1455
	1456
	1457	deprecation_warning._cache = set()
	1458
	1459
	1460	def bytes_to_intlist(bs):
	1461	if not bs:
	1462	return []
	1463	if isinstance(bs[0], int): # Python 3
	1464	return list(bs)
	1465	else:
	1466	return [ord(c) for c in bs]
	1467
	1468
	1469	def intlist_to_bytes(xs):
	1470	if not xs:
	1471	return b''
	1472	return struct.pack('%dB' % len(xs), *xs)
	1473
	1474
	1475	class LockingUnsupportedError(OSError):
	1476	msg = 'File locking is not supported'
	1477
	1478	def __init__(self):
	1479	super().__init__(self.msg)
	1480
	1481
	1482	# Cross-platform file locking
	1483	if sys.platform == 'win32':
	1484	import ctypes
	1485	import ctypes.wintypes
	1486	import msvcrt
	1487
	1488	class OVERLAPPED(ctypes.Structure):
	1489	_fields_ = [
	1490	('Internal', ctypes.wintypes.LPVOID),
	1491	('InternalHigh', ctypes.wintypes.LPVOID),
	1492	('Offset', ctypes.wintypes.DWORD),
	1493	('OffsetHigh', ctypes.wintypes.DWORD),
	1494	('hEvent', ctypes.wintypes.HANDLE),
	1495	]
	1496
	1497	kernel32 = ctypes.WinDLL('kernel32')
	1498	LockFileEx = kernel32.LockFileEx
	1499	LockFileEx.argtypes = [
	1500	ctypes.wintypes.HANDLE, # hFile
	1501	ctypes.wintypes.DWORD, # dwFlags
	1502	ctypes.wintypes.DWORD, # dwReserved
	1503	ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
	1504	ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
	1505	ctypes.POINTER(OVERLAPPED) # Overlapped
	1506	]
	1507	LockFileEx.restype = ctypes.wintypes.BOOL
	1508	UnlockFileEx = kernel32.UnlockFileEx
	1509	UnlockFileEx.argtypes = [
	1510	ctypes.wintypes.HANDLE, # hFile
	1511	ctypes.wintypes.DWORD, # dwReserved
	1512	ctypes.wintypes.DWORD, # nNumberOfBytesToLockLow
	1513	ctypes.wintypes.DWORD, # nNumberOfBytesToLockHigh
	1514	ctypes.POINTER(OVERLAPPED) # Overlapped
	1515	]
	1516	UnlockFileEx.restype = ctypes.wintypes.BOOL
	1517	whole_low = 0xffffffff
	1518	whole_high = 0x7fffffff
	1519
	1520	def _lock_file(f, exclusive, block):
	1521	overlapped = OVERLAPPED()
	1522	overlapped.Offset = 0
	1523	overlapped.OffsetHigh = 0
	1524	overlapped.hEvent = 0
	1525	f._lock_file_overlapped_p = ctypes.pointer(overlapped)
	1526
	1527	if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
	1528	(0x2 if exclusive else 0x0) \| (0x0 if block else 0x1),
	1529	0, whole_low, whole_high, f._lock_file_overlapped_p):
	1530	# NB: No argument form of "ctypes.FormatError" does not work on PyPy
	1531	raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')
	1532
	1533	def _unlock_file(f):
	1534	assert f._lock_file_overlapped_p
	1535	handle = msvcrt.get_osfhandle(f.fileno())
	1536	if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
	1537	raise OSError('Unlocking file failed: %r' % ctypes.FormatError())
	1538
	1539	else:
	1540	try:
	1541	import fcntl
	1542
	1543	def _lock_file(f, exclusive, block):
	1544	flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
	1545	if not block:
	1546	flags \|= fcntl.LOCK_NB
	1547	try:
	1548	fcntl.flock(f, flags)
	1549	except BlockingIOError:
	1550	raise
	1551	except OSError: # AOSP does not have flock()
	1552	fcntl.lockf(f, flags)
	1553
	1554	def _unlock_file(f):
	1555	with contextlib.suppress(OSError):
	1556	return fcntl.flock(f, fcntl.LOCK_UN)
	1557	with contextlib.suppress(OSError):
	1558	return fcntl.lockf(f, fcntl.LOCK_UN) # AOSP does not have flock()
	1559	return fcntl.flock(f, fcntl.LOCK_UN \| fcntl.LOCK_NB) # virtiofs needs LOCK_NB on unlocking
	1560
	1561	except ImportError:
	1562
	1563	def _lock_file(f, exclusive, block):
	1564	raise LockingUnsupportedError()
	1565
	1566	def _unlock_file(f):
	1567	raise LockingUnsupportedError()
	1568
	1569
	1570	class locked_file:
	1571	locked = False
	1572
	1573	def __init__(self, filename, mode, block=True, encoding=None):
	1574	if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
	1575	raise NotImplementedError(mode)
	1576	self.mode, self.block = mode, block
	1577
	1578	writable = any(f in mode for f in 'wax+')
	1579	readable = any(f in mode for f in 'r+')
	1580	flags = functools.reduce(operator.ior, (
	1581	getattr(os, 'O_CLOEXEC', 0), # UNIX only
	1582	getattr(os, 'O_BINARY', 0), # Windows only
	1583	getattr(os, 'O_NOINHERIT', 0), # Windows only
	1584	os.O_CREAT if writable else 0, # O_TRUNC only after locking
	1585	os.O_APPEND if 'a' in mode else 0,
	1586	os.O_EXCL if 'x' in mode else 0,
	1587	os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
	1588	))
	1589
	1590	self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)
	1591
	1592	def __enter__(self):
	1593	exclusive = 'r' not in self.mode
	1594	try:
	1595	_lock_file(self.f, exclusive, self.block)
	1596	self.locked = True
	1597	except OSError:
	1598	self.f.close()
	1599	raise
	1600	if 'w' in self.mode:
	1601	try:
	1602	self.f.truncate()
	1603	except OSError as e:
	1604	if e.errno not in (
	1605	errno.ESPIPE, # Illegal seek - expected for FIFO
	1606	errno.EINVAL, # Invalid argument - expected for /dev/null
	1607	):
	1608	raise
	1609	return self
	1610
	1611	def unlock(self):
	1612	if not self.locked:
	1613	return
	1614	try:
	1615	_unlock_file(self.f)
	1616	finally:
	1617	self.locked = False
	1618
	1619	def __exit__(self, *_):
	1620	try:
	1621	self.unlock()
	1622	finally:
	1623	self.f.close()
	1624
	1625	open = __enter__
	1626	close = __exit__
	1627
	1628	def __getattr__(self, attr):
	1629	return getattr(self.f, attr)
	1630
	1631	def __iter__(self):
	1632	return iter(self.f)
	1633
	1634
	1635	@functools.cache
	1636	def get_filesystem_encoding():
	1637	encoding = sys.getfilesystemencoding()
	1638	return encoding if encoding is not None else 'utf-8'
	1639
	1640
	1641	_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
	1642	_CMD_QUOTE_TRANS = str.maketrans({
	1643	# Keep quotes balanced by replacing them with `""` instead of `\\"`
	1644	'"': '""',
	1645	# These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
	1646	# `=` should be unique since variables containing `=` cannot be set using cmd
	1647	'\n': '%=%',
	1648	'\r': '%=%',
	1649	# Use zero length variable replacement so `%` doesn't get expanded
	1650	# `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
	1651	'%': '%%cd:~,%',
	1652	})
	1653
	1654
	1655	def shell_quote(args, *, shell=False):
	1656	args = list(variadic(args))
	1657
	1658	if compat_os_name != 'nt':
	1659	return shlex.join(args)
	1660
	1661	trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
	1662	return ' '.join(
	1663	s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
	1664	else re.sub(r'(\\+)("\|$)', r'\1\1\2', s).translate(trans).join('""')
	1665	for s in args)
	1666
	1667
	1668	def smuggle_url(url, data):
	1669	""" Pass additional data in a URL for internal use. """
	1670
	1671	url, idata = unsmuggle_url(url, {})
	1672	data.update(idata)
	1673	sdata = urllib.parse.urlencode(
	1674	{'__youtubedl_smuggle': json.dumps(data)})
	1675	return url + '#' + sdata
	1676
	1677
	1678	def unsmuggle_url(smug_url, default=None):
	1679	if '#__youtubedl_smuggle' not in smug_url:
	1680	return smug_url, default
	1681	url, _, sdata = smug_url.rpartition('#')
	1682	jsond = urllib.parse.parse_qs(sdata)['__youtubedl_smuggle'][0]
	1683	data = json.loads(jsond)
	1684	return url, data
	1685
	1686
	1687	def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
	1688	""" Formats numbers with decimal sufixes like K, M, etc """
	1689	num, factor = float_or_none(num), float(factor)
	1690	if num is None or num < 0:
	1691	return None
	1692	POSSIBLE_SUFFIXES = 'kMGTPEZY'
	1693	exponent = 0 if num == 0 else min(int(math.log(num, factor)), len(POSSIBLE_SUFFIXES))
	1694	suffix = ['', *POSSIBLE_SUFFIXES][exponent]
	1695	if factor == 1024:
	1696	suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
	1697	converted = num / (factor ** exponent)
	1698	return fmt % (converted, suffix)
	1699
	1700
	1701	def format_bytes(bytes):
	1702	return format_decimal_suffix(bytes, '%.2f%sB', factor=1024) or 'N/A'
	1703
	1704
	1705	def lookup_unit_table(unit_table, s, strict=False):
	1706	num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]')
	1707	units_re = '\|'.join(re.escape(u) for u in unit_table)
	1708	m = (re.fullmatch if strict else re.match)(
	1709	rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
	1710	if not m:
	1711	return None
	1712
	1713	num = float(m.group('num').replace(',', '.'))
	1714	mult = unit_table[m.group('unit')]
	1715	return round(num * mult)
	1716
	1717
	1718	def parse_bytes(s):
	1719	"""Parse a string indicating a byte quantity into an integer"""
	1720	return lookup_unit_table(
	1721	{u: 1024*i for i, u in enumerate(['', 'KMGTPEZY'])},
	1722	s.upper(), strict=True)
	1723
	1724
	1725	def parse_filesize(s):
	1726	if s is None:
	1727	return None
	1728
	1729	# The lower-case forms are of course incorrect and unofficial,
	1730	# but we support those too
	1731	_UNIT_TABLE = {
	1732	'B': 1,
	1733	'b': 1,
	1734	'bytes': 1,
	1735	'KiB': 1024,
	1736	'KB': 1000,
	1737	'kB': 1024,
	1738	'Kb': 1000,
	1739	'kb': 1000,
	1740	'kilobytes': 1000,
	1741	'kibibytes': 1024,
	1742	'MiB': 1024 ** 2,
	1743	'MB': 1000 ** 2,
	1744	'mB': 1024 ** 2,
	1745	'Mb': 1000 ** 2,
	1746	'mb': 1000 ** 2,
	1747	'megabytes': 1000 ** 2,
	1748	'mebibytes': 1024 ** 2,
	1749	'GiB': 1024 ** 3,
	1750	'GB': 1000 ** 3,
	1751	'gB': 1024 ** 3,
	1752	'Gb': 1000 ** 3,
	1753	'gb': 1000 ** 3,
	1754	'gigabytes': 1000 ** 3,
	1755	'gibibytes': 1024 ** 3,
	1756	'TiB': 1024 ** 4,
	1757	'TB': 1000 ** 4,
	1758	'tB': 1024 ** 4,
	1759	'Tb': 1000 ** 4,
	1760	'tb': 1000 ** 4,
	1761	'terabytes': 1000 ** 4,
	1762	'tebibytes': 1024 ** 4,
	1763	'PiB': 1024 ** 5,
	1764	'PB': 1000 ** 5,
	1765	'pB': 1024 ** 5,
	1766	'Pb': 1000 ** 5,
	1767	'pb': 1000 ** 5,
	1768	'petabytes': 1000 ** 5,
	1769	'pebibytes': 1024 ** 5,
	1770	'EiB': 1024 ** 6,
	1771	'EB': 1000 ** 6,
	1772	'eB': 1024 ** 6,
	1773	'Eb': 1000 ** 6,
	1774	'eb': 1000 ** 6,
	1775	'exabytes': 1000 ** 6,
	1776	'exbibytes': 1024 ** 6,
	1777	'ZiB': 1024 ** 7,
	1778	'ZB': 1000 ** 7,
	1779	'zB': 1024 ** 7,
	1780	'Zb': 1000 ** 7,
	1781	'zb': 1000 ** 7,
	1782	'zettabytes': 1000 ** 7,
	1783	'zebibytes': 1024 ** 7,
	1784	'YiB': 1024 ** 8,
	1785	'YB': 1000 ** 8,
	1786	'yB': 1024 ** 8,
	1787	'Yb': 1000 ** 8,
	1788	'yb': 1000 ** 8,
	1789	'yottabytes': 1000 ** 8,
	1790	'yobibytes': 1024 ** 8,
	1791	}
	1792
	1793	return lookup_unit_table(_UNIT_TABLE, s)
	1794
	1795
	1796	def parse_count(s):
	1797	if s is None:
	1798	return None
	1799
	1800	s = re.sub(r'^[^\d]+\s', '', s).strip()
	1801
	1802	if re.match(r'^[\d,.]+$', s):
	1803	return str_to_int(s)
	1804
	1805	_UNIT_TABLE = {
	1806	'k': 1000,
	1807	'K': 1000,
	1808	'm': 1000 ** 2,
	1809	'M': 1000 ** 2,
	1810	'kk': 1000 ** 2,
	1811	'KK': 1000 ** 2,
	1812	'b': 1000 ** 3,
	1813	'B': 1000 ** 3,
	1814	}
	1815
	1816	ret = lookup_unit_table(_UNIT_TABLE, s)
	1817	if ret is not None:
	1818	return ret
	1819
	1820	mobj = re.match(r'([\d,.]+)(?:$\|\s)', s)
	1821	if mobj:
	1822	return str_to_int(mobj.group(1))
	1823
	1824
	1825	def parse_resolution(s, *, lenient=False):
	1826	if s is None:
	1827	return {}
	1828
	1829	if lenient:
	1830	mobj = re.search(r'(?P<w>\d+)\s[xX×,]\s(?P<h>\d+)', s)
	1831	else:
	1832	mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s[xX×,]\s(?P<h>\d+)(?![a-zA-Z0-9])', s)
	1833	if mobj:
	1834	return {
	1835	'width': int(mobj.group('w')),
	1836	'height': int(mobj.group('h')),
	1837	}
	1838
	1839	mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
	1840	if mobj:
	1841	return {'height': int(mobj.group(1))}
	1842
	1843	mobj = re.search(r'\b([48])[kK]\b', s)
	1844	if mobj:
	1845	return {'height': int(mobj.group(1)) * 540}
	1846
	1847	return {}
	1848
	1849
	1850	def parse_bitrate(s):
	1851	if not isinstance(s, str):
	1852	return
	1853	mobj = re.search(r'\b(\d+)\s*kbps', s)
	1854	if mobj:
	1855	return int(mobj.group(1))
	1856
	1857
	1858	def month_by_name(name, lang='en'):
	1859	""" Return the number of a month by (locale-independently) English name """
	1860
	1861	month_names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])
	1862
	1863	try:
	1864	return month_names.index(name) + 1
	1865	except ValueError:
	1866	return None
	1867
	1868
	1869	def month_by_abbreviation(abbrev):
	1870	""" Return the number of a month by (locale-independently) English
	1871	abbreviations """
	1872
	1873	try:
	1874	return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
	1875	except ValueError:
	1876	return None
	1877
	1878
	1879	def fix_xml_ampersands(xml_str):
	1880	"""Replace all the '&' by '&' in XML"""
	1881	return re.sub(
	1882	r'&(?!amp;\|lt;\|gt;\|apos;\|quot;\|#x[0-9a-fA-F]{,4};\|#[0-9]{,4};)',
	1883	'&',
	1884	xml_str)
	1885
	1886
	1887	def setproctitle(title):
	1888	assert isinstance(title, str)
	1889
	1890	# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
	1891	try:
	1892	import ctypes
	1893	except ImportError:
	1894	return
	1895
	1896	try:
	1897	libc = ctypes.cdll.LoadLibrary('libc.so.6')
	1898	except OSError:
	1899	return
	1900	except TypeError:
	1901	# LoadLibrary in Windows Python 2.7.13 only expects
	1902	# a bytestring, but since unicode_literals turns
	1903	# every string into a unicode string, it fails.
	1904	return
	1905	title_bytes = title.encode()
	1906	buf = ctypes.create_string_buffer(len(title_bytes))
	1907	buf.value = title_bytes
	1908	try:
	1909	# PR_SET_NAME = 15 Ref: /usr/include/linux/prctl.h
	1910	libc.prctl(15, buf, 0, 0, 0)
	1911	except AttributeError:
	1912	return # Strange libc, just skip this
	1913
	1914
	1915	def remove_start(s, start):
	1916	return s[len(start):] if s is not None and s.startswith(start) else s
	1917
	1918
	1919	def remove_end(s, end):
	1920	return s[:-len(end)] if s is not None and s.endswith(end) else s
	1921
	1922
	1923	def remove_quotes(s):
	1924	if s is None or len(s) < 2:
	1925	return s
	1926	for quote in ('"', "'", ):
	1927	if s[0] == quote and s[-1] == quote:
	1928	return s[1:-1]
	1929	return s
	1930
	1931
	1932	def get_domain(url):
	1933	"""
	1934	This implementation is inconsistent, but is kept for compatibility.
	1935	Use this only for "webpage_url_domain"
	1936	"""
	1937	return remove_start(urllib.parse.urlparse(url).netloc, 'www.') or None
	1938
	1939
	1940	def url_basename(url):
	1941	path = urllib.parse.urlparse(url).path
	1942	return path.strip('/').split('/')[-1]
	1943
	1944
	1945	def base_url(url):
	1946	return re.match(r'https?://[^?#]+/', url).group()
	1947
	1948
	1949	def urljoin(base, path):
	1950	if isinstance(path, bytes):
	1951	path = path.decode()
	1952	if not isinstance(path, str) or not path:
	1953	return None
	1954	if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
	1955	return path
	1956	if isinstance(base, bytes):
	1957	base = base.decode()
	1958	if not isinstance(base, str) or not re.match(
	1959	r'^(?:https?:)?//', base):
	1960	return None
	1961	return urllib.parse.urljoin(base, path)
	1962
	1963
	1964	def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
	1965	if get_attr and v is not None:
	1966	v = getattr(v, get_attr, None)
	1967	try:
	1968	return int(v) * invscale // scale
	1969	except (ValueError, TypeError, OverflowError):
	1970	return default
	1971
	1972
	1973	def str_or_none(v, default=None):
	1974	return default if v is None else str(v)
	1975
	1976
	1977	def str_to_int(int_str):
	1978	""" A more relaxed version of int_or_none """
	1979	if isinstance(int_str, int):
	1980	return int_str
	1981	elif isinstance(int_str, str):
	1982	int_str = re.sub(r'[,\.\+]', '', int_str)
	1983	return int_or_none(int_str)
	1984
	1985
	1986	def float_or_none(v, scale=1, invscale=1, default=None):
	1987	if v is None:
	1988	return default
	1989	try:
	1990	return float(v) * invscale / scale
	1991	except (ValueError, TypeError):
	1992	return default
	1993
	1994
	1995	def bool_or_none(v, default=None):
	1996	return v if isinstance(v, bool) else default
	1997
	1998
	1999	def strip_or_none(v, default=None):
	2000	return v.strip() if isinstance(v, str) else default
	2001
	2002
	2003	def url_or_none(url):
	2004	if not url or not isinstance(url, str):
	2005	return None
	2006	url = url.strip()
	2007	return url if re.match(r'^(?:(?:https?\|rt(?:m(?:pt?[es]?\|fp)\|sp[su]?)\|mms\|ftps?):)?//', url) else None
	2008
	2009
	2010	def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
	2011	datetime_object = None
	2012	try:
	2013	if isinstance(timestamp, (int, float)): # unix timestamp
	2014	# Using naive datetime here can break timestamp() in Windows
	2015	# Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
	2016	# Also, dt.datetime.fromtimestamp breaks for negative timestamps
	2017	# Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
	2018	datetime_object = (dt.datetime.fromtimestamp(0, dt.timezone.utc)
	2019	+ dt.timedelta(seconds=timestamp))
	2020	elif isinstance(timestamp, str): # assume YYYYMMDD
	2021	datetime_object = dt.datetime.strptime(timestamp, '%Y%m%d')
	2022	date_format = re.sub( # Support %s on windows
	2023	r'(?<!%)(%%)*%s', rf'\g<1>{int(datetime_object.timestamp())}', date_format)
	2024	return datetime_object.strftime(date_format)
	2025	except (ValueError, TypeError, AttributeError):
	2026	return default
	2027
	2028
	2029	def parse_duration(s):
	2030	if not isinstance(s, str):
	2031	return None
	2032	s = s.strip()
	2033	if not s:
	2034	return None
	2035
	2036	days, hours, mins, secs, ms = [None] * 5
	2037	m = re.match(r'''(?x)
	2038	(?P<before_secs>
	2039	(?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
	2040	(?P<secs>(?(before_secs)[0-9]{1,2}\|[0-9]+))
	2041	(?P<ms>[.:][0-9]+)?Z?$
	2042	''', s)
	2043	if m:
	2044	days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
	2045	else:
	2046	m = re.match(
	2047	r'''(?ix)(?:P?
	2048	(?:
	2049	[0-9]+\sy(?:ears?)?,?\s
	2050	)?
	2051	(?:
	2052	[0-9]+\sm(?:onths?)?,?\s
	2053	)?
	2054	(?:
	2055	[0-9]+\sw(?:eeks?)?,?\s
	2056	)?
	2057	(?:
	2058	(?P<days>[0-9]+)\sd(?:ays?)?,?\s
	2059	)?
	2060	T)?
	2061	(?:
	2062	(?P<hours>[0-9]+)\sh(?:(?:ou)?rs?)?,?\s
	2063	)?
	2064	(?:
	2065	(?P<mins>[0-9]+)\sm(?:in(?:ute)?s?)?,?\s
	2066	)?
	2067	(?:
	2068	(?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\ss(?:ec(?:ond)?s?)?\s
	2069	)?Z?$''', s)
	2070	if m:
	2071	days, hours, mins, secs, ms = m.groups()
	2072	else:
	2073	m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s(?:hours?)\|(?P<mins>[0-9.]+)\s(?:mins?\.?\|minutes?)\s*)Z?$', s)
	2074	if m:
	2075	hours, mins = m.groups()
	2076	else:
	2077	return None
	2078
	2079	if ms:
	2080	ms = ms.replace(':', '.')
	2081	return sum(float(part or 0) * mult for part, mult in (
	2082	(days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
	2083
	2084
	2085	def prepend_extension(filename, ext, expected_real_ext=None):
	2086	name, real_ext = os.path.splitext(filename)
	2087	return (
	2088	f'{name}.{ext}{real_ext}'
	2089	if not expected_real_ext or real_ext[1:] == expected_real_ext
	2090	else f'{filename}.{ext}')
	2091
	2092
	2093	def replace_extension(filename, ext, expected_real_ext=None):
	2094	name, real_ext = os.path.splitext(filename)
	2095	return '{}.{}'.format(
	2096	name if not expected_real_ext or real_ext[1:] == expected_real_ext else filename,
	2097	ext)
	2098
	2099
	2100	def check_executable(exe, args=[]):
	2101	""" Checks if the given binary is installed somewhere in PATH, and returns its name.
	2102	args can be a list of arguments for a short output (like -version) """
	2103	try:
	2104	Popen.run([exe] + args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
	2105	except OSError:
	2106	return False
	2107	return exe
	2108
	2109
	2110	def _get_exe_version_output(exe, args):
	2111	try:
	2112	# STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
	2113	# SIGTTOU if yt-dlp is run in the background.
	2114	# See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
	2115	stdout, _, ret = Popen.run([encodeArgument(exe)] + args, text=True,
	2116	stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
	2117	if ret:
	2118	return None
	2119	except OSError:
	2120	return False
	2121	return stdout
	2122
	2123
	2124	def detect_exe_version(output, version_re=None, unrecognized='present'):
	2125	assert isinstance(output, str)
	2126	if version_re is None:
	2127	version_re = r'version\s+([-0-9._a-zA-Z]+)'
	2128	m = re.search(version_re, output)
	2129	if m:
	2130	return m.group(1)
	2131	else:
	2132	return unrecognized
	2133
	2134
	2135	def get_exe_version(exe, args=['--version'],
	2136	version_re=None, unrecognized=('present', 'broken')):
	2137	""" Returns the version of the specified executable,
	2138	or False if the executable is not present """
	2139	unrecognized = variadic(unrecognized)
	2140	assert len(unrecognized) in (1, 2)
	2141	out = _get_exe_version_output(exe, args)
	2142	if out is None:
	2143	return unrecognized[-1]
	2144	return out and detect_exe_version(out, version_re, unrecognized[0])
	2145
	2146
	2147	def frange(start=0, stop=None, step=1):
	2148	"""Float range"""
	2149	if stop is None:
	2150	start, stop = 0, start
	2151	sign = [-1, 1][step > 0] if step else 0
	2152	while sign * start < sign * stop:
	2153	yield start
	2154	start += step
	2155
	2156
	2157	class LazyList(collections.abc.Sequence):
	2158	"""Lazy immutable list from an iterable
	2159	Note that slices of a LazyList are lists and not LazyList"""
	2160
	2161	class IndexError(IndexError):
	2162	pass
	2163
	2164	def __init__(self, iterable, *, reverse=False, _cache=None):
	2165	self._iterable = iter(iterable)
	2166	self._cache = [] if _cache is None else _cache
	2167	self._reversed = reverse
	2168
	2169	def __iter__(self):
	2170	if self._reversed:
	2171	# We need to consume the entire iterable to iterate in reverse
	2172	yield from self.exhaust()
	2173	return
	2174	yield from self._cache
	2175	for item in self._iterable:
	2176	self._cache.append(item)
	2177	yield item
	2178
	2179	def _exhaust(self):
	2180	self._cache.extend(self._iterable)
	2181	self._iterable = [] # Discard the emptied iterable to make it pickle-able
	2182	return self._cache
	2183
	2184	def exhaust(self):
	2185	"""Evaluate the entire iterable"""
	2186	return self._exhaust()[::-1 if self._reversed else 1]
	2187
	2188	@staticmethod
	2189	def _reverse_index(x):
	2190	return None if x is None else ~x
	2191
	2192	def __getitem__(self, idx):
	2193	if isinstance(idx, slice):
	2194	if self._reversed:
	2195	idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
	2196	start, stop, step = idx.start, idx.stop, idx.step or 1
	2197	elif isinstance(idx, int):
	2198	if self._reversed:
	2199	idx = self._reverse_index(idx)
	2200	start, stop, step = idx, idx, 0
	2201	else:
	2202	raise TypeError('indices must be integers or slices')
	2203	if ((start or 0) < 0 or (stop or 0) < 0
	2204	or (start is None and step < 0)
	2205	or (stop is None and step > 0)):
	2206	# We need to consume the entire iterable to be able to slice from the end
	2207	# Obviously, never use this with infinite iterables
	2208	self._exhaust()
	2209	try:
	2210	return self._cache[idx]
	2211	except IndexError as e:
	2212	raise self.IndexError(e) from e
	2213	n = max(start or 0, stop or 0) - len(self._cache) + 1
	2214	if n > 0:
	2215	self._cache.extend(itertools.islice(self._iterable, n))
	2216	try:
	2217	return self._cache[idx]
	2218	except IndexError as e:
	2219	raise self.IndexError(e) from e
	2220
	2221	def __bool__(self):
	2222	try:
	2223	self[-1] if self._reversed else self[0]
	2224	except self.IndexError:
	2225	return False
	2226	return True
	2227
	2228	def __len__(self):
	2229	self._exhaust()
	2230	return len(self._cache)
	2231
	2232	def __reversed__(self):
	2233	return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)
	2234
	2235	def __copy__(self):
	2236	return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)
	2237
	2238	def __repr__(self):
	2239	# repr and str should mimic a list. So we exhaust the iterable
	2240	return repr(self.exhaust())
	2241
	2242	def __str__(self):
	2243	return repr(self.exhaust())
	2244
	2245
	2246	class PagedList:
	2247
	2248	class IndexError(IndexError):
	2249	pass
	2250
	2251	def __len__(self):
	2252	# This is only useful for tests
	2253	return len(self.getslice())
	2254
	2255	def __init__(self, pagefunc, pagesize, use_cache=True):
	2256	self._pagefunc = pagefunc
	2257	self._pagesize = pagesize
	2258	self._pagecount = float('inf')
	2259	self._use_cache = use_cache
	2260	self._cache = {}
	2261
	2262	def getpage(self, pagenum):
	2263	page_results = self._cache.get(pagenum)
	2264	if page_results is None:
	2265	page_results = [] if pagenum > self._pagecount else list(self._pagefunc(pagenum))
	2266	if self._use_cache:
	2267	self._cache[pagenum] = page_results
	2268	return page_results
	2269
	2270	def getslice(self, start=0, end=None):
	2271	return list(self._getslice(start, end))
	2272
	2273	def _getslice(self, start, end):
	2274	raise NotImplementedError('This method must be implemented by subclasses')
	2275
	2276	def __getitem__(self, idx):
	2277	assert self._use_cache, 'Indexing PagedList requires cache'
	2278	if not isinstance(idx, int) or idx < 0:
	2279	raise TypeError('indices must be non-negative integers')
	2280	entries = self.getslice(idx, idx + 1)
	2281	if not entries:
	2282	raise self.IndexError()
	2283	return entries[0]
	2284
	2285	def __bool__(self):
	2286	return bool(self.getslice(0, 1))
	2287
	2288
	2289	class OnDemandPagedList(PagedList):
	2290	"""Download pages until a page with less than maximum results"""
	2291
	2292	def _getslice(self, start, end):
	2293	for pagenum in itertools.count(start // self._pagesize):
	2294	firstid = pagenum * self._pagesize
	2295	nextfirstid = pagenum * self._pagesize + self._pagesize
	2296	if start >= nextfirstid:
	2297	continue
	2298
	2299	startv = (
	2300	start % self._pagesize
	2301	if firstid <= start < nextfirstid
	2302	else 0)
	2303	endv = (
	2304	((end - 1) % self._pagesize) + 1
	2305	if (end is not None and firstid <= end <= nextfirstid)
	2306	else None)
	2307
	2308	try:
	2309	page_results = self.getpage(pagenum)
	2310	except Exception:
	2311	self._pagecount = pagenum - 1
	2312	raise
	2313	if startv != 0 or endv is not None:
	2314	page_results = page_results[startv:endv]
	2315	yield from page_results
	2316
	2317	# A little optimization - if current page is not "full", ie. does
	2318	# not contain page_size videos then we can assume that this page
	2319	# is the last one - there are no more ids on further pages -
	2320	# i.e. no need to query again.
	2321	if len(page_results) + startv < self._pagesize:
	2322	break
	2323
	2324	# If we got the whole page, but the next page is not interesting,
	2325	# break out early as well
	2326	if end == nextfirstid:
	2327	break
	2328
	2329
	2330	class InAdvancePagedList(PagedList):
	2331	"""PagedList with total number of pages known in advance"""
	2332
	2333	def __init__(self, pagefunc, pagecount, pagesize):
	2334	PagedList.__init__(self, pagefunc, pagesize, True)
	2335	self._pagecount = pagecount
	2336
	2337	def _getslice(self, start, end):
	2338	start_page = start // self._pagesize
	2339	end_page = self._pagecount if end is None else min(self._pagecount, end // self._pagesize + 1)
	2340	skip_elems = start - start_page * self._pagesize
	2341	only_more = None if end is None else end - start
	2342	for pagenum in range(start_page, end_page):
	2343	page_results = self.getpage(pagenum)
	2344	if skip_elems:
	2345	page_results = page_results[skip_elems:]
	2346	skip_elems = None
	2347	if only_more is not None:
	2348	if len(page_results) < only_more:
	2349	only_more -= len(page_results)
	2350	else:
	2351	yield from page_results[:only_more]
	2352	break
	2353	yield from page_results
	2354
	2355
	2356	class PlaylistEntries:
	2357	MissingEntry = object()
	2358	is_exhausted = False
	2359
	2360	def __init__(self, ydl, info_dict):
	2361	self.ydl = ydl
	2362
	2363	# _entries must be assigned now since infodict can change during iteration
	2364	entries = info_dict.get('entries')
	2365	if entries is None:
	2366	raise EntryNotInPlaylist('There are no entries')
	2367	elif isinstance(entries, list):
	2368	self.is_exhausted = True
	2369
	2370	requested_entries = info_dict.get('requested_entries')
	2371	self.is_incomplete = requested_entries is not None
	2372	if self.is_incomplete:
	2373	assert self.is_exhausted
	2374	self._entries = [self.MissingEntry] * max(requested_entries or [0])
	2375	for i, entry in zip(requested_entries, entries):
	2376	self._entries[i - 1] = entry
	2377	elif isinstance(entries, (list, PagedList, LazyList)):
	2378	self._entries = entries
	2379	else:
	2380	self._entries = LazyList(entries)
	2381
	2382	PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
	2383	(?P<start>[+-]?\d+)?
	2384	(?P<range>[:-]
	2385	(?P<end>[+-]?\d+\|inf(?:inite)?)?
	2386	(?::(?P<step>[+-]?\d+))?
	2387	)?''')
	2388
	2389	@classmethod
	2390	def parse_playlist_items(cls, string):
	2391	for segment in string.split(','):
	2392	if not segment:
	2393	raise ValueError('There is two or more consecutive commas')
	2394	mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
	2395	if not mobj:
	2396	raise ValueError(f'{segment!r} is not a valid specification')
	2397	start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
	2398	if int_or_none(step) == 0:
	2399	raise ValueError(f'Step in {segment!r} cannot be zero')
	2400	yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)
	2401
	2402	def get_requested_items(self):
	2403	playlist_items = self.ydl.params.get('playlist_items')
	2404	playlist_start = self.ydl.params.get('playliststart', 1)
	2405	playlist_end = self.ydl.params.get('playlistend')
	2406	# For backwards compatibility, interpret -1 as whole list
	2407	if playlist_end in (-1, None):
	2408	playlist_end = ''
	2409	if not playlist_items:
	2410	playlist_items = f'{playlist_start}:{playlist_end}'
	2411	elif playlist_start != 1 or playlist_end:
	2412	self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)
	2413
	2414	for index in self.parse_playlist_items(playlist_items):
	2415	for i, entry in self[index]:
	2416	yield i, entry
	2417	if not entry:
	2418	continue
	2419	try:
	2420	# The item may have just been added to archive. Don't break due to it
	2421	if not self.ydl.params.get('lazy_playlist'):
	2422	# TODO: Add auto-generated fields
	2423	self.ydl._match_entry(entry, incomplete=True, silent=True)
	2424	except (ExistingVideoReached, RejectedVideoReached):
	2425	return
	2426
	2427	def get_full_count(self):
	2428	if self.is_exhausted and not self.is_incomplete:
	2429	return len(self)
	2430	elif isinstance(self._entries, InAdvancePagedList):
	2431	if self._entries._pagesize == 1:
	2432	return self._entries._pagecount
	2433
	2434	@functools.cached_property
	2435	def _getter(self):
	2436	if isinstance(self._entries, list):
	2437	def get_entry(i):
	2438	try:
	2439	entry = self._entries[i]
	2440	except IndexError:
	2441	entry = self.MissingEntry
	2442	if not self.is_incomplete:
	2443	raise self.IndexError()
	2444	if entry is self.MissingEntry:
	2445	raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
	2446	return entry
	2447	else:
	2448	def get_entry(i):
	2449	try:
	2450	return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
	2451	except (LazyList.IndexError, PagedList.IndexError):
	2452	raise self.IndexError()
	2453	return get_entry
	2454
	2455	def __getitem__(self, idx):
	2456	if isinstance(idx, int):
	2457	idx = slice(idx, idx)
	2458
	2459	# NB: PlaylistEntries[1:10] => (0, 1, ... 9)
	2460	step = 1 if idx.step is None else idx.step
	2461	if idx.start is None:
	2462	start = 0 if step > 0 else len(self) - 1
	2463	else:
	2464	start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start
	2465
	2466	# NB: Do not call len(self) when idx == [:]
	2467	if idx.stop is None:
	2468	stop = 0 if step < 0 else float('inf')
	2469	else:
	2470	stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
	2471	stop += [-1, 1][step > 0]
	2472
	2473	for i in frange(start, stop, step):
	2474	if i < 0:
	2475	continue
	2476	try:
	2477	entry = self._getter(i)
	2478	except self.IndexError:
	2479	self.is_exhausted = True
	2480	if step > 0:
	2481	break
	2482	continue
	2483	yield i + 1, entry
	2484
	2485	def __len__(self):
	2486	return len(tuple(self[:]))
	2487
	2488	class IndexError(IndexError):
	2489	pass
	2490
	2491
	2492	def uppercase_escape(s):
	2493	unicode_escape = codecs.getdecoder('unicode_escape')
	2494	return re.sub(
	2495	r'\\U[0-9a-fA-F]{8}',
	2496	lambda m: unicode_escape(m.group(0))[0],
	2497	s)
	2498
	2499
	2500	def lowercase_escape(s):
	2501	unicode_escape = codecs.getdecoder('unicode_escape')
	2502	return re.sub(
	2503	r'\\u[0-9a-fA-F]{4}',
	2504	lambda m: unicode_escape(m.group(0))[0],
	2505	s)
	2506
	2507
	2508	def parse_qs(url, **kwargs):
	2509	return urllib.parse.parse_qs(urllib.parse.urlparse(url).query, **kwargs)
	2510
	2511
	2512	def read_batch_urls(batch_fd):
	2513	def fixup(url):
	2514	if not isinstance(url, str):
	2515	url = url.decode('utf-8', 'replace')
	2516	BOM_UTF8 = ('\xef\xbb\xbf', '\ufeff')
	2517	for bom in BOM_UTF8:
	2518	if url.startswith(bom):
	2519	url = url[len(bom):]
	2520	url = url.lstrip()
	2521	if not url or url.startswith(('#', ';', ']')):
	2522	return False
	2523	# "#" cannot be stripped out since it is part of the URI
	2524	# However, it can be safely stripped out if following a whitespace
	2525	return re.split(r'\s#', url, 1)[0].rstrip()
	2526
	2527	with contextlib.closing(batch_fd) as fd:
	2528	return [url for url in map(fixup, fd) if url]
	2529
	2530
	2531	def urlencode_postdata(args, *kargs):
	2532	return urllib.parse.urlencode(args, *kargs).encode('ascii')
	2533
	2534
	2535	def update_url(url, , query_update=None, *kwargs):
	2536	"""Replace URL components specified by kwargs
	2537	@param url str or parse url tuple
	2538	@param query_update update query
	2539	@returns str
	2540	"""
	2541	if isinstance(url, str):
	2542	if not kwargs and not query_update:
	2543	return url
	2544	else:
	2545	url = urllib.parse.urlparse(url)
	2546	if query_update:
	2547	assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
	2548	kwargs['query'] = urllib.parse.urlencode({
	2549	**urllib.parse.parse_qs(url.query),
	2550	**query_update
	2551	}, True)
	2552	return urllib.parse.urlunparse(url._replace(**kwargs))
	2553
	2554
	2555	def update_url_query(url, query):
	2556	return update_url(url, query_update=query)
	2557
	2558
	2559	def _multipart_encode_impl(data, boundary):
	2560	content_type = 'multipart/form-data; boundary=%s' % boundary
	2561
	2562	out = b''
	2563	for k, v in data.items():
	2564	out += b'--' + boundary.encode('ascii') + b'\r\n'
	2565	if isinstance(k, str):
	2566	k = k.encode()
	2567	if isinstance(v, str):
	2568	v = v.encode()
	2569	# RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
	2570	# suggests sending UTF-8 directly. Firefox sends UTF-8, too
	2571	content = b'Content-Disposition: form-data; name="' + k + b'"\r\n\r\n' + v + b'\r\n'
	2572	if boundary.encode('ascii') in content:
	2573	raise ValueError('Boundary overlaps with data')
	2574	out += content
	2575
	2576	out += b'--' + boundary.encode('ascii') + b'--\r\n'
	2577
	2578	return out, content_type
	2579
	2580
	2581	def multipart_encode(data, boundary=None):
	2582	'''
	2583	Encode a dict to RFC 7578-compliant form-data
	2584
	2585	data:
	2586	A dict where keys and values can be either Unicode or bytes-like
	2587	objects.
	2588	boundary:
	2589	If specified a Unicode object, it's used as the boundary. Otherwise
	2590	a random boundary is generated.
	2591
	2592	Reference: https://tools.ietf.org/html/rfc7578
	2593	'''
	2594	has_specified_boundary = boundary is not None
	2595
	2596	while True:
	2597	if boundary is None:
	2598	boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
	2599
	2600	try:
	2601	out, content_type = _multipart_encode_impl(data, boundary)
	2602	break
	2603	except ValueError:
	2604	if has_specified_boundary:
	2605	raise
	2606	boundary = None
	2607
	2608	return out, content_type
	2609
	2610
	2611	def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
	2612	if blocked_types is NO_DEFAULT:
	2613	blocked_types = (str, bytes, collections.abc.Mapping)
	2614	return isinstance(x, allowed_types) and not isinstance(x, blocked_types)
	2615
	2616
	2617	def variadic(x, allowed_types=NO_DEFAULT):
	2618	if not isinstance(allowed_types, (tuple, type)):
	2619	deprecation_warning('allowed_types should be a tuple or a type')
	2620	allowed_types = tuple(allowed_types)
	2621	return x if is_iterable_like(x, blocked_types=allowed_types) else (x, )
	2622
	2623
	2624	def try_call(*funcs, expected_type=None, args=[], kwargs={}):
	2625	for f in funcs:
	2626	try:
	2627	val = f(args, *kwargs)
	2628	except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
	2629	pass
	2630	else:
	2631	if expected_type is None or isinstance(val, expected_type):
	2632	return val
	2633
	2634
	2635	def try_get(src, getter, expected_type=None):
	2636	return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
	2637
	2638
	2639	def filter_dict(dct, cndn=lambda _, v: v is not None):
	2640	return {k: v for k, v in dct.items() if cndn(k, v)}
	2641
	2642
	2643	def merge_dicts(*dicts):
	2644	merged = {}
	2645	for a_dict in dicts:
	2646	for k, v in a_dict.items():
	2647	if (v is not None and k not in merged
	2648	or isinstance(v, str) and merged[k] == ''):
	2649	merged[k] = v
	2650	return merged
	2651
	2652
	2653	def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
	2654	return string if isinstance(string, str) else str(string, encoding, errors)
	2655
	2656
	2657	US_RATINGS = {
	2658	'G': 0,
	2659	'PG': 10,
	2660	'PG-13': 13,
	2661	'R': 16,
	2662	'NC': 18,
	2663	}
	2664
	2665
	2666	TV_PARENTAL_GUIDELINES = {
	2667	'TV-Y': 0,
	2668	'TV-Y7': 7,
	2669	'TV-G': 0,
	2670	'TV-PG': 0,
	2671	'TV-14': 14,
	2672	'TV-MA': 17,
	2673	}
	2674
	2675
	2676	def parse_age_limit(s):
	2677	# isinstance(False, int) is True. So type() must be used instead
	2678	if type(s) is int: # noqa: E721
	2679	return s if 0 <= s <= 21 else None
	2680	elif not isinstance(s, str):
	2681	return None
	2682	m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
	2683	if m:
	2684	return int(m.group('age'))
	2685	s = s.upper()
	2686	if s in US_RATINGS:
	2687	return US_RATINGS[s]
	2688	m = re.match(r'^TV[_-]?(%s)$' % '\|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES), s)
	2689	if m:
	2690	return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
	2691	return None
	2692
	2693
	2694	def strip_jsonp(code):
	2695	return re.sub(
	2696	r'''(?sx)^
	2697	(?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
	2698	(?:\s&&\s(?P=func_name))?
	2699	\s$\s(?P<callback_data>.*)$;?
	2700	\s?(?://[^\n])*$''',
	2701	r'\g<callback_data>', code)
	2702
	2703
	2704	def js_to_json(code, vars={}, *, strict=False):
	2705	# vars is a dict of var, val pairs to substitute
	2706	STRING_QUOTES = '\'"`'
	2707	STRING_RE = '\|'.join(rf'{q}(?:\\.\|[^\\{q}])*{q}' for q in STRING_QUOTES)
	2708	COMMENT_RE = r'/\(?:(?!\/).)?\/\|//[^\n]*\n'
	2709	SKIP_RE = fr'\s(?:{COMMENT_RE})?\s'
	2710	INTEGER_TABLE = (
	2711	(fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
	2712	(fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
	2713	)
	2714
	2715	def process_escape(match):
	2716	JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
	2717	escape = match.group(1) or match.group(2)
	2718
	2719	return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
	2720	else R'\u00' if escape == 'x'
	2721	else '' if escape == '\n'
	2722	else escape)
	2723
	2724	def template_substitute(match):
	2725	evaluated = js_to_json(match.group(1), vars, strict=strict)
	2726	if evaluated[0] == '"':
	2727	return json.loads(evaluated)
	2728	return evaluated
	2729
	2730	def fix_kv(m):
	2731	v = m.group(0)
	2732	if v in ('true', 'false', 'null'):
	2733	return v
	2734	elif v in ('undefined', 'void 0'):
	2735	return 'null'
	2736	elif v.startswith('/*') or v.startswith('//') or v.startswith('!') or v == ',':
	2737	return ''
	2738
	2739	if v[0] in STRING_QUOTES:
	2740	v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
	2741	escaped = re.sub(r'(?s)(")\|\\(.)', process_escape, v)
	2742	return f'"{escaped}"'
	2743
	2744	for regex, base in INTEGER_TABLE:
	2745	im = re.match(regex, v)
	2746	if im:
	2747	i = int(im.group(1), base)
	2748	return f'"{i}":' if v.endswith(':') else str(i)
	2749
	2750	if v in vars:
	2751	try:
	2752	if not strict:
	2753	json.loads(vars[v])
	2754	except json.JSONDecodeError:
	2755	return json.dumps(vars[v])
	2756	else:
	2757	return vars[v]
	2758
	2759	if not strict:
	2760	return f'"{v}"'
	2761
	2762	raise ValueError(f'Unknown value: {v}')
	2763
	2764	def create_map(mobj):
	2765	return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))
	2766
	2767	code = re.sub(r'(?:new\s+)?Array$(.*?)$', r'[\g<1>]', code)
	2768	code = re.sub(r'new Map$(\[.*?\])?$', create_map, code)
	2769	if not strict:
	2770	code = re.sub(rf'new Date$({STRING_RE})$', r'\g<1>', code)
	2771	code = re.sub(r'new \w+$(.*?)$', lambda m: json.dumps(m.group(0)), code)
	2772	code = re.sub(r'parseInt$[^\d]+(\d+)[^\d]+$', r'\1', code)
	2773	code = re.sub(r'$function\([^)]$\s\{[^}]\}\s\)\s$\s(["\'][^)]["\'])\s$', r'\1', code)
	2774
	2775	return re.sub(rf'''(?sx)
	2776	{STRING_RE}\|
	2777	{COMMENT_RE}\|,(?={SKIP_RE}[\]}}])\|
	2778	void\s0\|(?:(?<![0-9])[eE]\|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*\|
	2779	\b(?:0[xX][0-9a-fA-F]+\|0+[0-7]+)(?:{SKIP_RE}:)?\|
	2780	[0-9]+(?={SKIP_RE}:)\|
	2781	!+
	2782	''', fix_kv, code)
	2783
	2784
	2785	def qualities(quality_ids):
	2786	""" Get a numeric quality value out of a list of possible values """
	2787	def q(qid):
	2788	try:
	2789	return quality_ids.index(qid)
	2790	except ValueError:
	2791	return -1
	2792	return q
	2793
	2794
	2795	POSTPROCESS_WHEN = ('pre_process', 'after_filter', 'video', 'before_dl', 'post_process', 'after_move', 'after_video', 'playlist')
	2796
	2797
	2798	DEFAULT_OUTTMPL = {
	2799	'default': '%(title)s [%(id)s].%(ext)s',
	2800	'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
	2801	}
	2802	OUTTMPL_TYPES = {
	2803	'chapter': None,
	2804	'subtitle': None,
	2805	'thumbnail': None,
	2806	'description': 'description',
	2807	'annotation': 'annotations.xml',
	2808	'infojson': 'info.json',
	2809	'link': None,
	2810	'pl_video': None,
	2811	'pl_thumbnail': None,
	2812	'pl_description': 'description',
	2813	'pl_infojson': 'info.json',
	2814	}
	2815
	2816	# As of [1] format syntax is:
	2817	# %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
	2818	# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
	2819	STR_FORMAT_RE_TMPL = r'''(?x)
	2820	(?<!%)(?P<prefix>(?:%%)*)
	2821	%
	2822	(?P<has_key>$(?P<key>{0})$)?
	2823	(?P<format>
	2824	(?P<conversion>[#0\-+ ]+)?
	2825	(?P<min_width>\d+)?
	2826	(?P<precision>\.\d+)?
	2827	(?P<len_mod>[hlL])? # unused in python
	2828	{1} # conversion type
	2829	)
	2830	'''
	2831
	2832
	2833	STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
	2834
	2835
	2836	def limit_length(s, length):
	2837	""" Add ellipses to overly long strings """
	2838	if s is None:
	2839	return None
	2840	ELLIPSES = '...'
	2841	if len(s) > length:
	2842	return s[:length - len(ELLIPSES)] + ELLIPSES
	2843	return s
	2844
	2845
	2846	def version_tuple(v):
	2847	return tuple(int(e) for e in re.split(r'[-.]', v))
	2848
	2849
	2850	def is_outdated_version(version, limit, assume_new=True):
	2851	if not version:
	2852	return not assume_new
	2853	try:
	2854	return version_tuple(version) < version_tuple(limit)
	2855	except ValueError:
	2856	return not assume_new
	2857
	2858
	2859	def ytdl_is_updateable():
	2860	""" Returns if yt-dlp can be updated with -U """
	2861
	2862	from ..update import is_non_updateable
	2863
	2864	return not is_non_updateable()
	2865
	2866
	2867	def args_to_str(args):
	2868	# Get a short string representation for a subprocess command
	2869	return shell_quote(args)
	2870
	2871
	2872	def error_to_str(err):
	2873	return f'{type(err).__name__}: {err}'
	2874
	2875
	2876	def mimetype2ext(mt, default=NO_DEFAULT):
	2877	if not isinstance(mt, str):
	2878	if default is not NO_DEFAULT:
	2879	return default
	2880	return None
	2881
	2882	MAP = {
	2883	# video
	2884	'3gpp': '3gp',
	2885	'mp2t': 'ts',
	2886	'mp4': 'mp4',
	2887	'mpeg': 'mpeg',
	2888	'mpegurl': 'm3u8',
	2889	'quicktime': 'mov',
	2890	'webm': 'webm',
	2891	'vp9': 'vp9',
	2892	'video/ogg': 'ogv',
	2893	'x-flv': 'flv',
	2894	'x-m4v': 'm4v',
	2895	'x-matroska': 'mkv',
	2896	'x-mng': 'mng',
	2897	'x-mp4-fragmented': 'mp4',
	2898	'x-ms-asf': 'asf',
	2899	'x-ms-wmv': 'wmv',
	2900	'x-msvideo': 'avi',
	2901
	2902	# application (streaming playlists)
	2903	'dash+xml': 'mpd',
	2904	'f4m+xml': 'f4m',
	2905	'hds+xml': 'f4m',
	2906	'vnd.apple.mpegurl': 'm3u8',
	2907	'vnd.ms-sstr+xml': 'ism',
	2908	'x-mpegurl': 'm3u8',
	2909
	2910	# audio
	2911	'audio/mp4': 'm4a',
	2912	# Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
	2913	# Using .mp3 as it's the most popular one
	2914	'audio/mpeg': 'mp3',
	2915	'audio/webm': 'webm',
	2916	'audio/x-matroska': 'mka',
	2917	'audio/x-mpegurl': 'm3u',
	2918	'midi': 'mid',
	2919	'ogg': 'ogg',
	2920	'wav': 'wav',
	2921	'wave': 'wav',
	2922	'x-aac': 'aac',
	2923	'x-flac': 'flac',
	2924	'x-m4a': 'm4a',
	2925	'x-realaudio': 'ra',
	2926	'x-wav': 'wav',
	2927
	2928	# image
	2929	'avif': 'avif',
	2930	'bmp': 'bmp',
	2931	'gif': 'gif',
	2932	'jpeg': 'jpg',
	2933	'png': 'png',
	2934	'svg+xml': 'svg',
	2935	'tiff': 'tif',
	2936	'vnd.wap.wbmp': 'wbmp',
	2937	'webp': 'webp',
	2938	'x-icon': 'ico',
	2939	'x-jng': 'jng',
	2940	'x-ms-bmp': 'bmp',
	2941
	2942	# caption
	2943	'filmstrip+json': 'fs',
	2944	'smptett+xml': 'tt',
	2945	'ttaf+xml': 'dfxp',
	2946	'ttml+xml': 'ttml',
	2947	'x-ms-sami': 'sami',
	2948
	2949	# misc
	2950	'gzip': 'gz',
	2951	'json': 'json',
	2952	'xml': 'xml',
	2953	'zip': 'zip',
	2954	}
	2955
	2956	mimetype = mt.partition(';')[0].strip().lower()
	2957	_, _, subtype = mimetype.rpartition('/')
	2958
	2959	ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
	2960	if ext:
	2961	return ext
	2962	elif default is not NO_DEFAULT:
	2963	return default
	2964	return subtype.replace('+', '.')
	2965
	2966
	2967	def ext2mimetype(ext_or_url):
	2968	if not ext_or_url:
	2969	return None
	2970	if '.' not in ext_or_url:
	2971	ext_or_url = f'file.{ext_or_url}'
	2972	return mimetypes.guess_type(ext_or_url)[0]
	2973
	2974
	2975	def parse_codecs(codecs_str):
	2976	# http://tools.ietf.org/html/rfc6381
	2977	if not codecs_str:
	2978	return {}
	2979	split_codecs = list(filter(None, map(
	2980	str.strip, codecs_str.strip().strip(',').split(','))))
	2981	vcodec, acodec, scodec, hdr = None, None, None, None
	2982	for full_codec in split_codecs:
	2983	parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
	2984	if parts[0] in ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
	2985	'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe'):
	2986	if vcodec:
	2987	continue
	2988	vcodec = full_codec
	2989	if parts[0] in ('dvh1', 'dvhe'):
	2990	hdr = 'DV'
	2991	elif parts[0] == 'av1' and traversal.traverse_obj(parts, 3) == '10':
	2992	hdr = 'HDR10'
	2993	elif parts[:2] == ['vp9', '2']:
	2994	hdr = 'HDR10'
	2995	elif parts[0] in ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
	2996	'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl'):
	2997	acodec = acodec or full_codec
	2998	elif parts[0] in ('stpp', 'wvtt'):
	2999	scodec = scodec or full_codec
	3000	else:
	3001	write_string(f'WARNING: Unknown codec {full_codec}\n')
	3002	if vcodec or acodec or scodec:
	3003	return {
	3004	'vcodec': vcodec or 'none',
	3005	'acodec': acodec or 'none',
	3006	'dynamic_range': hdr,
	3007	**({'scodec': scodec} if scodec is not None else {}),
	3008	}
	3009	elif len(split_codecs) == 2:
	3010	return {
	3011	'vcodec': split_codecs[0],
	3012	'acodec': split_codecs[1],
	3013	}
	3014	return {}
	3015
	3016
	3017	def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
	3018	assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)
	3019
	3020	allow_mkv = not preferences or 'mkv' in preferences
	3021
	3022	if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
	3023	return 'mkv' # TODO: any other format allows this?
	3024
	3025	# TODO: All codecs supported by parse_codecs isn't handled here
	3026	COMPATIBLE_CODECS = {
	3027	'mp4': {
	3028	'av1', 'hevc', 'avc1', 'mp4a', 'ac-4', # fourcc (m3u8, mpd)
	3029	'h264', 'aacl', 'ec-3', # Set in ISM
	3030	},
	3031	'webm': {
	3032	'av1', 'vp9', 'vp8', 'opus', 'vrbs',
	3033	'vp9x', 'vp8x', # in the webm spec
	3034	},
	3035	}
	3036
	3037	sanitize_codec = functools.partial(
	3038	try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
	3039	vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)
	3040
	3041	for ext in preferences or COMPATIBLE_CODECS.keys():
	3042	codec_set = COMPATIBLE_CODECS.get(ext, set())
	3043	if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
	3044	return ext
	3045
	3046	COMPATIBLE_EXTS = (
	3047	{'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
	3048	{'webm', 'weba'},
	3049	)
	3050	for ext in preferences or vexts:
	3051	current_exts = {ext, vexts, aexts}
	3052	if ext == 'mkv' or current_exts == {ext} or any(
	3053	ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
	3054	return ext
	3055	return 'mkv' if allow_mkv else preferences[-1]
	3056
	3057
	3058	def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
	3059	getheader = url_handle.headers.get
	3060
	3061	cd = getheader('Content-Disposition')
	3062	if cd:
	3063	m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', cd)
	3064	if m:
	3065	e = determine_ext(m.group('filename'), default_ext=None)
	3066	if e:
	3067	return e
	3068
	3069	meta_ext = getheader('x-amz-meta-name')
	3070	if meta_ext:
	3071	e = meta_ext.rpartition('.')[2]
	3072	if e:
	3073	return e
	3074
	3075	return mimetype2ext(getheader('Content-Type'), default=default)
	3076
	3077
	3078	def encode_data_uri(data, mime_type):
	3079	return 'data:%s;base64,%s' % (mime_type, base64.b64encode(data).decode('ascii'))
	3080
	3081
	3082	def age_restricted(content_limit, age_limit):
	3083	""" Returns True iff the content should be blocked """
	3084
	3085	if age_limit is None: # No limit set
	3086	return False
	3087	if content_limit is None:
	3088	return False # Content available for everyone
	3089	return age_limit < content_limit
	3090
	3091
	3092	# List of known byte-order-marks (BOM)
	3093	BOMS = [
	3094	(b'\xef\xbb\xbf', 'utf-8'),
	3095	(b'\x00\x00\xfe\xff', 'utf-32-be'),
	3096	(b'\xff\xfe\x00\x00', 'utf-32-le'),
	3097	(b'\xff\xfe', 'utf-16-le'),
	3098	(b'\xfe\xff', 'utf-16-be'),
	3099	]
	3100
	3101
	3102	def is_html(first_bytes):
	3103	""" Detect whether a file contains HTML by examining its first bytes. """
	3104
	3105	encoding = 'utf-8'
	3106	for bom, enc in BOMS:
	3107	while first_bytes.startswith(bom):
	3108	encoding, first_bytes = enc, first_bytes[len(bom):]
	3109
	3110	return re.match(r'^\s*<', first_bytes.decode(encoding, 'replace'))
	3111
	3112
	3113	def determine_protocol(info_dict):
	3114	protocol = info_dict.get('protocol')
	3115	if protocol is not None:
	3116	return protocol
	3117
	3118	url = sanitize_url(info_dict['url'])
	3119	if url.startswith('rtmp'):
	3120	return 'rtmp'
	3121	elif url.startswith('mms'):
	3122	return 'mms'
	3123	elif url.startswith('rtsp'):
	3124	return 'rtsp'
	3125
	3126	ext = determine_ext(url)
	3127	if ext == 'm3u8':
	3128	return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
	3129	elif ext == 'f4m':
	3130	return 'f4m'
	3131
	3132	return urllib.parse.urlparse(url).scheme
	3133
	3134
	3135	def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
	3136	""" Render a list of rows, each as a list of values.
	3137	Text after a \t will be right aligned """
	3138	def width(string):
	3139	return len(remove_terminal_sequences(string).replace('\t', ''))
	3140
	3141	def get_max_lens(table):
	3142	return [max(width(str(v)) for v in col) for col in zip(*table)]
	3143
	3144	def filter_using_list(row, filterArray):
	3145	return [col for take, col in itertools.zip_longest(filterArray, row, fillvalue=True) if take]
	3146
	3147	max_lens = get_max_lens(data) if hide_empty else []
	3148	header_row = filter_using_list(header_row, max_lens)
	3149	data = [filter_using_list(row, max_lens) for row in data]
	3150
	3151	table = [header_row] + data
	3152	max_lens = get_max_lens(table)
	3153	extra_gap += 1
	3154	if delim:
	3155	table = [header_row, [delim * (ml + extra_gap) for ml in max_lens]] + data
	3156	table[1][-1] = table[1][-1][:-extra_gap * len(delim)] # Remove extra_gap from end of delimiter
	3157	for row in table:
	3158	for pos, text in enumerate(map(str, row)):
	3159	if '\t' in text:
	3160	row[pos] = text.replace('\t', ' ' * (max_lens[pos] - width(text))) + ' ' * extra_gap
	3161	else:
	3162	row[pos] = text + ' ' * (max_lens[pos] - width(text) + extra_gap)
	3163	ret = '\n'.join(''.join(row).rstrip() for row in table)
	3164	return ret
	3165
	3166
	3167	def _match_one(filter_part, dct, incomplete):
	3168	# TODO: Generalize code with YoutubeDL._build_format_filter
	3169	STRING_OPERATORS = {
	3170	'*=': operator.contains,
	3171	'^=': lambda attr, value: attr.startswith(value),
	3172	'$=': lambda attr, value: attr.endswith(value),
	3173	'~=': lambda attr, value: re.search(value, attr),
	3174	}
	3175	COMPARISON_OPERATORS = {
	3176	**STRING_OPERATORS,
	3177	'<=': operator.le, # "<=" must be defined above "<"
	3178	'<': operator.lt,
	3179	'>=': operator.ge,
	3180	'>': operator.gt,
	3181	'=': operator.eq,
	3182	}
	3183
	3184	if isinstance(incomplete, bool):
	3185	is_incomplete = lambda _: incomplete
	3186	else:
	3187	is_incomplete = lambda k: k in incomplete
	3188
	3189	operator_rex = re.compile(r'''(?x)
	3190	(?P<key>[a-z_]+)
	3191	\s(?P<negation>!\s)?(?P<op>%s)(?P<none_inclusive>\s\?)?\s
	3192	(?:
	3193	(?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)\|
	3194	(?P<strval>.+?)
	3195	)
	3196	''' % '\|'.join(map(re.escape, COMPARISON_OPERATORS.keys())))
	3197	m = operator_rex.fullmatch(filter_part.strip())
	3198	if m:
	3199	m = m.groupdict()
	3200	unnegated_op = COMPARISON_OPERATORS[m['op']]
	3201	if m['negation']:
	3202	op = lambda attr, value: not unnegated_op(attr, value)
	3203	else:
	3204	op = unnegated_op
	3205	comparison_value = m['quotedstrval'] or m['strval'] or m['intval']
	3206	if m['quote']:
	3207	comparison_value = comparison_value.replace(r'\%s' % m['quote'], m['quote'])
	3208	actual_value = dct.get(m['key'])
	3209	numeric_comparison = None
	3210	if isinstance(actual_value, (int, float)):
	3211	# If the original field is a string and matching comparisonvalue is
	3212	# a number we should respect the origin of the original field
	3213	# and process comparison value as a string (see
	3214	# https://github.com/ytdl-org/youtube-dl/issues/11082)
	3215	try:
	3216	numeric_comparison = int(comparison_value)
	3217	except ValueError:
	3218	numeric_comparison = parse_filesize(comparison_value)
	3219	if numeric_comparison is None:
	3220	numeric_comparison = parse_filesize(f'{comparison_value}B')
	3221	if numeric_comparison is None:
	3222	numeric_comparison = parse_duration(comparison_value)
	3223	if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
	3224	raise ValueError('Operator %s only supports string values!' % m['op'])
	3225	if actual_value is None:
	3226	return is_incomplete(m['key']) or m['none_inclusive']
	3227	return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)
	3228
	3229	UNARY_OPERATORS = {
	3230	'': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
	3231	'!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
	3232	}
	3233	operator_rex = re.compile(r'''(?x)
	3234	(?P<op>%s)\s*(?P<key>[a-z_]+)
	3235	''' % '\|'.join(map(re.escape, UNARY_OPERATORS.keys())))
	3236	m = operator_rex.fullmatch(filter_part.strip())
	3237	if m:
	3238	op = UNARY_OPERATORS[m.group('op')]
	3239	actual_value = dct.get(m.group('key'))
	3240	if is_incomplete(m.group('key')) and actual_value is None:
	3241	return True
	3242	return op(actual_value)
	3243
	3244	raise ValueError('Invalid filter part %r' % filter_part)
	3245
	3246
	3247	def match_str(filter_str, dct, incomplete=False):
	3248	""" Filter a dictionary with a simple string syntax.
	3249	@returns Whether the filter passes
	3250	@param incomplete Set of keys that is expected to be missing from dct.
	3251	Can be True/False to indicate all/none of the keys may be missing.
	3252	All conditions on incomplete keys pass if the key is missing
	3253	"""
	3254	return all(
	3255	_match_one(filter_part.replace(r'\&', '&'), dct, incomplete)
	3256	for filter_part in re.split(r'(?<!\\)&', filter_str))
	3257
	3258
	3259	def match_filter_func(filters, breaking_filters=None):
	3260	if not filters and not breaking_filters:
	3261	return None
	3262	repr_ = f'{match_filter_func.__module__}.{match_filter_func.__qualname__}({filters}, {breaking_filters})'
	3263
	3264	breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
	3265	filters = set(variadic(filters or []))
	3266
	3267	interactive = '-' in filters
	3268	if interactive:
	3269	filters.remove('-')
	3270
	3271	@function_with_repr.set_repr(repr_)
	3272	def _match_func(info_dict, incomplete=False):
	3273	ret = breaking_filters(info_dict, incomplete)
	3274	if ret is not None:
	3275	raise RejectedVideoReached(ret)
	3276
	3277	if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
	3278	return NO_DEFAULT if interactive and not incomplete else None
	3279	else:
	3280	video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
	3281	filter_str = ') \| ('.join(map(str.strip, filters))
	3282	return f'{video_title} does not pass filter ({filter_str}), skipping ..'
	3283	return _match_func
	3284
	3285
	3286	class download_range_func:
	3287	def __init__(self, chapters, ranges, from_info=False):
	3288	self.chapters, self.ranges, self.from_info = chapters, ranges, from_info
	3289
	3290	def __call__(self, info_dict, ydl):
	3291
	3292	warning = ('There are no chapters matching the regex' if info_dict.get('chapters')
	3293	else 'Cannot match chapters since chapter information is unavailable')
	3294	for regex in self.chapters or []:
	3295	for i, chapter in enumerate(info_dict.get('chapters') or []):
	3296	if re.search(regex, chapter['title']):
	3297	warning = None
	3298	yield {**chapter, 'index': i}
	3299	if self.chapters and warning:
	3300	ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')
	3301
	3302	for start, end in self.ranges or []:
	3303	yield {
	3304	'start_time': self._handle_negative_timestamp(start, info_dict),
	3305	'end_time': self._handle_negative_timestamp(end, info_dict),
	3306	}
	3307
	3308	if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
	3309	yield {
	3310	'start_time': info_dict.get('start_time') or 0,
	3311	'end_time': info_dict.get('end_time') or float('inf'),
	3312	}
	3313	elif not self.ranges and not self.chapters:
	3314	yield {}
	3315
	3316	@staticmethod
	3317	def _handle_negative_timestamp(time, info):
	3318	return max(info['duration'] + time, 0) if info.get('duration') and time < 0 else time
	3319
	3320	def __eq__(self, other):
	3321	return (isinstance(other, download_range_func)
	3322	and self.chapters == other.chapters and self.ranges == other.ranges)
	3323
	3324	def __repr__(self):
	3325	return f'{__name__}.{type(self).__name__}({self.chapters}, {self.ranges})'
	3326
	3327
	3328	def parse_dfxp_time_expr(time_expr):
	3329	if not time_expr:
	3330	return
	3331
	3332	mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
	3333	if mobj:
	3334	return float(mobj.group('time_offset'))
	3335
	3336	mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.\|:)\d+)?)$', time_expr)
	3337	if mobj:
	3338	return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
	3339
	3340
	3341	def srt_subtitles_timecode(seconds):
	3342	return '%02d:%02d:%02d,%03d' % timetuple_from_msec(seconds * 1000)
	3343
	3344
	3345	def ass_subtitles_timecode(seconds):
	3346	time = timetuple_from_msec(seconds * 1000)
	3347	return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
	3348
	3349
	3350	def dfxp2srt(dfxp_data):
	3351	'''
	3352	@param dfxp_data A bytes-like object containing DFXP data
	3353	@returns A unicode object containing converted SRT data
	3354	'''
	3355	LEGACY_NAMESPACES = (
	3356	(b'http://www.w3.org/ns/ttml', [
	3357	b'http://www.w3.org/2004/11/ttaf1',
	3358	b'http://www.w3.org/2006/04/ttaf1',
	3359	b'http://www.w3.org/2006/10/ttaf1',
	3360	]),
	3361	(b'http://www.w3.org/ns/ttml#styling', [
	3362	b'http://www.w3.org/ns/ttml#style',
	3363	]),
	3364	)
	3365
	3366	SUPPORTED_STYLING = [
	3367	'color',
	3368	'fontFamily',
	3369	'fontSize',
	3370	'fontStyle',
	3371	'fontWeight',
	3372	'textDecoration'
	3373	]
	3374
	3375	_x = functools.partial(xpath_with_ns, ns_map={
	3376	'xml': 'http://www.w3.org/XML/1998/namespace',
	3377	'ttml': 'http://www.w3.org/ns/ttml',
	3378	'tts': 'http://www.w3.org/ns/ttml#styling',
	3379	})
	3380
	3381	styles = {}
	3382	default_style = {}
	3383
	3384	class TTMLPElementParser:
	3385	_out = ''
	3386	_unclosed_elements = []
	3387	_applied_styles = []
	3388
	3389	def start(self, tag, attrib):
	3390	if tag in (_x('ttml:br'), 'br'):
	3391	self._out += '\n'
	3392	else:
	3393	unclosed_elements = []
	3394	style = {}
	3395	element_style_id = attrib.get('style')
	3396	if default_style:
	3397	style.update(default_style)
	3398	if element_style_id:
	3399	style.update(styles.get(element_style_id, {}))
	3400	for prop in SUPPORTED_STYLING:
	3401	prop_val = attrib.get(_x('tts:' + prop))
	3402	if prop_val:
	3403	style[prop] = prop_val
	3404	if style:
	3405	font = ''
	3406	for k, v in sorted(style.items()):
	3407	if self._applied_styles and self._applied_styles[-1].get(k) == v:
	3408	continue
	3409	if k == 'color':
	3410	font += ' color="%s"' % v
	3411	elif k == 'fontSize':
	3412	font += ' size="%s"' % v
	3413	elif k == 'fontFamily':
	3414	font += ' face="%s"' % v
	3415	elif k == 'fontWeight' and v == 'bold':
	3416	self._out += '<b>'
	3417	unclosed_elements.append('b')
	3418	elif k == 'fontStyle' and v == 'italic':
	3419	self._out += '<i>'
	3420	unclosed_elements.append('i')
	3421	elif k == 'textDecoration' and v == 'underline':
	3422	self._out += '<u>'
	3423	unclosed_elements.append('u')
	3424	if font:
	3425	self._out += '<font' + font + '>'
	3426	unclosed_elements.append('font')
	3427	applied_style = {}
	3428	if self._applied_styles:
	3429	applied_style.update(self._applied_styles[-1])
	3430	applied_style.update(style)
	3431	self._applied_styles.append(applied_style)
	3432	self._unclosed_elements.append(unclosed_elements)
	3433
	3434	def end(self, tag):
	3435	if tag not in (_x('ttml:br'), 'br'):
	3436	unclosed_elements = self._unclosed_elements.pop()
	3437	for element in reversed(unclosed_elements):
	3438	self._out += '</%s>' % element
	3439	if unclosed_elements and self._applied_styles:
	3440	self._applied_styles.pop()
	3441
	3442	def data(self, data):
	3443	self._out += data
	3444
	3445	def close(self):
	3446	return self._out.strip()
	3447
	3448	# Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
	3449	# This will not trigger false positives since only UTF-8 text is being replaced
	3450	dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')
	3451
	3452	def parse_node(node):
	3453	target = TTMLPElementParser()
	3454	parser = xml.etree.ElementTree.XMLParser(target=target)
	3455	parser.feed(xml.etree.ElementTree.tostring(node))
	3456	return parser.close()
	3457
	3458	for k, v in LEGACY_NAMESPACES:
	3459	for ns in v:
	3460	dfxp_data = dfxp_data.replace(ns, k)
	3461
	3462	dfxp = compat_etree_fromstring(dfxp_data)
	3463	out = []
	3464	paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
	3465
	3466	if not paras:
	3467	raise ValueError('Invalid dfxp/TTML subtitle')
	3468
	3469	repeat = False
	3470	while True:
	3471	for style in dfxp.findall(_x('.//ttml:style')):
	3472	style_id = style.get('id') or style.get(_x('xml:id'))
	3473	if not style_id:
	3474	continue
	3475	parent_style_id = style.get('style')
	3476	if parent_style_id:
	3477	if parent_style_id not in styles:
	3478	repeat = True
	3479	continue
	3480	styles[style_id] = styles[parent_style_id].copy()
	3481	for prop in SUPPORTED_STYLING:
	3482	prop_val = style.get(_x('tts:' + prop))
	3483	if prop_val:
	3484	styles.setdefault(style_id, {})[prop] = prop_val
	3485	if repeat:
	3486	repeat = False
	3487	else:
	3488	break
	3489
	3490	for p in ('body', 'div'):
	3491	ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
	3492	if ele is None:
	3493	continue
	3494	style = styles.get(ele.get('style'))
	3495	if not style:
	3496	continue
	3497	default_style.update(style)
	3498
	3499	for para, index in zip(paras, itertools.count(1)):
	3500	begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
	3501	end_time = parse_dfxp_time_expr(para.attrib.get('end'))
	3502	dur = parse_dfxp_time_expr(para.attrib.get('dur'))
	3503	if begin_time is None:
	3504	continue
	3505	if not end_time:
	3506	if not dur:
	3507	continue
	3508	end_time = begin_time + dur
	3509	out.append('%d\n%s --> %s\n%s\n\n' % (
	3510	index,
	3511	srt_subtitles_timecode(begin_time),
	3512	srt_subtitles_timecode(end_time),
	3513	parse_node(para)))
	3514
	3515	return ''.join(out)
	3516
	3517
	3518	def cli_option(params, command_option, param, separator=None):
	3519	param = params.get(param)
	3520	return ([] if param is None
	3521	else [command_option, str(param)] if separator is None
	3522	else [f'{command_option}{separator}{param}'])
	3523
	3524
	3525	def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
	3526	param = params.get(param)
	3527	assert param in (True, False, None)
	3528	return cli_option({True: true_value, False: false_value}, command_option, param, separator)
	3529
	3530
	3531	def cli_valueless_option(params, command_option, param, expected_value=True):
	3532	return [command_option] if params.get(param) == expected_value else []
	3533
	3534
	3535	def cli_configuration_args(argdict, keys, default=[], use_compat=True):
	3536	if isinstance(argdict, (list, tuple)): # for backward compatibility
	3537	if use_compat:
	3538	return argdict
	3539	else:
	3540	argdict = None
	3541	if argdict is None:
	3542	return default
	3543	assert isinstance(argdict, dict)
	3544
	3545	assert isinstance(keys, (list, tuple))
	3546	for key_list in keys:
	3547	arg_list = list(filter(
	3548	lambda x: x is not None,
	3549	[argdict.get(key.lower()) for key in variadic(key_list)]))
	3550	if arg_list:
	3551	return [arg for args in arg_list for arg in args]
	3552	return default
	3553
	3554
	3555	def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
	3556	main_key, exe = main_key.lower(), exe.lower()
	3557	root_key = exe if main_key == exe else f'{main_key}+{exe}'
	3558	keys = [f'{root_key}{k}' for k in (keys or [''])]
	3559	if root_key in keys:
	3560	if main_key != exe:
	3561	keys.append((main_key, exe))
	3562	keys.append('default')
	3563	else:
	3564	use_compat = False
	3565	return cli_configuration_args(argdict, keys, default, use_compat)
	3566
	3567
	3568	class ISO639Utils:
	3569	# See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
	3570	_lang_map = {
	3571	'aa': 'aar',
	3572	'ab': 'abk',
	3573	'ae': 'ave',
	3574	'af': 'afr',
	3575	'ak': 'aka',
	3576	'am': 'amh',
	3577	'an': 'arg',
	3578	'ar': 'ara',
	3579	'as': 'asm',
	3580	'av': 'ava',
	3581	'ay': 'aym',
	3582	'az': 'aze',
	3583	'ba': 'bak',
	3584	'be': 'bel',
	3585	'bg': 'bul',
	3586	'bh': 'bih',
	3587	'bi': 'bis',
	3588	'bm': 'bam',
	3589	'bn': 'ben',
	3590	'bo': 'bod',
	3591	'br': 'bre',
	3592	'bs': 'bos',
	3593	'ca': 'cat',
	3594	'ce': 'che',
	3595	'ch': 'cha',
	3596	'co': 'cos',
	3597	'cr': 'cre',
	3598	'cs': 'ces',
	3599	'cu': 'chu',
	3600	'cv': 'chv',
	3601	'cy': 'cym',
	3602	'da': 'dan',
	3603	'de': 'deu',
	3604	'dv': 'div',
	3605	'dz': 'dzo',
	3606	'ee': 'ewe',
	3607	'el': 'ell',
	3608	'en': 'eng',
	3609	'eo': 'epo',
	3610	'es': 'spa',
	3611	'et': 'est',
	3612	'eu': 'eus',
	3613	'fa': 'fas',
	3614	'ff': 'ful',
	3615	'fi': 'fin',
	3616	'fj': 'fij',
	3617	'fo': 'fao',
	3618	'fr': 'fra',
	3619	'fy': 'fry',
	3620	'ga': 'gle',
	3621	'gd': 'gla',
	3622	'gl': 'glg',
	3623	'gn': 'grn',
	3624	'gu': 'guj',
	3625	'gv': 'glv',
	3626	'ha': 'hau',
	3627	'he': 'heb',
	3628	'iw': 'heb', # Replaced by he in 1989 revision
	3629	'hi': 'hin',
	3630	'ho': 'hmo',
	3631	'hr': 'hrv',
	3632	'ht': 'hat',
	3633	'hu': 'hun',
	3634	'hy': 'hye',
	3635	'hz': 'her',
	3636	'ia': 'ina',
	3637	'id': 'ind',
	3638	'in': 'ind', # Replaced by id in 1989 revision
	3639	'ie': 'ile',
	3640	'ig': 'ibo',
	3641	'ii': 'iii',
	3642	'ik': 'ipk',
	3643	'io': 'ido',
	3644	'is': 'isl',
	3645	'it': 'ita',
	3646	'iu': 'iku',
	3647	'ja': 'jpn',
	3648	'jv': 'jav',
	3649	'ka': 'kat',
	3650	'kg': 'kon',
	3651	'ki': 'kik',
	3652	'kj': 'kua',
	3653	'kk': 'kaz',
	3654	'kl': 'kal',
	3655	'km': 'khm',
	3656	'kn': 'kan',
	3657	'ko': 'kor',
	3658	'kr': 'kau',
	3659	'ks': 'kas',
	3660	'ku': 'kur',
	3661	'kv': 'kom',
	3662	'kw': 'cor',
	3663	'ky': 'kir',
	3664	'la': 'lat',
	3665	'lb': 'ltz',
	3666	'lg': 'lug',
	3667	'li': 'lim',
	3668	'ln': 'lin',
	3669	'lo': 'lao',
	3670	'lt': 'lit',
	3671	'lu': 'lub',
	3672	'lv': 'lav',
	3673	'mg': 'mlg',
	3674	'mh': 'mah',
	3675	'mi': 'mri',
	3676	'mk': 'mkd',
	3677	'ml': 'mal',
	3678	'mn': 'mon',
	3679	'mr': 'mar',
	3680	'ms': 'msa',
	3681	'mt': 'mlt',
	3682	'my': 'mya',
	3683	'na': 'nau',
	3684	'nb': 'nob',
	3685	'nd': 'nde',
	3686	'ne': 'nep',
	3687	'ng': 'ndo',
	3688	'nl': 'nld',
	3689	'nn': 'nno',
	3690	'no': 'nor',
	3691	'nr': 'nbl',
	3692	'nv': 'nav',
	3693	'ny': 'nya',
	3694	'oc': 'oci',
	3695	'oj': 'oji',
	3696	'om': 'orm',
	3697	'or': 'ori',
	3698	'os': 'oss',
	3699	'pa': 'pan',
	3700	'pe': 'per',
	3701	'pi': 'pli',
	3702	'pl': 'pol',
	3703	'ps': 'pus',
	3704	'pt': 'por',
	3705	'qu': 'que',
	3706	'rm': 'roh',
	3707	'rn': 'run',
	3708	'ro': 'ron',
	3709	'ru': 'rus',
	3710	'rw': 'kin',
	3711	'sa': 'san',
	3712	'sc': 'srd',
	3713	'sd': 'snd',
	3714	'se': 'sme',
	3715	'sg': 'sag',
	3716	'si': 'sin',
	3717	'sk': 'slk',
	3718	'sl': 'slv',
	3719	'sm': 'smo',
	3720	'sn': 'sna',
	3721	'so': 'som',
	3722	'sq': 'sqi',
	3723	'sr': 'srp',
	3724	'ss': 'ssw',
	3725	'st': 'sot',
	3726	'su': 'sun',
	3727	'sv': 'swe',
	3728	'sw': 'swa',
	3729	'ta': 'tam',
	3730	'te': 'tel',
	3731	'tg': 'tgk',
	3732	'th': 'tha',
	3733	'ti': 'tir',
	3734	'tk': 'tuk',
	3735	'tl': 'tgl',
	3736	'tn': 'tsn',
	3737	'to': 'ton',
	3738	'tr': 'tur',
	3739	'ts': 'tso',
	3740	'tt': 'tat',
	3741	'tw': 'twi',
	3742	'ty': 'tah',
	3743	'ug': 'uig',
	3744	'uk': 'ukr',
	3745	'ur': 'urd',
	3746	'uz': 'uzb',
	3747	've': 'ven',
	3748	'vi': 'vie',
	3749	'vo': 'vol',
	3750	'wa': 'wln',
	3751	'wo': 'wol',
	3752	'xh': 'xho',
	3753	'yi': 'yid',
	3754	'ji': 'yid', # Replaced by yi in 1989 revision
	3755	'yo': 'yor',
	3756	'za': 'zha',
	3757	'zh': 'zho',
	3758	'zu': 'zul',
	3759	}
	3760
	3761	@classmethod
	3762	def short2long(cls, code):
	3763	"""Convert language code from ISO 639-1 to ISO 639-2/T"""
	3764	return cls._lang_map.get(code[:2])
	3765
	3766	@classmethod
	3767	def long2short(cls, code):
	3768	"""Convert language code from ISO 639-2/T to ISO 639-1"""
	3769	for short_name, long_name in cls._lang_map.items():
	3770	if long_name == code:
	3771	return short_name
	3772
	3773
	3774	class ISO3166Utils:
	3775	# From http://data.okfn.org/data/core/country-list
	3776	_country_map = {
	3777	'AF': 'Afghanistan',
	3778	'AX': 'Åland Islands',
	3779	'AL': 'Albania',
	3780	'DZ': 'Algeria',
	3781	'AS': 'American Samoa',
	3782	'AD': 'Andorra',
	3783	'AO': 'Angola',
	3784	'AI': 'Anguilla',
	3785	'AQ': 'Antarctica',
	3786	'AG': 'Antigua and Barbuda',
	3787	'AR': 'Argentina',
	3788	'AM': 'Armenia',
	3789	'AW': 'Aruba',
	3790	'AU': 'Australia',
	3791	'AT': 'Austria',
	3792	'AZ': 'Azerbaijan',
	3793	'BS': 'Bahamas',
	3794	'BH': 'Bahrain',
	3795	'BD': 'Bangladesh',
	3796	'BB': 'Barbados',
	3797	'BY': 'Belarus',
	3798	'BE': 'Belgium',
	3799	'BZ': 'Belize',
	3800	'BJ': 'Benin',
	3801	'BM': 'Bermuda',
	3802	'BT': 'Bhutan',
	3803	'BO': 'Bolivia, Plurinational State of',
	3804	'BQ': 'Bonaire, Sint Eustatius and Saba',
	3805	'BA': 'Bosnia and Herzegovina',
	3806	'BW': 'Botswana',
	3807	'BV': 'Bouvet Island',
	3808	'BR': 'Brazil',
	3809	'IO': 'British Indian Ocean Territory',
	3810	'BN': 'Brunei Darussalam',
	3811	'BG': 'Bulgaria',
	3812	'BF': 'Burkina Faso',
	3813	'BI': 'Burundi',
	3814	'KH': 'Cambodia',
	3815	'CM': 'Cameroon',
	3816	'CA': 'Canada',
	3817	'CV': 'Cape Verde',
	3818	'KY': 'Cayman Islands',
	3819	'CF': 'Central African Republic',
	3820	'TD': 'Chad',
	3821	'CL': 'Chile',
	3822	'CN': 'China',
	3823	'CX': 'Christmas Island',
	3824	'CC': 'Cocos (Keeling) Islands',
	3825	'CO': 'Colombia',
	3826	'KM': 'Comoros',
	3827	'CG': 'Congo',
	3828	'CD': 'Congo, the Democratic Republic of the',
	3829	'CK': 'Cook Islands',
	3830	'CR': 'Costa Rica',
	3831	'CI': 'Côte d\'Ivoire',
	3832	'HR': 'Croatia',
	3833	'CU': 'Cuba',
	3834	'CW': 'Curaçao',
	3835	'CY': 'Cyprus',
	3836	'CZ': 'Czech Republic',
	3837	'DK': 'Denmark',
	3838	'DJ': 'Djibouti',
	3839	'DM': 'Dominica',
	3840	'DO': 'Dominican Republic',
	3841	'EC': 'Ecuador',
	3842	'EG': 'Egypt',
	3843	'SV': 'El Salvador',
	3844	'GQ': 'Equatorial Guinea',
	3845	'ER': 'Eritrea',
	3846	'EE': 'Estonia',
	3847	'ET': 'Ethiopia',
	3848	'FK': 'Falkland Islands (Malvinas)',
	3849	'FO': 'Faroe Islands',
	3850	'FJ': 'Fiji',
	3851	'FI': 'Finland',
	3852	'FR': 'France',
	3853	'GF': 'French Guiana',
	3854	'PF': 'French Polynesia',
	3855	'TF': 'French Southern Territories',
	3856	'GA': 'Gabon',
	3857	'GM': 'Gambia',
	3858	'GE': 'Georgia',
	3859	'DE': 'Germany',
	3860	'GH': 'Ghana',
	3861	'GI': 'Gibraltar',
	3862	'GR': 'Greece',
	3863	'GL': 'Greenland',
	3864	'GD': 'Grenada',
	3865	'GP': 'Guadeloupe',
	3866	'GU': 'Guam',
	3867	'GT': 'Guatemala',
	3868	'GG': 'Guernsey',
	3869	'GN': 'Guinea',
	3870	'GW': 'Guinea-Bissau',
	3871	'GY': 'Guyana',
	3872	'HT': 'Haiti',
	3873	'HM': 'Heard Island and McDonald Islands',
	3874	'VA': 'Holy See (Vatican City State)',
	3875	'HN': 'Honduras',
	3876	'HK': 'Hong Kong',
	3877	'HU': 'Hungary',
	3878	'IS': 'Iceland',
	3879	'IN': 'India',
	3880	'ID': 'Indonesia',
	3881	'IR': 'Iran, Islamic Republic of',
	3882	'IQ': 'Iraq',
	3883	'IE': 'Ireland',
	3884	'IM': 'Isle of Man',
	3885	'IL': 'Israel',
	3886	'IT': 'Italy',
	3887	'JM': 'Jamaica',
	3888	'JP': 'Japan',
	3889	'JE': 'Jersey',
	3890	'JO': 'Jordan',
	3891	'KZ': 'Kazakhstan',
	3892	'KE': 'Kenya',
	3893	'KI': 'Kiribati',
	3894	'KP': 'Korea, Democratic People\'s Republic of',
	3895	'KR': 'Korea, Republic of',
	3896	'KW': 'Kuwait',
	3897	'KG': 'Kyrgyzstan',
	3898	'LA': 'Lao People\'s Democratic Republic',
	3899	'LV': 'Latvia',
	3900	'LB': 'Lebanon',
	3901	'LS': 'Lesotho',
	3902	'LR': 'Liberia',
	3903	'LY': 'Libya',
	3904	'LI': 'Liechtenstein',
	3905	'LT': 'Lithuania',
	3906	'LU': 'Luxembourg',
	3907	'MO': 'Macao',
	3908	'MK': 'Macedonia, the Former Yugoslav Republic of',
	3909	'MG': 'Madagascar',
	3910	'MW': 'Malawi',
	3911	'MY': 'Malaysia',
	3912	'MV': 'Maldives',
	3913	'ML': 'Mali',
	3914	'MT': 'Malta',
	3915	'MH': 'Marshall Islands',
	3916	'MQ': 'Martinique',
	3917	'MR': 'Mauritania',
	3918	'MU': 'Mauritius',
	3919	'YT': 'Mayotte',
	3920	'MX': 'Mexico',
	3921	'FM': 'Micronesia, Federated States of',
	3922	'MD': 'Moldova, Republic of',
	3923	'MC': 'Monaco',
	3924	'MN': 'Mongolia',
	3925	'ME': 'Montenegro',
	3926	'MS': 'Montserrat',
	3927	'MA': 'Morocco',
	3928	'MZ': 'Mozambique',
	3929	'MM': 'Myanmar',
	3930	'NA': 'Namibia',
	3931	'NR': 'Nauru',
	3932	'NP': 'Nepal',
	3933	'NL': 'Netherlands',
	3934	'NC': 'New Caledonia',
	3935	'NZ': 'New Zealand',
	3936	'NI': 'Nicaragua',
	3937	'NE': 'Niger',
	3938	'NG': 'Nigeria',
	3939	'NU': 'Niue',
	3940	'NF': 'Norfolk Island',
	3941	'MP': 'Northern Mariana Islands',
	3942	'NO': 'Norway',
	3943	'OM': 'Oman',
	3944	'PK': 'Pakistan',
	3945	'PW': 'Palau',
	3946	'PS': 'Palestine, State of',
	3947	'PA': 'Panama',
	3948	'PG': 'Papua New Guinea',
	3949	'PY': 'Paraguay',
	3950	'PE': 'Peru',
	3951	'PH': 'Philippines',
	3952	'PN': 'Pitcairn',
	3953	'PL': 'Poland',
	3954	'PT': 'Portugal',
	3955	'PR': 'Puerto Rico',
	3956	'QA': 'Qatar',
	3957	'RE': 'Réunion',
	3958	'RO': 'Romania',
	3959	'RU': 'Russian Federation',
	3960	'RW': 'Rwanda',
	3961	'BL': 'Saint Barthélemy',
	3962	'SH': 'Saint Helena, Ascension and Tristan da Cunha',
	3963	'KN': 'Saint Kitts and Nevis',
	3964	'LC': 'Saint Lucia',
	3965	'MF': 'Saint Martin (French part)',
	3966	'PM': 'Saint Pierre and Miquelon',
	3967	'VC': 'Saint Vincent and the Grenadines',
	3968	'WS': 'Samoa',
	3969	'SM': 'San Marino',
	3970	'ST': 'Sao Tome and Principe',
	3971	'SA': 'Saudi Arabia',
	3972	'SN': 'Senegal',
	3973	'RS': 'Serbia',
	3974	'SC': 'Seychelles',
	3975	'SL': 'Sierra Leone',
	3976	'SG': 'Singapore',
	3977	'SX': 'Sint Maarten (Dutch part)',
	3978	'SK': 'Slovakia',
	3979	'SI': 'Slovenia',
	3980	'SB': 'Solomon Islands',
	3981	'SO': 'Somalia',
	3982	'ZA': 'South Africa',
	3983	'GS': 'South Georgia and the South Sandwich Islands',
	3984	'SS': 'South Sudan',
	3985	'ES': 'Spain',
	3986	'LK': 'Sri Lanka',
	3987	'SD': 'Sudan',
	3988	'SR': 'Suriname',
	3989	'SJ': 'Svalbard and Jan Mayen',
	3990	'SZ': 'Swaziland',
	3991	'SE': 'Sweden',
	3992	'CH': 'Switzerland',
	3993	'SY': 'Syrian Arab Republic',
	3994	'TW': 'Taiwan, Province of China',
	3995	'TJ': 'Tajikistan',
	3996	'TZ': 'Tanzania, United Republic of',
	3997	'TH': 'Thailand',
	3998	'TL': 'Timor-Leste',
	3999	'TG': 'Togo',
	4000	'TK': 'Tokelau',
	4001	'TO': 'Tonga',
	4002	'TT': 'Trinidad and Tobago',
	4003	'TN': 'Tunisia',
	4004	'TR': 'Turkey',
	4005	'TM': 'Turkmenistan',
	4006	'TC': 'Turks and Caicos Islands',
	4007	'TV': 'Tuvalu',
	4008	'UG': 'Uganda',
	4009	'UA': 'Ukraine',
	4010	'AE': 'United Arab Emirates',
	4011	'GB': 'United Kingdom',
	4012	'US': 'United States',
	4013	'UM': 'United States Minor Outlying Islands',
	4014	'UY': 'Uruguay',
	4015	'UZ': 'Uzbekistan',
	4016	'VU': 'Vanuatu',
	4017	'VE': 'Venezuela, Bolivarian Republic of',
	4018	'VN': 'Viet Nam',
	4019	'VG': 'Virgin Islands, British',
	4020	'VI': 'Virgin Islands, U.S.',
	4021	'WF': 'Wallis and Futuna',
	4022	'EH': 'Western Sahara',
	4023	'YE': 'Yemen',
	4024	'ZM': 'Zambia',
	4025	'ZW': 'Zimbabwe',
	4026	# Not ISO 3166 codes, but used for IP blocks
	4027	'AP': 'Asia/Pacific Region',
	4028	'EU': 'Europe',
	4029	}
	4030
	4031	@classmethod
	4032	def short2full(cls, code):
	4033	"""Convert an ISO 3166-2 country code to the corresponding full name"""
	4034	return cls._country_map.get(code.upper())
	4035
	4036
	4037	class GeoUtils:
	4038	# Major IPv4 address blocks per country
	4039	_country_ip_map = {
	4040	'AD': '46.172.224.0/19',
	4041	'AE': '94.200.0.0/13',
	4042	'AF': '149.54.0.0/17',
	4043	'AG': '209.59.64.0/18',
	4044	'AI': '204.14.248.0/21',
	4045	'AL': '46.99.0.0/16',
	4046	'AM': '46.70.0.0/15',
	4047	'AO': '105.168.0.0/13',
	4048	'AP': '182.50.184.0/21',
	4049	'AQ': '23.154.160.0/24',
	4050	'AR': '181.0.0.0/12',
	4051	'AS': '202.70.112.0/20',
	4052	'AT': '77.116.0.0/14',
	4053	'AU': '1.128.0.0/11',
	4054	'AW': '181.41.0.0/18',
	4055	'AX': '185.217.4.0/22',
	4056	'AZ': '5.197.0.0/16',
	4057	'BA': '31.176.128.0/17',
	4058	'BB': '65.48.128.0/17',
	4059	'BD': '114.130.0.0/16',
	4060	'BE': '57.0.0.0/8',
	4061	'BF': '102.178.0.0/15',
	4062	'BG': '95.42.0.0/15',
	4063	'BH': '37.131.0.0/17',
	4064	'BI': '154.117.192.0/18',
	4065	'BJ': '137.255.0.0/16',
	4066	'BL': '185.212.72.0/23',
	4067	'BM': '196.12.64.0/18',
	4068	'BN': '156.31.0.0/16',
	4069	'BO': '161.56.0.0/16',
	4070	'BQ': '161.0.80.0/20',
	4071	'BR': '191.128.0.0/12',
	4072	'BS': '24.51.64.0/18',
	4073	'BT': '119.2.96.0/19',
	4074	'BW': '168.167.0.0/16',
	4075	'BY': '178.120.0.0/13',
	4076	'BZ': '179.42.192.0/18',
	4077	'CA': '99.224.0.0/11',
	4078	'CD': '41.243.0.0/16',
	4079	'CF': '197.242.176.0/21',
	4080	'CG': '160.113.0.0/16',
	4081	'CH': '85.0.0.0/13',
	4082	'CI': '102.136.0.0/14',
	4083	'CK': '202.65.32.0/19',
	4084	'CL': '152.172.0.0/14',
	4085	'CM': '102.244.0.0/14',
	4086	'CN': '36.128.0.0/10',
	4087	'CO': '181.240.0.0/12',
	4088	'CR': '201.192.0.0/12',
	4089	'CU': '152.206.0.0/15',
	4090	'CV': '165.90.96.0/19',
	4091	'CW': '190.88.128.0/17',
	4092	'CY': '31.153.0.0/16',
	4093	'CZ': '88.100.0.0/14',
	4094	'DE': '53.0.0.0/8',
	4095	'DJ': '197.241.0.0/17',
	4096	'DK': '87.48.0.0/12',
	4097	'DM': '192.243.48.0/20',
	4098	'DO': '152.166.0.0/15',
	4099	'DZ': '41.96.0.0/12',
	4100	'EC': '186.68.0.0/15',
	4101	'EE': '90.190.0.0/15',
	4102	'EG': '156.160.0.0/11',
	4103	'ER': '196.200.96.0/20',
	4104	'ES': '88.0.0.0/11',
	4105	'ET': '196.188.0.0/14',
	4106	'EU': '2.16.0.0/13',
	4107	'FI': '91.152.0.0/13',
	4108	'FJ': '144.120.0.0/16',
	4109	'FK': '80.73.208.0/21',
	4110	'FM': '119.252.112.0/20',
	4111	'FO': '88.85.32.0/19',
	4112	'FR': '90.0.0.0/9',
	4113	'GA': '41.158.0.0/15',
	4114	'GB': '25.0.0.0/8',
	4115	'GD': '74.122.88.0/21',
	4116	'GE': '31.146.0.0/16',
	4117	'GF': '161.22.64.0/18',
	4118	'GG': '62.68.160.0/19',
	4119	'GH': '154.160.0.0/12',
	4120	'GI': '95.164.0.0/16',
	4121	'GL': '88.83.0.0/19',
	4122	'GM': '160.182.0.0/15',
	4123	'GN': '197.149.192.0/18',
	4124	'GP': '104.250.0.0/19',
	4125	'GQ': '105.235.224.0/20',
	4126	'GR': '94.64.0.0/13',
	4127	'GT': '168.234.0.0/16',
	4128	'GU': '168.123.0.0/16',
	4129	'GW': '197.214.80.0/20',
	4130	'GY': '181.41.64.0/18',
	4131	'HK': '113.252.0.0/14',
	4132	'HN': '181.210.0.0/16',
	4133	'HR': '93.136.0.0/13',
	4134	'HT': '148.102.128.0/17',
	4135	'HU': '84.0.0.0/14',
	4136	'ID': '39.192.0.0/10',
	4137	'IE': '87.32.0.0/12',
	4138	'IL': '79.176.0.0/13',
	4139	'IM': '5.62.80.0/20',
	4140	'IN': '117.192.0.0/10',
	4141	'IO': '203.83.48.0/21',
	4142	'IQ': '37.236.0.0/14',
	4143	'IR': '2.176.0.0/12',
	4144	'IS': '82.221.0.0/16',
	4145	'IT': '79.0.0.0/10',
	4146	'JE': '87.244.64.0/18',
	4147	'JM': '72.27.0.0/17',
	4148	'JO': '176.29.0.0/16',
	4149	'JP': '133.0.0.0/8',
	4150	'KE': '105.48.0.0/12',
	4151	'KG': '158.181.128.0/17',
	4152	'KH': '36.37.128.0/17',
	4153	'KI': '103.25.140.0/22',
	4154	'KM': '197.255.224.0/20',
	4155	'KN': '198.167.192.0/19',
	4156	'KP': '175.45.176.0/22',
	4157	'KR': '175.192.0.0/10',
	4158	'KW': '37.36.0.0/14',
	4159	'KY': '64.96.0.0/15',
	4160	'KZ': '2.72.0.0/13',
	4161	'LA': '115.84.64.0/18',
	4162	'LB': '178.135.0.0/16',
	4163	'LC': '24.92.144.0/20',
	4164	'LI': '82.117.0.0/19',
	4165	'LK': '112.134.0.0/15',
	4166	'LR': '102.183.0.0/16',
	4167	'LS': '129.232.0.0/17',
	4168	'LT': '78.56.0.0/13',
	4169	'LU': '188.42.0.0/16',
	4170	'LV': '46.109.0.0/16',
	4171	'LY': '41.252.0.0/14',
	4172	'MA': '105.128.0.0/11',
	4173	'MC': '88.209.64.0/18',
	4174	'MD': '37.246.0.0/16',
	4175	'ME': '178.175.0.0/17',
	4176	'MF': '74.112.232.0/21',
	4177	'MG': '154.126.0.0/17',
	4178	'MH': '117.103.88.0/21',
	4179	'MK': '77.28.0.0/15',
	4180	'ML': '154.118.128.0/18',
	4181	'MM': '37.111.0.0/17',
	4182	'MN': '49.0.128.0/17',
	4183	'MO': '60.246.0.0/16',
	4184	'MP': '202.88.64.0/20',
	4185	'MQ': '109.203.224.0/19',
	4186	'MR': '41.188.64.0/18',
	4187	'MS': '208.90.112.0/22',
	4188	'MT': '46.11.0.0/16',
	4189	'MU': '105.16.0.0/12',
	4190	'MV': '27.114.128.0/18',
	4191	'MW': '102.70.0.0/15',
	4192	'MX': '187.192.0.0/11',
	4193	'MY': '175.136.0.0/13',
	4194	'MZ': '197.218.0.0/15',
	4195	'NA': '41.182.0.0/16',
	4196	'NC': '101.101.0.0/18',
	4197	'NE': '197.214.0.0/18',
	4198	'NF': '203.17.240.0/22',
	4199	'NG': '105.112.0.0/12',
	4200	'NI': '186.76.0.0/15',
	4201	'NL': '145.96.0.0/11',
	4202	'NO': '84.208.0.0/13',
	4203	'NP': '36.252.0.0/15',
	4204	'NR': '203.98.224.0/19',
	4205	'NU': '49.156.48.0/22',
	4206	'NZ': '49.224.0.0/14',
	4207	'OM': '5.36.0.0/15',
	4208	'PA': '186.72.0.0/15',
	4209	'PE': '186.160.0.0/14',
	4210	'PF': '123.50.64.0/18',
	4211	'PG': '124.240.192.0/19',
	4212	'PH': '49.144.0.0/13',
	4213	'PK': '39.32.0.0/11',
	4214	'PL': '83.0.0.0/11',
	4215	'PM': '70.36.0.0/20',
	4216	'PR': '66.50.0.0/16',
	4217	'PS': '188.161.0.0/16',
	4218	'PT': '85.240.0.0/13',
	4219	'PW': '202.124.224.0/20',
	4220	'PY': '181.120.0.0/14',
	4221	'QA': '37.210.0.0/15',
	4222	'RE': '102.35.0.0/16',
	4223	'RO': '79.112.0.0/13',
	4224	'RS': '93.86.0.0/15',
	4225	'RU': '5.136.0.0/13',
	4226	'RW': '41.186.0.0/16',
	4227	'SA': '188.48.0.0/13',
	4228	'SB': '202.1.160.0/19',
	4229	'SC': '154.192.0.0/11',
	4230	'SD': '102.120.0.0/13',
	4231	'SE': '78.64.0.0/12',
	4232	'SG': '8.128.0.0/10',
	4233	'SI': '188.196.0.0/14',
	4234	'SK': '78.98.0.0/15',
	4235	'SL': '102.143.0.0/17',
	4236	'SM': '89.186.32.0/19',
	4237	'SN': '41.82.0.0/15',
	4238	'SO': '154.115.192.0/18',
	4239	'SR': '186.179.128.0/17',
	4240	'SS': '105.235.208.0/21',
	4241	'ST': '197.159.160.0/19',
	4242	'SV': '168.243.0.0/16',
	4243	'SX': '190.102.0.0/20',
	4244	'SY': '5.0.0.0/16',
	4245	'SZ': '41.84.224.0/19',
	4246	'TC': '65.255.48.0/20',
	4247	'TD': '154.68.128.0/19',
	4248	'TG': '196.168.0.0/14',
	4249	'TH': '171.96.0.0/13',
	4250	'TJ': '85.9.128.0/18',
	4251	'TK': '27.96.24.0/21',
	4252	'TL': '180.189.160.0/20',
	4253	'TM': '95.85.96.0/19',
	4254	'TN': '197.0.0.0/11',
	4255	'TO': '175.176.144.0/21',
	4256	'TR': '78.160.0.0/11',
	4257	'TT': '186.44.0.0/15',
	4258	'TV': '202.2.96.0/19',
	4259	'TW': '120.96.0.0/11',
	4260	'TZ': '156.156.0.0/14',
	4261	'UA': '37.52.0.0/14',
	4262	'UG': '102.80.0.0/13',
	4263	'US': '6.0.0.0/8',
	4264	'UY': '167.56.0.0/13',
	4265	'UZ': '84.54.64.0/18',
	4266	'VA': '212.77.0.0/19',
	4267	'VC': '207.191.240.0/21',
	4268	'VE': '186.88.0.0/13',
	4269	'VG': '66.81.192.0/20',
	4270	'VI': '146.226.0.0/16',
	4271	'VN': '14.160.0.0/11',
	4272	'VU': '202.80.32.0/20',
	4273	'WF': '117.20.32.0/21',
	4274	'WS': '202.4.32.0/19',
	4275	'YE': '134.35.0.0/16',
	4276	'YT': '41.242.116.0/22',
	4277	'ZA': '41.0.0.0/11',
	4278	'ZM': '102.144.0.0/13',
	4279	'ZW': '102.177.192.0/18',
	4280	}
	4281
	4282	@classmethod
	4283	def random_ipv4(cls, code_or_block):
	4284	if len(code_or_block) == 2:
	4285	block = cls._country_ip_map.get(code_or_block.upper())
	4286	if not block:
	4287	return None
	4288	else:
	4289	block = code_or_block
	4290	addr, preflen = block.split('/')
	4291	addr_min = struct.unpack('!L', socket.inet_aton(addr))[0]
	4292	addr_max = addr_min \| (0xffffffff >> int(preflen))
	4293	return str(socket.inet_ntoa(
	4294	struct.pack('!L', random.randint(addr_min, addr_max))))
	4295
	4296
	4297	# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
	4298	# released into Public Domain
	4299	# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
	4300
	4301	def long_to_bytes(n, blocksize=0):
	4302	"""long_to_bytes(n:long, blocksize:int) : string
	4303	Convert a long integer to a byte string.
	4304
	4305	If optional blocksize is given and greater than zero, pad the front of the
	4306	byte string with binary zeros so that the length is a multiple of
	4307	blocksize.
	4308	"""
	4309	# after much testing, this algorithm was deemed to be the fastest
	4310	s = b''
	4311	n = int(n)
	4312	while n > 0:
	4313	s = struct.pack('>I', n & 0xffffffff) + s
	4314	n = n >> 32
	4315	# strip off leading zeros
	4316	for i in range(len(s)):
	4317	if s[i] != b'\000'[0]:
	4318	break
	4319	else:
	4320	# only happens when n == 0
	4321	s = b'\000'
	4322	i = 0
	4323	s = s[i:]
	4324	# add back some pad bytes. this could be done more efficiently w.r.t. the
	4325	# de-padding being done above, but sigh...
	4326	if blocksize > 0 and len(s) % blocksize:
	4327	s = (blocksize - len(s) % blocksize) * b'\000' + s
	4328	return s
	4329
	4330
	4331	def bytes_to_long(s):
	4332	"""bytes_to_long(string) : long
	4333	Convert a byte string to a long integer.
	4334
	4335	This is (essentially) the inverse of long_to_bytes().
	4336	"""
	4337	acc = 0
	4338	length = len(s)
	4339	if length % 4:
	4340	extra = (4 - length % 4)
	4341	s = b'\000' * extra + s
	4342	length = length + extra
	4343	for i in range(0, length, 4):
	4344	acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
	4345	return acc
	4346
	4347
	4348	def ohdave_rsa_encrypt(data, exponent, modulus):
	4349	'''
	4350	Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/
	4351
	4352	Input:
	4353	data: data to encrypt, bytes-like object
	4354	exponent, modulus: parameter e and N of RSA algorithm, both integer
	4355	Output: hex string of encrypted data
	4356
	4357	Limitation: supports one block encryption only
	4358	'''
	4359
	4360	payload = int(binascii.hexlify(data[::-1]), 16)
	4361	encrypted = pow(payload, exponent, modulus)
	4362	return '%x' % encrypted
	4363
	4364
	4365	def pkcs1pad(data, length):
	4366	"""
	4367	Padding input data with PKCS#1 scheme
	4368
	4369	@param {int[]} data input data
	4370	@param {int} length target length
	4371	@returns {int[]} padded data
	4372	"""
	4373	if len(data) > length - 11:
	4374	raise ValueError('Input data too long for PKCS#1 padding')
	4375
	4376	pseudo_random = [random.randint(0, 254) for _ in range(length - len(data) - 3)]
	4377	return [0, 2] + pseudo_random + [0] + data
	4378
	4379
	4380	def _base_n_table(n, table):
	4381	if not table and not n:
	4382	raise ValueError('Either table or n must be specified')
	4383	table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
	4384
	4385	if n and n != len(table):
	4386	raise ValueError(f'base {n} exceeds table length {len(table)}')
	4387	return table
	4388
	4389
	4390	def encode_base_n(num, n=None, table=None):
	4391	"""Convert given int to a base-n string"""
	4392	table = _base_n_table(n, table)
	4393	if not num:
	4394	return table[0]
	4395
	4396	result, base = '', len(table)
	4397	while num:
	4398	result = table[num % base] + result
	4399	num = num // base
	4400	return result
	4401
	4402
	4403	def decode_base_n(string, n=None, table=None):
	4404	"""Convert given base-n string to int"""
	4405	table = {char: index for index, char in enumerate(_base_n_table(n, table))}
	4406	result, base = 0, len(table)
	4407	for char in string:
	4408	result = result * base + table[char]
	4409	return result
	4410
	4411
	4412	def decode_packed_codes(code):
	4413	mobj = re.search(PACKED_CODES_RE, code)
	4414	obfuscated_code, base, count, symbols = mobj.groups()
	4415	base = int(base)
	4416	count = int(count)
	4417	symbols = symbols.split('\|')
	4418	symbol_table = {}
	4419
	4420	while count:
	4421	count -= 1
	4422	base_n_count = encode_base_n(count, base)
	4423	symbol_table[base_n_count] = symbols[count] or base_n_count
	4424
	4425	return re.sub(
	4426	r'\b(\w+)\b', lambda mobj: symbol_table[mobj.group(0)],
	4427	obfuscated_code)
	4428
	4429
	4430	def caesar(s, alphabet, shift):
	4431	if shift == 0:
	4432	return s
	4433	l = len(alphabet)
	4434	return ''.join(
	4435	alphabet[(alphabet.index(c) + shift) % l] if c in alphabet else c
	4436	for c in s)
	4437
	4438
	4439	def rot47(s):
	4440	return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{\|}~''', 47)
	4441
	4442
	4443	def parse_m3u8_attributes(attrib):
	4444	info = {}
	4445	for (key, val) in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"\|[^",]+)(?:,\|$)', attrib):
	4446	if val.startswith('"'):
	4447	val = val[1:-1]
	4448	info[key] = val
	4449	return info
	4450
	4451
	4452	def urshift(val, n):
	4453	return val >> n if val >= 0 else (val + 0x100000000) >> n
	4454
	4455
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file at `path` to `value`.

    `value` is a bytes object: it is written to a binary stream on Windows
    and decoded before being passed to an external tool.

    Raises XAttrMetadataError when writing fails, and XAttrUnavailableError
    when neither a usable Python API nor an external tool is found.
    """
    # Windows: Write xattrs to NTFS Alternate Data Streams:
    # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
    if compat_os_name == 'nt':
        assert ':' not in key
        assert os.path.exists(path)

        try:
            with open(f'{path}:{key}', 'wb') as f:
                f.write(value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules

    setxattr = None
    if callable(getattr(os, 'setxattr', None)):
        setxattr = os.setxattr
    elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        # With an older pyxattr, setxattr stays None and Method 2 is tried
        if version_tuple(xattr.__version__) >= (0, 5, 0):
            setxattr = xattr.set
    elif xattr:
        setxattr = xattr.setxattr

    if setxattr:
        try:
            setxattr(path, key, value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 2. Use setfattr/xattr executables
    exe = ('setfattr' if check_executable('setfattr', ['--version'])
           else 'xattr' if check_executable('xattr', ['-h']) else None)
    if not exe:
        raise XAttrUnavailableError(
            'Couldn\'t find a tool to set the xattrs. Install either the "xattr" or "pyxattr" Python modules or the '
            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))

    # The value is passed as a command-line argument, so it must be text
    value = value.decode()
    try:
        _, stderr, returncode = Popen.run(
            [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except OSError as e:
        raise XAttrMetadataError(e.errno, e.strerror)
    if returncode:
        raise XAttrMetadataError(returncode, stderr)
	4507
	4508
	4509	def random_birthday(year_field, month_field, day_field):
	4510	start_date = dt.date(1950, 1, 1)
	4511	end_date = dt.date(1995, 12, 31)
	4512	offset = random.randint(0, (end_date - start_date).days)
	4513	random_date = start_date + dt.timedelta(offset)
	4514	return {
	4515	year_field: str(random_date.year),
	4516	month_field: str(random_date.month),
	4517	day_field: str(random_date.day),
	4518	}
	4519
	4520
	4521	def find_available_port(interface=''):
	4522	try:
	4523	with socket.socket() as sock:
	4524	sock.bind((interface, 0))
	4525	return sock.getsockname()[1]
	4526	except OSError:
	4527	return None
	4528
	4529
# Templates for internet shortcut files, which are plain text files.
# Each template is a %-format string with named placeholders: all of them
# take `url`, and the .desktop template additionally takes `filename`.

# Template with an [InternetShortcut] section (.url file)
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# Template in Apple property list XML format (.webloc file)
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# Template with a [Desktop Entry] section of type Link (.desktop file)
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''

# Maps a link type name to its template
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
	4561
	4562
	4563	def iri_to_uri(iri):
	4564	"""
	4565	Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
	4566
	4567	The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding besides those already escaped, leaving the URI intact.
	4568	"""
	4569
	4570	iri_parts = urllib.parse.urlparse(iri)
	4571
	4572	if '[' in iri_parts.netloc:
	4573	raise ValueError('IPv6 URIs are not, yet, supported.')
	4574	# Querying `.netloc`, when there's only one bracket, also raises a ValueError.
	4575
	4576	# The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
	4577
	4578	net_location = ''
	4579	if iri_parts.username:
	4580	net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
	4581	if iri_parts.password is not None:
	4582	net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
	4583	net_location += '@'
	4584
	4585	net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
	4586	# The 'idna' encoding produces ASCII text.
	4587	if iri_parts.port is not None and iri_parts.port != 80:
	4588	net_location += ':' + str(iri_parts.port)
	4589
	4590	return urllib.parse.urlunparse(
	4591	(iri_parts.scheme,
	4592	net_location,
	4593
	4594	urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@\|~"),
	4595
	4596	# Unsure about the `safe` argument, since this is a legacy way of handling parameters.
	4597	urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@\|~"),
	4598
	4599	# Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
	4600	urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{\|}~"),
	4601
	4602	urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{\|}~")))
	4603
	4604	# Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
	4605
	4606
	4607	def to_high_limit_path(path):
	4608	if sys.platform in ['win32', 'cygwin']:
	4609	# Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
	4610	return '\\\\?\\' + os.path.abspath(path)
	4611
	4612	return path
	4613
	4614
	4615	def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
	4616	val = traversal.traverse_obj(obj, *variadic(field))
	4617	if not val if ignore is NO_DEFAULT else val in variadic(ignore):
	4618	return default
	4619	return template % func(val)
	4620
	4621
	4622	def clean_podcast_url(url):
	4623	url = re.sub(r'''(?x)
	4624	(?:
	4625	(?:
	4626	chtbl\.com/track\|
	4627	media\.blubrry\.com\| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
	4628	play\.podtrac\.com\|
	4629	chrt\.fm/track\|
	4630	mgln\.ai/e
	4631	)(?:/[^/.]+)?\|
	4632	(?:dts\|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}\| # http://analytics.podtrac.com/how-to-measure
	4633	flex\.acast\.com\|
	4634	pd(?:
	4635	cn\.co\| # https://podcorn.com/analytics-prefix/
	4636	st\.fm # https://podsights.com/docs/
	4637	)/e\|
	4638	[0-9]\.gum\.fm\|
	4639	pscrb\.fm/rss/p
	4640	)/''', '', url)
	4641	return re.sub(r'^\w+://(\w+://)', r'\1', url)
	4642
	4643
	4644	_HEX_TABLE = '0123456789abcdef'
	4645
	4646
	4647	def random_uuidv4():
	4648	return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
	4649
	4650
	4651	def make_dir(path, to_screen=None):
	4652	try:
	4653	dn = os.path.dirname(path)
	4654	if dn:
	4655	os.makedirs(dn, exist_ok=True)
	4656	return True
	4657	except OSError as err:
	4658	if callable(to_screen) is not None:
	4659	to_screen(f'unable to create directory {err}')
	4660	return False
	4661
	4662
	4663	def get_executable_path():
	4664	from ..update import _get_variant_and_executable_path
	4665
	4666	return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
	4667
	4668
	4669	def get_user_config_dirs(package_name):
	4670	# .config (e.g. ~/.config/package_name)
	4671	xdg_config_home = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
	4672	yield os.path.join(xdg_config_home, package_name)
	4673
	4674	# appdata (%APPDATA%/package_name)
	4675	appdata_dir = os.getenv('appdata')
	4676	if appdata_dir:
	4677	yield os.path.join(appdata_dir, package_name)
	4678
	4679	# home (~/.package_name)
	4680	yield os.path.join(compat_expanduser('~'), f'.{package_name}')
	4681
	4682
	4683	def get_system_config_dirs(package_name):
	4684	# /etc/package_name
	4685	yield os.path.join('/etc', package_name)
	4686
	4687
	4688	def time_seconds(**kwargs):
	4689	"""
	4690	Returns TZ-aware time in seconds since the epoch (1970-01-01T00:00:00Z)
	4691	"""
	4692	return time.time() + dt.timedelta(**kwargs).total_seconds()
	4693
	4694
	4695	# create a JSON Web Signature (jws) with HS256 algorithm
	4696	# the resulting format is in JWS Compact Serialization
	4697	# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
	4698	# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
	4699	def jwt_encode_hs256(payload_data, key, headers={}):
	4700	header_data = {
	4701	'alg': 'HS256',
	4702	'typ': 'JWT',
	4703	}
	4704	if headers:
	4705	header_data.update(headers)
	4706	header_b64 = base64.b64encode(json.dumps(header_data).encode())
	4707	payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
	4708	h = hmac.new(key.encode(), header_b64 + b'.' + payload_b64, hashlib.sha256)
	4709	signature_b64 = base64.b64encode(h.digest())
	4710	token = header_b64 + b'.' + payload_b64 + b'.' + signature_b64
	4711	return token
	4712
	4713
	4714	# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
	4715	def jwt_decode_hs256(jwt):
	4716	header_b64, payload_b64, signature_b64 = jwt.split('.')
	4717	# add trailing ='s that may have been stripped, superfluous ='s are ignored
	4718	payload_data = json.loads(base64.urlsafe_b64decode(f'{payload_b64}==='))
	4719	return payload_data
	4720
	4721
	4722	WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
	4723
	4724
	4725	@functools.cache
	4726	def supports_terminal_sequences(stream):
	4727	if compat_os_name == 'nt':
	4728	if not WINDOWS_VT_MODE:
	4729	return False
	4730	elif not os.getenv('TERM'):
	4731	return False
	4732	try:
	4733	return stream.isatty()
	4734	except BaseException:
	4735	return False
	4736
	4737
	4738	def windows_enable_vt_mode():
	4739	"""Ref: https://bugs.python.org/issue30075 """
	4740	if get_windows_version() < (10, 0, 10586):
	4741	return
	4742
	4743	import ctypes
	4744	import ctypes.wintypes
	4745	import msvcrt
	4746
	4747	ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004
	4748
	4749	dll = ctypes.WinDLL('kernel32', use_last_error=False)
	4750	handle = os.open('CONOUT$', os.O_RDWR)
	4751	try:
	4752	h_out = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(handle))
	4753	dw_original_mode = ctypes.wintypes.DWORD()
	4754	success = dll.GetConsoleMode(h_out, ctypes.byref(dw_original_mode))
	4755	if not success:
	4756	raise Exception('GetConsoleMode failed')
	4757
	4758	success = dll.SetConsoleMode(h_out, ctypes.wintypes.DWORD(
	4759	dw_original_mode.value \| ENABLE_VIRTUAL_TERMINAL_PROCESSING))
	4760	if not success:
	4761	raise Exception('SetConsoleMode failed')
	4762	finally:
	4763	os.close(handle)
	4764
	4765	global WINDOWS_VT_MODE
	4766	WINDOWS_VT_MODE = True
	4767	supports_terminal_sequences.cache_clear()
	4768
	4769
	4770	_terminal_sequences_re = re.compile('\033\\[[^m]+m')
	4771
	4772
	4773	def remove_terminal_sequences(string):
	4774	return _terminal_sequences_re.sub('', string)
	4775
	4776
	4777	def number_of_digits(number):
	4778	return len('%d' % number)
	4779
	4780
	4781	def join_nonempty(*values, delim='-', from_dict=None):
	4782	if from_dict is not None:
	4783	values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
	4784	return delim.join(map(str, filter(None, values)))
	4785
	4786
	4787	def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
	4788	"""
	4789	Find the largest format dimensions in terms of video width and, for each thumbnail:
	4790	* Modify the URL: Match the width with the provided regex and replace with the former width
	4791	* Update dimensions
	4792
	4793	This function is useful with video services that scale the provided thumbnails on demand
	4794	"""
	4795	_keys = ('width', 'height')
	4796	max_dimensions = max(
	4797	(tuple(format.get(k) or 0 for k in _keys) for format in formats),
	4798	default=(0, 0))
	4799	if not max_dimensions[0]:
	4800	return thumbnails
	4801	return [
	4802	merge_dicts(
	4803	{'url': re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])},
	4804	dict(zip(_keys, max_dimensions)), thumbnail)
	4805	for thumbnail in thumbnails
	4806	]
	4807
	4808
	4809	def parse_http_range(range):
	4810	""" Parse value of "Range" or "Content-Range" HTTP header into tuple. """
	4811	if not range:
	4812	return None, None, None
	4813	crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
	4814	if not crg:
	4815	return None, None, None
	4816	return int(crg.group(1)), int_or_none(crg.group(2)), int_or_none(crg.group(3))
	4817
	4818
	4819	def read_stdin(what):
	4820	if what:
	4821	eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
	4822	write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
	4823	return sys.stdin
	4824
	4825
	4826	def determine_file_encoding(data):
	4827	"""
	4828	Detect the text encoding used
	4829	@returns (encoding, bytes to skip)
	4830	"""
	4831
	4832	# BOM marks are given priority over declarations
	4833	for bom, enc in BOMS:
	4834	if data.startswith(bom):
	4835	return enc, len(bom)
	4836
	4837	# Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
	4838	# We ignore the endianness to get a good enough match
	4839	data = data.replace(b'\0', b'')
	4840	mobj = re.match(rb'(?m)^#\scoding\s:\s(\S+)\s$', data)
	4841	return mobj.group(1).decode() if mobj else None, 0
	4842
	4843
	4844	class Config:
	4845	own_args = None
	4846	parsed_args = None
	4847	filename = None
	4848	__initialized = False
	4849
	4850	def __init__(self, parser, label=None):
	4851	self.parser, self.label = parser, label
	4852	self._loaded_paths, self.configs = set(), []
	4853
	4854	def init(self, args=None, filename=None):
	4855	assert not self.__initialized
	4856	self.own_args, self.filename = args, filename
	4857	return self.load_configs()
	4858
	4859	def load_configs(self):
	4860	directory = ''
	4861	if self.filename:
	4862	location = os.path.realpath(self.filename)
	4863	directory = os.path.dirname(location)
	4864	if location in self._loaded_paths:
	4865	return False
	4866	self._loaded_paths.add(location)
	4867
	4868	self.__initialized = True
	4869	opts, _ = self.parser.parse_known_args(self.own_args)
	4870	self.parsed_args = self.own_args
	4871	for location in opts.config_locations or []:
	4872	if location == '-':
	4873	if location in self._loaded_paths:
	4874	continue
	4875	self._loaded_paths.add(location)
	4876	self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
	4877	continue
	4878	location = os.path.join(directory, expand_path(location))
	4879	if os.path.isdir(location):
	4880	location = os.path.join(location, 'yt-dlp.conf')
	4881	if not os.path.exists(location):
	4882	self.parser.error(f'config location {location} does not exist')
	4883	self.append_config(self.read_file(location), location)
	4884	return True
	4885
	4886	def __str__(self):
	4887	label = join_nonempty(
	4888	self.label, 'config', f'"{self.filename}"' if self.filename else '',
	4889	delim=' ')
	4890	return join_nonempty(
	4891	self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
	4892	*(f'\n{c}'.replace('\n', '\n\| ')[1:] for c in self.configs),
	4893	delim='\n')
	4894
	4895	@staticmethod
	4896	def read_file(filename, default=[]):
	4897	try:
	4898	optionf = open(filename, 'rb')
	4899	except OSError:
	4900	return default # silently skip if file is not present
	4901	try:
	4902	enc, skip = determine_file_encoding(optionf.read(512))
	4903	optionf.seek(skip, io.SEEK_SET)
	4904	except OSError:
	4905	enc = None # silently skip read errors
	4906	try:
	4907	# FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
	4908	contents = optionf.read().decode(enc or preferredencoding())
	4909	res = shlex.split(contents, comments=True)
	4910	except Exception as err:
	4911	raise ValueError(f'Unable to parse "{filename}": {err}')
	4912	finally:
	4913	optionf.close()
	4914	return res
	4915
	4916	@staticmethod
	4917	def hide_login_info(opts):
	4918	PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
	4919	eqre = re.compile('^(?P<key>' + ('\|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')
	4920
	4921	def _scrub_eq(o):
	4922	m = eqre.match(o)
	4923	if m:
	4924	return m.group('key') + '=PRIVATE'
	4925	else:
	4926	return o
	4927
	4928	opts = list(map(_scrub_eq, opts))
	4929	for idx, opt in enumerate(opts):
	4930	if opt in PRIVATE_OPTS and idx + 1 < len(opts):
	4931	opts[idx + 1] = 'PRIVATE'
	4932	return opts
	4933
	4934	def append_config(self, *args, label=None):
	4935	config = type(self)(self.parser, label)
	4936	config._loaded_paths = self._loaded_paths
	4937	if config.init(*args):
	4938	self.configs.append(config)
	4939
	4940	@property
	4941	def all_args(self):
	4942	for config in reversed(self.configs):
	4943	yield from config.all_args
	4944	yield from self.parsed_args or []
	4945
	4946	def parse_known_args(self, **kwargs):
	4947	return self.parser.parse_known_args(self.all_args, **kwargs)
	4948
	4949	def parse_args(self):
	4950	return self.parser.parse_args(self.all_args)
	4951
	4952
	4953	def merge_headers(*dicts):
	4954	"""Merge dicts of http headers case insensitively, prioritizing the latter ones"""
	4955	return {k.title(): v for k, v in itertools.chain.from_iterable(map(dict.items, dicts))}
	4956
	4957
	4958	def cached_method(f):
	4959	"""Cache a method"""
	4960	signature = inspect.signature(f)
	4961
	4962	@functools.wraps(f)
	4963	def wrapper(self, args, *kwargs):
	4964	bound_args = signature.bind(self, args, *kwargs)
	4965	bound_args.apply_defaults()
	4966	key = tuple(bound_args.arguments.values())[1:]
	4967
	4968	cache = vars(self).setdefault('_cached_method__cache', {}).setdefault(f.__name__, {})
	4969	if key not in cache:
	4970	cache[key] = f(self, args, *kwargs)
	4971	return cache[key]
	4972	return wrapper
	4973
	4974
	4975	class classproperty:
	4976	"""property access for class methods with optional caching"""
	4977	def __new__(cls, func=None, args, *kwargs):
	4978	if not func:
	4979	return functools.partial(cls, args, *kwargs)
	4980	return super().__new__(cls)
	4981
	4982	def __init__(self, func, *, cache=False):
	4983	functools.update_wrapper(self, func)
	4984	self.func = func
	4985	self._cache = {} if cache else None
	4986
	4987	def __get__(self, _, cls):
	4988	if self._cache is None:
	4989	return self.func(cls)
	4990	elif cls not in self._cache:
	4991	self._cache[cls] = self.func(cls)
	4992	return self._cache[cls]
	4993
	4994
	4995	class function_with_repr:
	4996	def __init__(self, func, repr_=None):
	4997	functools.update_wrapper(self, func)
	4998	self.func, self.__repr = func, repr_
	4999
	5000	def __call__(self, args, *kwargs):
	5001	return self.func(args, *kwargs)
	5002
	5003	@classmethod
	5004	def set_repr(cls, repr_):
	5005	return functools.partial(cls, repr_=repr_)
	5006
	5007	def __repr__(self):
	5008	if self.__repr:
	5009	return self.__repr
	5010	return f'{self.func.__module__}.{self.func.__qualname__}'
	5011
	5012
	5013	class Namespace(types.SimpleNamespace):
	5014	"""Immutable namespace"""
	5015
	5016	def __iter__(self):
	5017	return iter(self.__dict__.values())
	5018
	5019	@property
	5020	def items_(self):
	5021	return self.__dict__.items()
	5022
	5023
	5024	MEDIA_EXTENSIONS = Namespace(
	5025	common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
	5026	video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
	5027	common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
	5028	audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
	5029	thumbnails=('jpg', 'png', 'webp'),
	5030	storyboards=('mhtml', ),
	5031	subtitles=('srt', 'vtt', 'ass', 'lrc'),
	5032	manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
	5033	)
	5034	MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
	5035	MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio
	5036
	5037	KNOWN_EXTENSIONS = (MEDIA_EXTENSIONS.video, MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
	5038
	5039
	5040	class RetryManager:
	5041	"""Usage:
	5042	for retry in RetryManager(...):
	5043	try:
	5044	...
	5045	except SomeException as err:
	5046	retry.error = err
	5047	continue
	5048	"""
	5049	attempt, _error = 0, None
	5050
	5051	def __init__(self, _retries, _error_callback, **kwargs):
	5052	self.retries = _retries or 0
	5053	self.error_callback = functools.partial(_error_callback, **kwargs)
	5054
	5055	def _should_retry(self):
	5056	return self._error is not NO_DEFAULT and self.attempt <= self.retries
	5057
	5058	@property
	5059	def error(self):
	5060	if self._error is NO_DEFAULT:
	5061	return None
	5062	return self._error
	5063
	5064	@error.setter
	5065	def error(self, value):
	5066	self._error = value
	5067
	5068	def __iter__(self):
	5069	while self._should_retry():
	5070	self.error = NO_DEFAULT
	5071	self.attempt += 1
	5072	yield self
	5073	if self.error:
	5074	self.error_callback(self.error, self.attempt, self.retries)
	5075
	5076	@staticmethod
	5077	def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
	5078	"""Utility function for reporting retries"""
	5079	if count > retries:
	5080	if error:
	5081	return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
	5082	raise e
	5083
	5084	if not count:
	5085	return warn(e)
	5086	elif isinstance(e, ExtractorError):
	5087	e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
	5088	warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
	5089
	5090	delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
	5091	if delay:
	5092	info(f'Sleeping {delay:.2f} seconds ...')
	5093	time.sleep(delay)
	5094
	5095
	5096	def make_archive_id(ie, video_id):
	5097	ie_key = ie if isinstance(ie, str) else ie.ie_key()
	5098	return f'{ie_key.lower()} {video_id}'
	5099
	5100
	5101	def truncate_string(s, left, right=0):
	5102	assert left > 3 and right >= 0
	5103	if s is None or len(s) <= left + right:
	5104	return s
	5105	return f'{s[:left - 3]}...{s[-right:] if right else ""}'
	5106
	5107
	5108	def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
	5109	assert 'all' in alias_dict, '"all" alias is required'
	5110	requested = list(start or [])
	5111	for val in options:
	5112	discard = val.startswith('-')
	5113	if discard:
	5114	val = val[1:]
	5115
	5116	if val in alias_dict:
	5117	val = alias_dict[val] if not discard else [
	5118	i[1:] if i.startswith('-') else f'-{i}' for i in alias_dict[val]]
	5119	# NB: Do not allow regex in aliases for performance
	5120	requested = orderedSet_from_options(val, alias_dict, start=requested)
	5121	continue
	5122
	5123	current = (filter(re.compile(val, re.I).fullmatch, alias_dict['all']) if use_regex
	5124	else [val] if val in alias_dict['all'] else None)
	5125	if current is None:
	5126	raise ValueError(val)
	5127
	5128	if discard:
	5129	for item in current:
	5130	while item in requested:
	5131	requested.remove(item)
	5132	else:
	5133	requested.extend(current)
	5134
	5135	return orderedSet(requested)
	5136
	5137
	5138	# TODO: Rewrite
	5139	class FormatSorter:
	5140	regex = r' ((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.?))?)? *$'
	5141
	5142	default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
	5143	'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
	5144	'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
	5145	ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
	5146	'height', 'width', 'proto', 'vext', 'abr', 'aext',
	5147	'fps', 'fs_approx', 'source', 'id')
	5148
	5149	settings = {
	5150	'vcodec': {'type': 'ordered', 'regex': True,
	5151	'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265\|he?vc?', '[hx]264\|avc', 'vp0?8', 'mp4v\|h263', 'theora', '', None, 'none']},
	5152	'acodec': {'type': 'ordered', 'regex': True,
	5153	'order': ['[af]lac', 'wav\|aiff', 'opus', 'vorbis\|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
	5154	'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
	5155	'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
	5156	'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
	5157	'order': ['(ht\|f)tps', '(ht\|f)tp$', 'm3u8.', '.dash', 'websocket_frag', 'rtmpe?', '', 'mms\|rtsp', 'ws\|websocket', 'f4']},
	5158	'vext': {'type': 'ordered', 'field': 'video_ext',
	5159	'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
	5160	'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
	5161	'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
	5162	'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
	5163	'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
	5164	'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
	5165	'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
	5166	'field': ('vcodec', 'acodec'),
	5167	'function': lambda it: int(any(v != 'none' for v in it))},
	5168	'ie_pref': {'priority': True, 'type': 'extractor'},
	5169	'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
	5170	'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
	5171	'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
	5172	'quality': {'convert': 'float', 'default': -1},
	5173	'filesize': {'convert': 'bytes'},
	5174	'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
	5175	'id': {'convert': 'string', 'field': 'format_id'},
	5176	'height': {'convert': 'float_none'},
	5177	'width': {'convert': 'float_none'},
	5178	'fps': {'convert': 'float_none'},
	5179	'channels': {'convert': 'float_none', 'field': 'audio_channels'},
	5180	'tbr': {'convert': 'float_none'},
	5181	'vbr': {'convert': 'float_none'},
	5182	'abr': {'convert': 'float_none'},
	5183	'asr': {'convert': 'float_none'},
	5184	'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
	5185
	5186	'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
	5187	'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
	5188	'function': lambda it: next(filter(None, it), None)},
	5189	'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
	5190	'function': lambda it: next(filter(None, it), None)},
	5191	'ext': {'type': 'combined', 'field': ('vext', 'aext')},
	5192	'res': {'type': 'multiple', 'field': ('height', 'width'),
	5193	'function': lambda it: (lambda l: min(l) if l else 0)(tuple(filter(None, it)))},
	5194
	5195	# Actual field names
	5196	'format_id': {'type': 'alias', 'field': 'id'},
	5197	'preference': {'type': 'alias', 'field': 'ie_pref'},
	5198	'language_preference': {'type': 'alias', 'field': 'lang'},
	5199	'source_preference': {'type': 'alias', 'field': 'source'},
	5200	'protocol': {'type': 'alias', 'field': 'proto'},
	5201	'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
	5202	'audio_channels': {'type': 'alias', 'field': 'channels'},
	5203
	5204	# Deprecated
	5205	'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
	5206	'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
	5207	'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
	5208	'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
	5209	'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
	5210	'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
	5211	'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
	5212	'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
	5213	'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
	5214	'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
	5215	'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
	5216	'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
	5217	'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
	5218	'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
	5219	'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
	5220	'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
	5221	'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
	5222	'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
	5223	'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
	5224	'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
	5225	}
	5226
	5227	def __init__(self, ydl, field_preference):
	5228	self.ydl = ydl
	5229	self._order = []
	5230	self.evaluate_params(self.ydl.params, field_preference)
	5231	if ydl.params.get('verbose'):
	5232	self.print_verbose_info(self.ydl.write_debug)
	5233
	5234	def _get_field_setting(self, field, key):
	5235	if field not in self.settings:
	5236	if key in ('forced', 'priority'):
	5237	return False
	5238	self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
	5239	'deprecated and may be removed in a future version')
	5240	self.settings[field] = {}
	5241	propObj = self.settings[field]
	5242	if key not in propObj:
	5243	type = propObj.get('type')
	5244	if key == 'field':
	5245	default = 'preference' if type == 'extractor' else (field,) if type in ('combined', 'multiple') else field
	5246	elif key == 'convert':
	5247	default = 'order' if type == 'ordered' else 'float_string' if field else 'ignore'
	5248	else:
	5249	default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key, None)
	5250	propObj[key] = default
	5251	return propObj[key]
	5252
	5253	def _resolve_field_value(self, field, value, convertNone=False):
	5254	if value is None:
	5255	if not convertNone:
	5256	return None
	5257	else:
	5258	value = value.lower()
	5259	conversion = self._get_field_setting(field, 'convert')
	5260	if conversion == 'ignore':
	5261	return None
	5262	if conversion == 'string':
	5263	return value
	5264	elif conversion == 'float_none':
	5265	return float_or_none(value)
	5266	elif conversion == 'bytes':
	5267	return parse_bytes(value)
	5268	elif conversion == 'order':
	5269	order_list = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
	5270	use_regex = self._get_field_setting(field, 'regex')
	5271	list_length = len(order_list)
	5272	empty_pos = order_list.index('') if '' in order_list else list_length + 1
	5273	if use_regex and value is not None:
	5274	for i, regex in enumerate(order_list):
	5275	if regex and re.match(regex, value):
	5276	return list_length - i
	5277	return list_length - empty_pos # not in list
	5278	else: # not regex or value = None
	5279	return list_length - (order_list.index(value) if value in order_list else empty_pos)
	5280	else:
	5281	if value.isnumeric():
	5282	return float(value)
	5283	else:
	5284	self.settings[field]['convert'] = 'string'
	5285	return value
	5286
	5287	def evaluate_params(self, params, sort_extractor):
	5288	self._use_free_order = params.get('prefer_free_formats', False)
	5289	self._sort_user = params.get('format_sort', [])
	5290	self._sort_extractor = sort_extractor
	5291
	5292	def add_item(field, reverse, closest, limit_text):
	5293	field = field.lower()
	5294	if field in self._order:
	5295	return
	5296	self._order.append(field)
	5297	limit = self._resolve_field_value(field, limit_text)
	5298	data = {
	5299	'reverse': reverse,
	5300	'closest': False if limit is None else closest,
	5301	'limit_text': limit_text,
	5302	'limit': limit}
	5303	if field in self.settings:
	5304	self.settings[field].update(data)
	5305	else:
	5306	self.settings[field] = data
	5307
	5308	sort_list = (
	5309	tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
	5310	+ (tuple() if params.get('format_sort_force', False)
	5311	else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
	5312	+ tuple(self._sort_user) + tuple(sort_extractor) + self.default)
	5313
	5314	for item in sort_list:
	5315	match = re.match(self.regex, item)
	5316	if match is None:
	5317	raise ExtractorError('Invalid format sort string "%s" given by extractor' % item)
	5318	field = match.group('field')
	5319	if field is None:
	5320	continue
	5321	if self._get_field_setting(field, 'type') == 'alias':
	5322	alias, field = field, self._get_field_setting(field, 'field')
	5323	if self._get_field_setting(alias, 'deprecated'):
	5324	self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
	5325	f'be removed in a future version. Please use {field} instead')
	5326	reverse = match.group('reverse') is not None
	5327	closest = match.group('separator') == '~'
	5328	limit_text = match.group('limit')
	5329
	5330	has_limit = limit_text is not None
	5331	has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
	5332	has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
	5333
	5334	fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
	5335	limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
	5336	limit_count = len(limits)
	5337	for (i, f) in enumerate(fields):
	5338	add_item(f, reverse, closest,
	5339	limits[i] if i < limit_count
	5340	else limits[0] if has_limit and not has_multiple_limits
	5341	else None)
	5342
	5343	def print_verbose_info(self, write_debug):
	5344	if self._sort_user:
	5345	write_debug('Sort order given by user: %s' % ', '.join(self._sort_user))
	5346	if self._sort_extractor:
	5347	write_debug('Sort order given by extractor: %s' % ', '.join(self._sort_extractor))
	5348	write_debug('Formats sorted by: %s' % ', '.join(['%s%s%s' % (
	5349	'+' if self._get_field_setting(field, 'reverse') else '', field,
	5350	'%s%s(%s)' % ('~' if self._get_field_setting(field, 'closest') else ':',
	5351	self._get_field_setting(field, 'limit_text'),
	5352	self._get_field_setting(field, 'limit'))
	5353	if self._get_field_setting(field, 'limit_text') is not None else '')
	5354	for field in self._order if self._get_field_setting(field, 'visible')]))
	5355
	5356	def _calculate_field_preference_from_value(self, format, field, type, value):
	5357	reverse = self._get_field_setting(field, 'reverse')
	5358	closest = self._get_field_setting(field, 'closest')
	5359	limit = self._get_field_setting(field, 'limit')
	5360
	5361	if type == 'extractor':
	5362	maximum = self._get_field_setting(field, 'max')
	5363	if value is None or (maximum is not None and value >= maximum):
	5364	value = -1
	5365	elif type == 'boolean':
	5366	in_list = self._get_field_setting(field, 'in_list')
	5367	not_in_list = self._get_field_setting(field, 'not_in_list')
	5368	value = 0 if ((in_list is None or value in in_list) and (not_in_list is None or value not in not_in_list)) else -1
	5369	elif type == 'ordered':
	5370	value = self._resolve_field_value(field, value, True)
	5371
	5372	# try to convert to number
	5373	val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
	5374	is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
	5375	if is_num:
	5376	value = val_num
	5377
	5378	return ((-10, 0) if value is None
	5379	else (1, value, 0) if not is_num # if a field has mixed strings and numbers, strings are sorted higher
	5380	else (0, -abs(value - limit), value - limit if reverse else limit - value) if closest
	5381	else (0, value, 0) if not reverse and (limit is None or value <= limit)
	5382	else (0, -value, 0) if limit is None or (reverse and value == limit) or value > limit
	5383	else (-1, value, 0))
	5384
	5385	def _calculate_field_preference(self, format, field):
	5386	type = self._get_field_setting(field, 'type') # extractor, boolean, ordered, field, multiple
	5387	get_value = lambda f: format.get(self._get_field_setting(f, 'field'))
	5388	if type == 'multiple':
	5389	type = 'field' # Only 'field' is allowed in multiple for now
	5390	actual_fields = self._get_field_setting(field, 'field')
	5391
	5392	value = self._get_field_setting(field, 'function')(get_value(f) for f in actual_fields)
	5393	else:
	5394	value = get_value(field)
	5395	return self._calculate_field_preference_from_value(format, field, type, value)
	5396
	5397	def calculate_preference(self, format):
	5398	# Determine missing protocol
	5399	if not format.get('protocol'):
	5400	format['protocol'] = determine_protocol(format)
	5401
	5402	# Determine missing ext
	5403	if not format.get('ext') and 'url' in format:
	5404	format['ext'] = determine_ext(format['url'])
	5405	if format.get('vcodec') == 'none':
	5406	format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
	5407	format['video_ext'] = 'none'
	5408	else:
	5409	format['video_ext'] = format['ext']
	5410	format['audio_ext'] = 'none'
	5411	# if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
	5412	# format['preference'] = -1000
	5413
	5414	if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265\|he?vc?', format.get('vcodec') or ''):
	5415	# HEVC-over-FLV is out-of-spec by FLV's original spec
	5416	# ref. https://trac.ffmpeg.org/ticket/6389
	5417	# ref. https://github.com/yt-dlp/yt-dlp/pull/5821
	5418	format['preference'] = -100
	5419
	5420	# Determine missing bitrates
	5421	if format.get('vcodec') == 'none':
	5422	format['vbr'] = 0
	5423	if format.get('acodec') == 'none':
	5424	format['abr'] = 0
	5425	if not format.get('vbr') and format.get('vcodec') != 'none':
	5426	format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
	5427	if not format.get('abr') and format.get('acodec') != 'none':
	5428	format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
	5429	if not format.get('tbr'):
	5430	format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
	5431
	5432	return tuple(self._calculate_field_preference(format, field) for field in self._order)
	5433
	5434
	5435	def filesize_from_tbr(tbr, duration):
	5436	"""
	5437	@param tbr: Total bitrate in kbps (1000 bits/sec)
	5438	@param duration: Duration in seconds
	5439	@returns Filesize in bytes
	5440	"""
	5441	if tbr is None or duration is None:
	5442	return None
	5443	return int(duration * tbr * (1000 / 8))
	5444
	5445
	5446	# XXX: Temporary
	5447	class _YDLLogger:
	5448	def __init__(self, ydl=None):
	5449	self._ydl = ydl
	5450
	5451	def debug(self, message):
	5452	if self._ydl:
	5453	self._ydl.write_debug(message)
	5454
	5455	def info(self, message):
	5456	if self._ydl:
	5457	self._ydl.to_screen(message)
	5458
	5459	def warning(self, message, *, once=False):
	5460	if self._ydl:
	5461	self._ydl.report_warning(message, once)
	5462
	5463	def error(self, message, *, is_error=True):
	5464	if self._ydl:
	5465	self._ydl.report_error(message, is_error=is_error)
	5466
	5467	def stdout(self, message):
	5468	if self._ydl:
	5469	self._ydl.to_stdout(message)
	5470
	5471	def stderr(self, message):
	5472	if self._ydl:
	5473	self._ydl.to_stderr(message)