jfr.im git - yt-dlp.git/blame - yt_dlp/utils/

Commit	Line	Data
1e399778	1	import base64
5bc880b9	2	import binascii
912b38b4	3	import calendar
676eb3f2	4	import codecs
c380cc28	5	import collections
ab029d7e	6	import collections.abc
62e609ab	7	import contextlib
c305a25c	8	import datetime as dt
0c265486	9	import email.header
f8271158	10	import email.utils
f45c185f	11	import errno
49fa4d9a N	12	import hashlib
49fa4d9a N	13	import hmac
ac668111	14	import html.entities
ac668111	15	import html.parser
b1f94422	16	import inspect
03f9daab	17	import io
79a2e94e	18	import itertools
f4bfd65f	19	import json
d77c3dfd	20	import locale
02dbf93f	21	import math
f8271158	22	import mimetypes
db3ad8a6	23	import netrc
347de493	24	import operator
d77c3dfd	25	import os
c496ca96	26	import platform
773f291d	27	import random
d77c3dfd	28	import re
f8271158	29	import shlex
c496ca96	30	import socket
79a2e94e	31	import ssl
ac668111	32	import struct
1c088fa8	33	import subprocess
d77c3dfd	34	import sys
181c8655	35	import tempfile
c380cc28	36	import time
01951dda	37	import traceback
64fa820c	38	import types
989a01c2	39	import unicodedata
14f25df2	40	import urllib.error
f8271158	41	import urllib.parse
ac668111	42	import urllib.request
bcf89ce6	43	import xml.etree.ElementTree
d77c3dfd	44
69bec673	45	from . import traversal
	46
	47	from ..compat import functools # isort: split
	48	from ..compat import (
36e6f62c	49	compat_etree_fromstring,
51098426	50	compat_expanduser,
f8271158	51	compat_HTMLParseError,
efa97bdc	52	compat_os_name,
8c25f81b	53	)
ccfd70f4	54	from ..dependencies import xattr
51fb4995	55
# Drop the last dotted component of this module's name so that the module
# reports itself under its parent package's name.
__name__ = __name__.rsplit('.', 1)[0]  # noqa: A001: Pretend to be the parent module

# This is not clearly defined otherwise
# (the concrete class of the object returned by re.compile())
compiled_regex_type = type(re.compile(''))
	60
f7a147e3	61
class NO_DEFAULT:
    """Marker for "no default supplied", for parameters where ``None`` is a valid value.

    The class object itself is the sentinel; it is compared by identity
    (``default is NO_DEFAULT``) and never instantiated.
    """
	64
	65
	66	def IDENTITY(x):
	67	return x
	68
bf42a990	69
# English month names, in calendar order (index 0 is January)
ENGLISH_MONTH_NAMES = [
    'January', 'February', 'March', 'April', 'May', 'June',
    'July', 'August', 'September', 'October', 'November', 'December']

# Month names keyed by language code; every list is in calendar order
MONTH_NAMES = {
    'en': ENGLISH_MONTH_NAMES,
    'fr': [
        'janvier', 'février', 'mars', 'avril', 'mai', 'juin',
        'juillet', 'août', 'septembre', 'octobre', 'novembre', 'décembre'],
    # these follow the genitive grammatical case (dopełniacz)
    # some websites might be using nominative, which will require another month list
    # https://en.wikibooks.org/wiki/Polish/Noun_cases
    'pl': ['stycznia', 'lutego', 'marca', 'kwietnia', 'maja', 'czerwca',
           'lipca', 'sierpnia', 'września', 'października', 'listopada', 'grudnia'],
}
a942d6cb	85
# From https://github.com/python/cpython/blob/3.11/Lib/email/_parseaddr.py#L36-L42
# Maps a timezone abbreviation to its offset from UTC in whole hours
TIMEZONE_NAMES = {
    'UT': 0, 'UTC': 0, 'GMT': 0, 'Z': 0,
    'AST': -4, 'ADT': -3,  # Atlantic (used in Canada)
    'EST': -5, 'EDT': -4,  # Eastern
    'CST': -6, 'CDT': -5,  # Central
    'MST': -7, 'MDT': -6,  # Mountain
    'PST': -8, 'PDT': -7,  # Pacific
}
8f53dc44	95
# needed for sanitizing filenames in restricted mode
# Maps each accented character to its ASCII replacement; the replacement is
# usually a single letter, but ligatures and Þ/ß/þ expand to two letters
ACCENT_CHARS = dict(zip('ÂÃÄÀÁÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖŐØŒÙÚÛÜŰÝÞßàáâãäåæçèéêëìíîïðñòóôõöőøœùúûüűýþÿ',
                        itertools.chain('AAAAAA', ['AE'], 'CEEEEIIIIDNOOOOOOO', ['OE'], 'UUUUUY', ['TH', 'ss'],
                                        'aaaaaa', ['ae'], 'ceeeeiiiionooooooo', ['oe'], 'uuuuuy', ['th'], 'y')))
c587cbb7	100
# strptime() patterns that are unambiguous regardless of day/month ordering
DATE_FORMATS = (
    '%d %B %Y',
    '%d %b %Y',
    '%B %d %Y',
    '%B %dst %Y',
    '%B %dnd %Y',
    '%B %drd %Y',
    '%B %dth %Y',
    '%b %d %Y',
    '%b %dst %Y',
    '%b %dnd %Y',
    '%b %drd %Y',
    '%b %dth %Y',
    '%b %dst %Y %I:%M',
    '%b %dnd %Y %I:%M',
    '%b %drd %Y %I:%M',
    '%b %dth %Y %I:%M',
    '%Y %m %d',
    '%Y-%m-%d',
    '%Y.%m.%d.',
    '%Y/%m/%d',
    '%Y/%m/%d %H:%M',
    '%Y/%m/%d %H:%M:%S',
    '%Y%m%d%H%M',
    '%Y%m%d%H%M%S',
    '%Y%m%d',
    '%Y-%m-%d %H:%M',
    '%Y-%m-%d %H:%M:%S',
    '%Y-%m-%d %H:%M:%S.%f',
    '%Y-%m-%d %H:%M:%S:%f',
    '%d.%m.%Y %H:%M',
    '%d.%m.%Y %H.%M',
    '%Y-%m-%dT%H:%M:%SZ',
    '%Y-%m-%dT%H:%M:%S.%fZ',
    '%Y-%m-%dT%H:%M:%S.%f0Z',
    '%Y-%m-%dT%H:%M:%S',
    '%Y-%m-%dT%H:%M:%S.%f',
    '%Y-%m-%dT%H:%M',
    '%b %d %Y at %H:%M',
    '%b %d %Y at %H:%M:%S',
    '%B %d %Y at %H:%M',
    '%B %d %Y at %H:%M:%S',
    '%H:%M %d-%b-%Y',
)

# The common patterns followed by the numeric day-before-month variants
DATE_FORMATS_DAY_FIRST = [
    *DATE_FORMATS,
    '%d-%m-%Y',
    '%d.%m.%Y',
    '%d.%m.%y',
    '%d/%m/%Y',
    '%d/%m/%y',
    '%d/%m/%Y %H:%M:%S',
    '%d-%m-%Y %H:%M',
    '%H:%M %d/%m/%Y',
]

# The common patterns followed by the numeric month-before-day variants
DATE_FORMATS_MONTH_FIRST = [
    *DATE_FORMATS,
    '%m-%d-%Y',
    '%m.%d.%Y',
    '%m/%d/%Y',
    '%m/%d/%y',
    '%m/%d/%Y %H:%M:%S',
]
	166
# Matches the argument list of a packed script call of the form
# }('<payload>',<radix>,<count>,'<symbols>'.split('|')); the groups capture
# payload, radix, count and the |-separated symbol table
PACKED_CODES_RE = r"}\('(.+)',(\d+),(\d+),'([^']+)'\.split\('\|'\)"
# Matches a <script type="application/ld+json"> element (type optionally quoted,
# any further attributes allowed); group "json_ld" is the JSON object or array
JSON_LD_RE = r'(?is)<script[^>]+type=(["\']?)application/ld\+json\1[^>]*>\s*(?P<json_ld>{.+?}|\[.+?\])\s*</script>'

# An unsigned decimal number with an optional fractional part
NUMBER_RE = r'\d+(?:\.\d+)?'
1d485a1a	171
7105440c	172
@functools.cache
def preferredencoding():
    """Return the text encoding to use on this system.

    The value of locale.getpreferredencoding() is used when it names a codec
    that can actually encode text; otherwise 'UTF-8' is returned. The result
    is computed once and cached.
    """
    try:
        encoding = locale.getpreferredencoding()
        # Probe the codec: an unknown or unusable name raises here
        'TEST'.encode(encoding)
    except Exception:
        return 'UTF-8'
    return encoding
d77c3dfd	187
f4bfd65f	188
def write_json_file(obj, fn):
    """Serialize *obj* as JSON into the file *fn*, atomically if possible.

    The data is first written to a temporary file next to *fn*, which is then
    renamed over *fn*. If anything fails the temporary file is removed and the
    exception is re-raised.
    """
    target_dir, target_name = os.path.split(fn)
    tmp = tempfile.NamedTemporaryFile(
        mode='w', encoding='utf-8', delete=False,
        dir=target_dir, prefix=f'{target_name}.', suffix='.tmp')

    try:
        with tmp:
            json.dump(obj, tmp, ensure_ascii=False)

        if sys.platform == 'win32':
            # Need to remove existing file on Windows, else os.rename raises
            # WindowsError or FileExistsError.
            try:
                os.unlink(fn)
            except OSError:
                pass

        try:
            # Give the result the permissions a normally created file would get:
            # read the current umask (umask can only be queried by setting it)
            current_umask = os.umask(0)
            os.umask(current_umask)
            os.chmod(tmp.name, 0o666 & ~current_umask)
        except OSError:
            pass

        os.rename(tmp.name, fn)
    except Exception:
        try:
            os.remove(tmp.name)
        except OSError:
            pass
        raise
	213
	214
def find_xpath_attr(node, xpath, key, val=None):
    """Return the first element matching ``xpath[@key]``, or ``xpath[@key='val']`` when *val* is given.

    *key* may only contain ASCII letters, ``_`` and ``-``. Returns None when
    nothing matches.
    """
    assert re.match(r'^[a-zA-Z_-]+$', key)
    predicate = f'[@{key}]' if val is None else f"[@{key}='{val}']"
    return node.find(xpath + predicate)
59ae56fa	220
d7e66d39 JMF	221	# On python2.6 the xml.etree.ElementTree.Element methods don't support
d7e66d39 JMF	222	# the namespace parameter
5f6a1245 JW	223
5f6a1245 JW	224
def xpath_with_ns(path, ns_map):
    """Expand ``prefix:tag`` steps of *path* into ElementTree's ``{uri}tag`` form.

    @param path    '/'-separated path whose steps may carry a namespace prefix
    @param ns_map  mapping of prefix -> namespace URI
    Steps without a prefix are left untouched.
    """
    def _expand(step):
        pieces = step.split(':')
        if len(pieces) == 1:
            return pieces[0]
        prefix, tag = pieces
        return f'{{{ns_map[prefix]}}}{tag}'

    return '/'.join(map(_expand, path.split('/')))
d7e66d39 JMF	235
d77c3dfd	236
def xpath_element(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
    """Return the first element of *node* matching *xpath*.

    @param xpath    a single xpath string, or an iterable of xpaths that are
                    tried in order until one matches
    @param name     label used in the error message (defaults to *xpath*)
    @param fatal    raise ExtractorError when nothing matches
    @param default  value returned when nothing matches; takes precedence
                    over *fatal*
    Returns None when nothing matches and neither *default* nor *fatal* is set.
    """
    if isinstance(xpath, str):
        n = node.find(xpath)
    else:
        # Start from "not found" so that an empty iterable of xpaths is
        # handled like any other miss instead of leaving `n` unbound
        n = None
        for xp in xpath:
            n = node.find(xp)
            if n is not None:
                break

    if n is None:
        if default is not NO_DEFAULT:
            return default
        elif fatal:
            name = xpath if name is None else name
            raise ExtractorError(f'Could not find XML element {name}')
        else:
            return None
    return n
	258
	259
	260	def xpath_text(node, xpath, name=None, fatal=False, default=NO_DEFAULT):
8e636da4 S	261	n = xpath_element(node, xpath, name, fatal=fatal, default=default)
	262	if n is None or n == default:
	263	return n
	264	if n.text is None:
	265	if default is not NO_DEFAULT:
	266	return default
	267	elif fatal:
	268	name = xpath if name is None else name
add96eb9	269	raise ExtractorError(f'Could not find XML element\'s text {name}')
8e636da4 S	270	else:
	271	return None
	272	return n.text
a41fb80c S	273
	274
	275	def xpath_attr(node, xpath, key, name=None, fatal=False, default=NO_DEFAULT):
	276	n = find_xpath_attr(node, xpath, key)
	277	if n is None:
	278	if default is not NO_DEFAULT:
	279	return default
	280	elif fatal:
86e5f3ed	281	name = f'{xpath}[@{key}]' if name is None else name
add96eb9	282	raise ExtractorError(f'Could not find XML attribute {name}')
a41fb80c S	283	else:
	284	return None
	285	return n.attrib[key]
bf0ff932 PH	286
bf0ff932 PH	287
def get_element_by_id(id, html, **kwargs):
    """Return the content of the first tag in *html* whose ``id`` attribute is *id*, or None"""
    attribute = 'id'
    return get_element_by_attribute(attribute, id, html, **kwargs)
43e8fafd	291
12ea2f30	292
def get_element_html_by_id(id, html, **kwargs):
    """Return the complete html of the first tag in *html* whose ``id`` attribute is *id*, or None"""
    attribute = 'id'
    return get_element_html_by_attribute(attribute, id, html, **kwargs)
6f32a0b5 ZM	296
6f32a0b5 ZM	297
def get_element_by_class(class_name, html):
    """Return the content of the first tag in *html* carrying class *class_name*, or None"""
    for content in get_elements_by_class(class_name, html):
        return content
    return None
	302
	303
def get_element_html_by_class(class_name, html):
    """Return the complete html of the first tag in *html* carrying class *class_name*, or None"""
    for whole in get_elements_html_by_class(class_name, html):
        return whole
    return None
	308
	309
def get_element_by_attribute(attribute, value, html, **kwargs):
    """Return the content of the first tag in *html* whose *attribute* equals *value*, or None"""
    for content in get_elements_by_attribute(attribute, value, html, **kwargs):
        return content
    return None
	313
	314
def get_element_html_by_attribute(attribute, value, html, **kargs):
    """Return the complete html of the first tag in *html* whose *attribute* equals *value*, or None"""
    for whole in get_elements_html_by_attribute(attribute, value, html, **kargs):
        return whole
    return None
	318
	319
def get_elements_by_class(class_name, html, **kargs):
    """Return the content of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute is a whitespace-separated list: allow any other
    # class names before and after, but require class_name to be delimited by
    # a quote or whitespace on both sides so that substrings do not match.
    # NOTE: **kargs is accepted but not forwarded
    return get_elements_by_attribute(
        'class', rf'[^\'"]*(?<=[\'"\s]){re.escape(class_name)}(?=[\'"\s])[^\'"]*',
        html, escape_value=False)
	325
	326
def get_elements_html_by_class(class_name, html):
    """Return the html of all tags with the specified class in the passed HTML document as a list"""
    # The class attribute is a whitespace-separated list: allow any other
    # class names before and after, but require class_name to be delimited by
    # a quote or whitespace on both sides so that substrings do not match
    return get_elements_html_by_attribute(
        'class', rf'[^\'"]*(?<=[\'"\s]){re.escape(class_name)}(?=[\'"\s])[^\'"]*',
        html, escape_value=False)
	332
	333
	334	def get_elements_by_attribute(args, *kwargs):
43e8fafd	335	"""Return the content of the tag with the specified attribute in the passed HTML document"""
6f32a0b5 ZM	336	return [content for content, _ in get_elements_text_and_html_by_attribute(args, *kwargs)]
	337
	338
	339	def get_elements_html_by_attribute(args, *kwargs):
	340	"""Return the html of the tag with the specified attribute in the passed HTML document"""
	341	return [whole for _, whole in get_elements_text_and_html_by_attribute(args, *kwargs)]
	342
	343
def get_elements_text_and_html_by_attribute(attribute, value, html, *, tag=r'[\w:.-]+', escape_value=True):
    """
    Yield the text (content) and the html (whole) of every tag with the specified
    attribute in the passed HTML document

    @param attribute     attribute name, matched literally
    @param value         attribute value; an empty value yields nothing
    @param tag           regex restricting which tag names are considered
    @param escape_value  when False, `value` is used as a regex instead of a literal
    """
    if not value:
        return

    # The quotes around the value are optional, unless the value starts with a
    # character from the set below
    quote = '' if re.match(r'''[\s"'`=<>]''', value) else '?'

    value = re.escape(value) if escape_value else value

    # Opening tag up to and including the wanted attribute: any preceding
    # attributes (with quoted values skipped as a unit), then attribute=value
    partial_element_re = rf'''(?x)
        <(?P<tag>{tag})
        (?:\s(?:[^>"']|"[^"]*"|'[^']*')*)?
        \s{re.escape(attribute)}\s*=\s*(?P<_q>['"]{quote})(?-x:{value})(?P=_q)
    '''

    for m in re.finditer(partial_element_re, html):
        content, whole = get_element_text_and_html_by_tag(m.group('tag'), html[m.start():])

        yield (
            # Strip one pair of matching quotes around the whole content, then decode entities
            unescapeHTML(re.sub(r'^(?P<q>["\'])(?P<content>.*)(?P=q)$', r'\g<content>', content, flags=re.DOTALL)),
            whole,
        )
a921f407	369
c5229f39	370
class HTMLBreakOnClosingTagParser(html.parser.HTMLParser):
    """
    HTML parser that tracks open tags and raises HTMLBreakOnClosingTagException
    as soon as the closing tag of the first opening tag it saw has been reached.
    Can be used as a context manager.
    """

    class HTMLBreakOnClosingTagException(Exception):
        """Signals that the outermost element has been closed"""

    def __init__(self):
        # Names of the currently open tags, outermost first
        self.tagstack = collections.deque()
        super().__init__()

    def __enter__(self):
        return self

    def __exit__(self, *_):
        self.close()

    def close(self):
        # handle_endtag does not return upon raising HTMLBreakOnClosingTagException,
        # so data remains buffered; we no longer have any interest in it, thus
        # override this method to discard it
        pass

    def handle_starttag(self, tag, _):
        self.tagstack.append(tag)

    def handle_endtag(self, tag):
        if not self.tagstack:
            raise compat_HTMLParseError('no tags in the stack')

        # Unwind the stack down to (and including) the tag being closed;
        # anything opened after it is treated as implicitly closed
        matched = False
        while self.tagstack and not matched:
            matched = self.tagstack.pop() == tag
        if not matched:
            raise compat_HTMLParseError(f'matching opening tag for closing {tag} tag not found')

        if not self.tagstack:
            raise self.HTMLBreakOnClosingTagException
6f32a0b5 ZM	411
6f32a0b5 ZM	412
46d09f87	413	# XXX: This should be far less strict
def get_element_text_and_html_by_tag(tag, html):
    """
    For the first element with the specified tag in the passed HTML document
    return its content (text) and the whole element (html)

    Raises compat_HTMLParseError when the element cannot be delimited.
    """
    def _locate(needle, start, message):
        # Absolute index of `needle` in html at or after `start`
        try:
            return html.index(needle, start)
        except ValueError:
            raise compat_HTMLParseError(message)

    closing_tag = f'</{tag}>'
    whole_start = _locate(f'<{tag}', 0, f'opening {tag} tag not found')
    content_start = _locate('>', whole_start, f'malformed opening {tag} tag') + 1

    with HTMLBreakOnClosingTagParser() as parser:
        parser.feed(html[whole_start:content_start])
        if not parser.tagstack or parser.tagstack[0] != tag:
            raise compat_HTMLParseError(f'parser did not match opening {tag} tag')

        # Feed the document one candidate closing tag at a time; the parser
        # raises once the closing tag that balances the opening one is consumed
        cursor = content_start
        while cursor < len(html):
            close_start = _locate(closing_tag, cursor, f'closing {tag} tag not found')
            close_end = close_start + len(closing_tag)
            try:
                parser.feed(html[cursor:close_end])
            except HTMLBreakOnClosingTagParser.HTMLBreakOnClosingTagException:
                return html[content_start:close_start], html[whole_start:close_end]
            cursor = close_end
        raise compat_HTMLParseError('unexpected end of html')
	447
	448
class HTMLAttributeParser(html.parser.HTMLParser):
    """Minimal HTML parser that records the attributes of the first start tag it sees"""

    def __init__(self):
        self.attrs = {}
        super().__init__()

    def handle_starttag(self, tag, attrs):
        self.attrs = dict(attrs)
        # Only the first element is of interest: abort parsing right away
        raise compat_HTMLParseError('done')
8bb56eee	459
c5229f39	460
class HTMLListAttrsParser(html.parser.HTMLParser):
    """HTML parser that collects the attributes of each top-level <li> element"""

    def __init__(self):
        super().__init__()
        self.items = []  # one attribute dict per top-level <li>
        self._level = 0  # start tags seen minus end tags seen

    def handle_starttag(self, tag, attrs):
        at_top_level = self._level == 0
        self._level += 1
        if at_top_level and tag == 'li':
            self.items.append(dict(attrs))

    def handle_endtag(self, tag):
        self._level -= 1
	476
	477
def extract_attributes(html_element):
    """Parse the opening tag of an HTML element and return its attributes as a dict.

    The attributes are collected by HTMLAttributeParser, which builds the dict
    from the (name, value) pairs that html.parser reports for the first start
    tag. An empty dict is returned when no start tag is found.
    """
    parser = HTMLAttributeParser()
    try:
        parser.feed(html_element)
        parser.close()
    except compat_HTMLParseError:
        # Raised by the parser itself once the first start tag has been handled
        pass
    return parser.attrs
9e6dd238	497
c5229f39	498
def parse_list(webpage):
    """Given a string containing a series of HTML <li> elements,
    return a list with one attribute dictionary per top-level <li>"""
    list_parser = HTMLListAttrsParser()
    list_parser.feed(webpage)
    list_parser.close()
    return list_parser.items
	506
	507
def clean_html(html):
    """Turn an HTML snippet into readable plain text

    Whitespace runs are collapsed, <br> and paragraph boundaries become
    newlines, all remaining tags are dropped and entities are decoded.
    None is passed through unchanged.
    """
    if html is None:  # Convenience for sanitizing descriptions etc.
        return html

    # Applied in order: (pattern, replacement)
    substitutions = (
        (r'\s+', ' '),
        (r'(?u)\s?<\s?br\s?/?\s?>\s?', '\n'),
        (r'(?u)<\s?/\s?p\s?>\s?<\s?p[^>]*>', '\n'),
        # Strip html tags
        ('<.*?>', ''),
    )
    for pattern, replacement in substitutions:
        html = re.sub(pattern, replacement, html)

    # Replace html entities
    return unescapeHTML(html).strip()
9e6dd238 FV	522
9e6dd238 FV	523
class LenientJSONDecoder(json.JSONDecoder):
    """JSON decoder that tolerates some common defects of scraped JSON

    @param transform_source  callable applied to the input string before decoding
    @param ignore_extra      ignore any trailing data after the first JSON value
    @param close_objects     number of unterminated objects/arrays that may be
                             closed automatically when the input is truncated
    All other arguments are passed on to json.JSONDecoder.
    """
    # TODO: Write tests

    def __init__(self, *args, transform_source=None, ignore_extra=False, close_objects=0, **kwargs):
        self.transform_source, self.ignore_extra = transform_source, ignore_extra
        # Closing one container can take two passes: one to append the comma
        # (to provoke the right error message), one to replace it with the bracket
        self._close_attempts = 2 * close_objects
        super().__init__(*args, **kwargs)

    @staticmethod
    def _close_object(err):
        """Return the document truncated at the error and patched up, or None if it cannot be repaired"""
        doc = err.doc[:err.pos]
        # We need to add comma first to get the correct error message
        if err.msg.startswith('Expecting \',\''):
            return doc + ','
        elif not doc.endswith(','):
            return None

        if err.msg.startswith('Expecting property name'):
            return doc[:-1] + '}'
        elif err.msg.startswith('Expecting value'):
            return doc[:-1] + ']'
        return None

    def decode(self, s):
        """Decode *s*, raising json.JSONDecodeError (with an excerpt around the error) on failure"""
        if self.transform_source:
            s = self.transform_source(s)
        for attempt in range(self._close_attempts + 1):
            try:
                if self.ignore_extra:
                    return self.raw_decode(s.lstrip())[0]
                return super().decode(s)
            except json.JSONDecodeError as e:
                if e.pos is None:
                    raise
                if attempt < self._close_attempts:
                    # Keep `s` intact when the repair fails: it is still
                    # needed for reporting the error
                    repaired = self._close_object(e)
                    if repaired is not None:
                        s = repaired
                        continue
                # e.pos refers to e.doc, which is the text that was actually parsed.
                # Clamp the start so that an error near the beginning does not
                # turn into a negative (wrapping) slice index
                excerpt = e.doc[max(e.pos - 10, 0):e.pos + 10]
                raise type(e)(f'{e.msg} in {excerpt!r}', e.doc, e.pos)
        raise AssertionError('Too many attempts to decode JSON')
b7c47b74	562
b7c47b74	563
def sanitize_open(filename, open_mode):
    """Open *filename*, retrying once with a sanitized name if that fails.

    The special name '-' stands for standard output. Otherwise the file is
    opened (locked, where locking is available); if that fails with an OSError
    other than EACCES, the name is passed through sanitize_path() and the open
    is attempted a second time. A failure of the second attempt, or a name
    that sanitizing does not change, re-raises the error.

    Returns the tuple (stream, definitive_file_name).
    """
    if filename == '-':
        if sys.platform == 'win32':
            import msvcrt

            # stdout may be any IO stream, e.g. when using contextlib.redirect_stdout
            try:
                msvcrt.setmode(sys.stdout.fileno(), os.O_BINARY)
            except io.UnsupportedOperation:
                pass
        return getattr(sys.stdout, 'buffer', sys.stdout), filename

    for attempt in range(2):
        try:
            try:
                if sys.platform == 'win32':
                    # FIXME: An exclusive lock also locks the file from being read.
                    # Since windows locks are mandatory, don't lock the file on windows (for now).
                    # Ref: https://github.com/yt-dlp/yt-dlp/issues/3124
                    raise LockingUnsupportedError
                stream = locked_file(filename, open_mode, block=False).__enter__()
            except OSError:
                # Locking failed or is unsupported: fall back to a plain open
                stream = open(filename, open_mode)
            return stream, filename
        except OSError as err:
            is_last_attempt = bool(attempt)
            if is_last_attempt or err.errno == errno.EACCES:
                raise
            sanitized = sanitize_path(filename)
            unchanged = sanitized == filename
            filename = sanitized
            if unchanged:
                raise
d77c3dfd FV	601
	602
	603	def timeconvert(timestr):
59ae15a5 PH	604	"""Convert RFC 2822 defined time string into system timestamp"""
	605	timestamp = None
	606	timetuple = email.utils.parsedate_tz(timestr)
	607	if timetuple is not None:
	608	timestamp = email.utils.mktime_tz(timetuple)
	609	return timestamp
1c469a94	610
5f6a1245	611
def sanitize_filename(s, restricted=False, is_id=NO_DEFAULT):
    """Sanitizes a string so it could be used as part of a filename.
    @param restricted   Use a stricter subset of allowed characters
    @param is_id        Whether this is an ID that should be kept unchanged if possible.
                        If unset, yt-dlp's new sanitization rules are in effect
    """
    if s == '':
        return ''

    # NUL ('\0') prefixes every substitute character, so that substitutes can
    # later be told apart from the same character occurring in the input
    def replace_insane(char):
        if restricted and char in ACCENT_CHARS:
            return ACCENT_CHARS[char]
        elif not restricted and char == '\n':
            return '\0 '
        elif is_id is NO_DEFAULT and not restricted and char in '"*:<>?|/\\':
            # Replace with their full-width unicode counterparts
            return {'/': '\u29F8', '\\': '\u29f9'}.get(char, chr(ord(char) + 0xfee0))
        elif char == '?' or ord(char) < 32 or ord(char) == 127:
            return ''
        elif char == '"':
            return '' if restricted else '\''
        elif char == ':':
            return '\0_\0-' if restricted else '\0 \0-'
        elif char in '\\/|*<>':
            return '\0_'
        if restricted and (char in '!&\'()[]{}$;`^,#' or char.isspace() or ord(char) > 127):
            return '' if unicodedata.category(char)[0] in 'CM' else '\0_'
        return char

    # Replace look-alike Unicode glyphs
    if restricted and (is_id is NO_DEFAULT or not is_id):
        s = unicodedata.normalize('NFKC', s)
    s = re.sub(r'[0-9]+(?::[0-9]+)+', lambda m: m.group(0).replace(':', '_'), s)  # Handle timestamps
    result = ''.join(map(replace_insane, s))
    if is_id is NO_DEFAULT:
        result = re.sub(r'(\0.)(?:(?=\1)..)+', r'\1', result)  # Remove repeated substitute chars
        STRIP_RE = r'(?:\0.|[ _-])*'
        result = re.sub(f'^\0.{STRIP_RE}|{STRIP_RE}\0.$', '', result)  # Remove substitute chars from start/end
    result = result.replace('\0', '') or '_'

    if not is_id:
        while '__' in result:
            result = result.replace('__', '_')
        result = result.strip('_')
        # Common case of "Foreign band name - English song title"
        if restricted and result.startswith('-_'):
            result = result[2:]
        if result.startswith('-'):
            result = '_' + result[len('-'):]
        result = result.lstrip('.')
        if not result:
            result = '_'
    return result
d77c3dfd	665
5f6a1245	666
def sanitize_path(s, force=False):
    """Sanitizes and normalizes path on Windows

    @param force  Also sanitize on other platforms (ignored on Windows,
                  where sanitization is always performed)
    On non-Windows platforms the path is returned unchanged unless *force* is set.
    """
    # XXX: this handles drive relative paths (c:sth) incorrectly
    if sys.platform == 'win32':
        force = False
        drive_or_unc, _ = os.path.splitdrive(s)
    elif force:
        drive_or_unc = ''
    else:
        return s

    norm_path = os.path.normpath(remove_start(s, drive_or_unc)).split(os.path.sep)
    if drive_or_unc:
        norm_path.pop(0)
    # In every component, replace the characters / < > : " | \ ? * as well as
    # a trailing whitespace or dot with '#'
    sanitized_path = [
        path_part if path_part in ['.', '..'] else re.sub(r'(?:[/<>:"\|\\?\*]|[\s.]$)', '#', path_part)
        for path_part in norm_path]
    if drive_or_unc:
        sanitized_path.insert(0, drive_or_unc + os.path.sep)
    elif force and s and s[0] == os.path.sep:
        sanitized_path.insert(0, os.path.sep)
    # TODO: Fix behavioral differences <3.12
    # The workaround using `normpath` only superficially passes tests
    # Ref: https://github.com/python/cpython/pull/100351
    return os.path.normpath(os.path.join(*sanitized_path))
a2aaf4db S	692
a2aaf4db S	693
def sanitize_url(url, *, scheme='http'):
    """Repair a few common URL defects

    Protocol-relative URLs (``//host/...``) get *scheme* prepended and known
    scheme typos are corrected; anything else is returned as is. None gives None.
    """
    # Prepend protocol-less URLs with `http:` scheme in order to mitigate
    # the number of unwanted failures due to missing protocol
    if url is None:
        return None
    if url.startswith('//'):
        return f'{scheme}:{url}'

    # Fix some common typos seen so far
    COMMON_TYPOS = (
        # https://github.com/ytdl-org/youtube-dl/issues/15649
        (r'^httpss://', r'https://'),
        # https://bx1.be/lives/direct-tv/
        (r'^rmtp([es]?)://', r'rtmp\1://'),
    )
    for mistake, fixup in COMMON_TYPOS:
        corrected, hits = re.subn(mistake, fixup, url)
        if hits:
            return corrected
    return url
17bcc626 S	712
17bcc626 S	713
def extract_basic_auth(url):
    """Split embedded credentials off *url*

    Returns (url_without_credentials, authorization_header_value). When the
    URL carries no username, the URL is returned untouched together with None.
    """
    parts = urllib.parse.urlsplit(url)
    if parts.username is None:
        return url, None

    # Rebuild the netloc from host (and port) only
    netloc = parts.hostname if parts.port is None else f'{parts.hostname}:{parts.port}'
    stripped_url = urllib.parse.urlunsplit(parts._replace(netloc=netloc))

    credentials = '{}:{}'.format(parts.username, parts.password or '')
    token = base64.b64encode(credentials.encode()).decode()
    return stripped_url, f'Basic {token}'
5435dcf9 HH	724
5435dcf9 HH	725
def expand_path(s):
    """Expand a leading ~ and then any shell-style variables in the path *s*"""
    with_home = compat_expanduser(s)
    return os.path.expandvars(with_home)
	729
	730
def orderedSet(iterable, *, lazy=False):
    """Remove all duplicates from the input iterable, keeping first occurrences in order

    Returns a list, or a generator when *lazy* is set.
    """
    def _unique():
        emitted = []  # Do not use set since the items can be unhashable
        for item in iterable:
            if item in emitted:
                continue
            emitted.append(item)
            yield item

    unique = _unique()
    return unique if lazy else list(unique)
d77c3dfd	741
912b38b4	742
def _htmlentity_transform(entity_with_semicolon):
    """Transforms an HTML entity to a character.

    @param entity_with_semicolon  the entity without the leading '&' but
                                  including the trailing ';' (e.g. 'amp;')
    Unknown or invalid entities are returned in their literal '&...;' form.
    """
    entity = entity_with_semicolon[:-1]

    # Known non-numeric HTML entity
    if entity in html.entities.name2codepoint:
        return chr(html.entities.name2codepoint[entity])

    # TODO: HTML5 allows entities without a semicolon.
    # E.g. '&Eacuteric' should be decoded as 'Éric'.
    if entity_with_semicolon in html.entities.html5:
        return html.entities.html5[entity_with_semicolon]

    # Numeric character reference: hexadecimal (#x...) or decimal (#...)
    mobj = re.match(r'#(x[0-9a-fA-F]+|[0-9]+)', entity)
    if mobj is not None:
        numstr = mobj.group(1)
        if numstr.startswith('x'):
            base = 16
            numstr = f'0{numstr}'
        else:
            base = 10
        # See https://github.com/ytdl-org/youtube-dl/issues/7518
        # chr() raises ValueError for code points out of range and
        # OverflowError when the number does not even fit a C int
        with contextlib.suppress(ValueError, OverflowError):
            return chr(int(numstr, base))

    # Unknown entity in name, return its literal representation
    return f'&{entity};'


def unescapeHTML(s):
    """Replace the HTML entities in *s* by the characters they stand for (None is passed through)"""
    if s is None:
        return None
    assert isinstance(s, str)

    return re.sub(
        r'&([^&;]+;)', lambda m: _htmlentity_transform(m.group(1)), s)
d77c3dfd	779
8bf48f23	780
def escapeHTML(text):
    """Escape the characters & < > " ' in *text* so it can be embedded in HTML"""
    return (
        text
        # '&' must be handled first, otherwise the '&' of the entities
        # produced by the other replacements would get escaped again
        .replace('&', '&amp;')
        .replace('<', '&lt;')
        .replace('>', '&gt;')
        .replace('"', '&quot;')
        .replace("'", '&#39;')
    )
	790
	791
class netrc_from_content(netrc.netrc):
    """netrc.netrc built from the text *content* instead of from a file"""

    def __init__(self, content):
        # The base initializer is not called; set up the state it would
        # create and run the parser on an in-memory stream
        self.hosts = {}
        self.macros = {}
        with io.StringIO(content) as stream:
            self._parse('-', stream, False)
	797
	798
class Popen(subprocess.Popen):
    """subprocess.Popen with a few conveniences

    - the environment is repaired when running from a PyInstaller bundle
    - text=True implies utf-8 decoding with errors='replace'
    - shell commands on Windows are run through cmd.exe explicitly
    - communicate_or_kill() and run() make sure the process does not outlive an error
    """
    if sys.platform == 'win32':
        _startupinfo = subprocess.STARTUPINFO()
        _startupinfo.dwFlags |= subprocess.STARTF_USESHOWWINDOW
    else:
        _startupinfo = None

    @staticmethod
    def _fix_pyinstaller_ld_path(env):
        """Restore LD_LIBRARY_PATH when using PyInstaller
            Ref: https://github.com/pyinstaller/pyinstaller/blob/develop/doc/runtime-information.rst#ld_library_path--libpath-considerations
                 https://github.com/yt-dlp/yt-dlp/issues/4573
        """
        if not hasattr(sys, '_MEIPASS'):
            return

        def _fix(key):
            # Restore the saved "<key>_ORIG" value, or drop the variable if none was saved
            orig = env.get(f'{key}_ORIG')
            if orig is None:
                env.pop(key, None)
            else:
                env[key] = orig

        _fix('LD_LIBRARY_PATH')  # Linux
        _fix('DYLD_LIBRARY_PATH')  # macOS

    def __init__(self, args, *remaining, env=None, text=False, shell=False, **kwargs):
        """Takes the arguments of subprocess.Popen; `env` defaults to a copy of os.environ"""
        if env is None:
            env = os.environ.copy()
        self._fix_pyinstaller_ld_path(env)

        # Remembered so that run() can return '' or b'' to match the stream type
        self.__text_mode = kwargs.get('encoding') or kwargs.get('errors') or text or kwargs.get('universal_newlines')
        if text is True:
            kwargs['universal_newlines'] = True  # For 3.6 compatibility
            kwargs.setdefault('encoding', 'utf-8')
            kwargs.setdefault('errors', 'replace')

        if shell and compat_os_name == 'nt' and kwargs.get('executable') is None:
            if not isinstance(args, str):
                args = shell_quote(args, shell=True)
            shell = False
            # Set variable for `cmd.exe` newline escaping (see `utils.shell_quote`)
            env['='] = '"^\n\n"'
            args = f'{self.__comspec()} /Q /S /D /V:OFF /E:ON /C "{args}"'

        super().__init__(args, *remaining, env=env, shell=shell, **kwargs, startupinfo=self._startupinfo)

    def __comspec(self):
        """Return the absolute path of the Windows command interpreter"""
        comspec = os.environ.get('ComSpec') or os.path.join(
            os.environ.get('SystemRoot', ''), 'System32', 'cmd.exe')
        if os.path.isabs(comspec):
            return comspec
        raise FileNotFoundError('shell not found: neither %ComSpec% nor %SystemRoot% is set')

    def communicate_or_kill(self, *args, **kwargs):
        """Like communicate(), but kill the process (and wait for it) if anything is raised"""
        try:
            return self.communicate(*args, **kwargs)
        except BaseException:  # Including KeyboardInterrupt
            self.kill(timeout=None)
            raise

    def kill(self, *, timeout=0):
        """Kill the process; unless `timeout` is 0, also wait for it (None waits indefinitely)"""
        super().kill()
        if timeout != 0:
            self.wait(timeout=timeout)

    @classmethod
    def run(cls, *args, timeout=None, **kwargs):
        """Run a process to completion and return (stdout, stderr, returncode)

        Streams that were not captured are returned as an empty str/bytes
        (matching the text mode) rather than None.
        """
        with cls(*args, **kwargs) as proc:
            default = '' if proc.__text_mode else b''
            stdout, stderr = proc.communicate_or_kill(timeout=timeout)
            return stdout or default, stderr or default, proc.returncode
f0c9fb96	871
d3c93ec2	872
def encodeArgument(s):
    """Return the argument as str, decoding byte strings as ASCII"""
    # Legacy code that uses byte strings
    # Uncomment the following line after fixing all post processors
    # assert isinstance(s, str), 'Internal error: %r should be of type %r, is %r' % (s, str, type(s))
    if isinstance(s, str):
        return s
    return s.decode('ascii')
f07b74fc PH	878
f07b74fc PH	879
_timetuple = collections.namedtuple('Time', ('hours', 'minutes', 'seconds', 'milliseconds'))


def timetuple_from_msec(msec):
    """Split a duration in milliseconds into (hours, minutes, seconds, milliseconds)"""
    total_seconds, millis = divmod(msec, 1000)
    total_minutes, seconds = divmod(total_seconds, 60)
    hours, minutes = divmod(total_minutes, 60)
    return _timetuple(hours, minutes, seconds, millis)
	888
	889
def formatSeconds(secs, delim=':', msec=False):
    """Format a duration in seconds as [H:][MM:]SS, optionally followed by .mmm

    @param delim  Separator placed between the hour, minute and second fields
    @param msec   Whether to append the milliseconds
    """
    parts = timetuple_from_msec(secs * 1000)
    if parts.hours:
        formatted = '%d%s%02d%s%02d' % (parts.hours, delim, parts.minutes, delim, parts.seconds)
    elif parts.minutes:
        formatted = '%d%s%02d' % (parts.minutes, delim, parts.seconds)
    else:
        formatted = '%d' % parts.seconds
    if not msec:
        return formatted
    return '%s.%03d' % (formatted, parts.milliseconds)
4539dd30	899
a0ddb8a2	900
def bug_reports_message(before=';'):
    """Return the "please report this issue" text, joined to the text that precedes it.

    @param before  Text preceding the message. The message is capitalized when
                   this is empty or ends a sentence ('.', '!' or '?')
    """
    from ..update import REPOSITORY

    msg = (f'please report this issue on  https://github.com/{REPOSITORY}/issues?q= , '
           'filling out the appropriate issue template. Confirm you are on the latest version using  yt-dlp -U')

    before = before.rstrip()
    starts_sentence = not before or before.endswith(('.', '!', '?'))
    if starts_sentence:
        msg = msg[0].title() + msg[1:]

    prefix = before + ' ' if before else ''
    return prefix + msg
08f2a92c JMF	912
08f2a92c JMF	913
class YoutubeDLError(Exception):
    """Base exception for YoutubeDL errors."""
    # Subclasses may preset a default message here
    msg = None

    def __init__(self, msg=None):
        """Use msg if given, else the class-level default, else the class name"""
        if msg is None:
            if self.msg is None:
                self.msg = type(self).__name__
        else:
            self.msg = msg
        super().__init__(self.msg)
bf5b9d85 PM	924
	925
class ExtractorError(YoutubeDLError):
    """Error during info extraction."""

    def __init__(self, msg, tb=None, expected=False, cause=None, video_id=None, ie=None):
        """ tb, if given, is the original traceback (so that it can be printed out).
        If expected is set, this is a normal error message and most likely not a bug in yt-dlp.

        cause, video_id and ie, if given, are included in the formatted message.
        """
        from ..networking.exceptions import network_exceptions
        # An error raised while a network exception is being handled is always treated as expected
        if sys.exc_info()[0] in network_exceptions:
            expected = True

        # `msg` is still unset (None) at this point, so __setattr__ does not
        # rebuild the message for any of the assignments below
        self.orig_msg = str(msg)
        self.traceback = tb
        self.expected = expected
        self.cause = cause
        self.video_id = video_id
        self.ie = ie
        self.exc_info = sys.exc_info()  # preserve original exception
        # When wrapping another ExtractorError, reuse the exception info it preserved
        if isinstance(self.exc_info[1], ExtractorError):
            self.exc_info = self.exc_info[1].exc_info
        super().__init__(self.__msg)

    @property
    def __msg(self):
        # Full message: "[ie] video_id: orig_msg (caused by ...)" followed by
        # the bug report text when the error is not expected
        return ''.join((
            format_field(self.ie, None, '[%s] '),
            format_field(self.video_id, None, '%s: '),
            self.orig_msg,
            format_field(self.cause, None, ' (caused by %r)'),
            '' if self.expected else bug_reports_message()))

    def format_traceback(self):
        # Joins the formatted `traceback` and the formatted `cause` (minus its first line);
        # returns None if there is neither
        return join_nonempty(
            self.traceback and ''.join(traceback.format_tb(self.traceback)),
            self.cause and ''.join(traceback.format_exception(None, self.cause, self.cause.__traceback__)[1:]),
            delim='\n') or None

    def __setattr__(self, name, value):
        super().__setattr__(name, value)
        # Once a message exists, changing any other attribute rebuilds `msg` and `args`
        if getattr(self, 'msg', None) and name not in ('msg', 'args'):
            self.msg = self.__msg or type(self).__name__
            self.args = (self.msg, )  # Cannot be property
	968
1c256f70	969
class UnsupportedError(ExtractorError):
    """Expected extraction error for a URL that is not supported"""

    def __init__(self, url):
        message = f'Unsupported URL: {url}'
        super().__init__(message, expected=True)
        self.url = url
	975
	976
class RegexNotFoundError(ExtractorError):
    """Extraction error for a regular expression that did not match"""
	980
	981
class GeoRestrictedError(ExtractorError):
    """Geographic restriction Error exception.

    Raised when a website does not make a video available from the
    user's geographic location. Always marked as an expected error.
    """

    def __init__(self, msg, countries=None, **kwargs):
        super().__init__(msg, **{**kwargs, 'expected': True})
        self.countries = countries
	993
	994
class UserNotLive(ExtractorError):
    """Expected extraction error for a channel/user that is not live"""

    def __init__(self, msg=None, **kwargs):
        message = msg or 'The channel is not currently live'
        super().__init__(message, **{**kwargs, 'expected': True})
	1001
	1002
class DownloadError(YoutubeDLError):
    """Download Error exception.

    FileDownloader objects may raise this, carrying the appropriate error
    message, when they are not configured to continue on errors.
    """

    def __init__(self, msg, exc_info=None):
        """ exc_info, if given, is the original exception that caused the trouble (as returned by sys.exc_info()). """
        self.exc_info = exc_info
        super().__init__(msg)
d77c3dfd FV	1015
d77c3dfd FV	1016
class EntryNotInPlaylist(YoutubeDLError):
    """Entry not in playlist exception.

    Raised by YoutubeDL when a requested entry cannot be found
    in the playlist info_dict
    """
    msg = 'Entry not found in info'
498f5606	1024
498f5606	1025
class SameFileError(YoutubeDLError):
    """Same File exception.

    Raised by FileDownloader objects when several downloads would
    have to be written to the same file on disk.
    """
    msg = 'Fixed output name but more than one file to download'

    def __init__(self, filename=None):
        """filename, if given, is appended to the message"""
        if filename is not None:
            self.msg = f'{self.msg}: {filename}'
        super().__init__(self.msg)
d77c3dfd FV	1038
d77c3dfd FV	1039
class PostProcessingError(YoutubeDLError):
    """Post Processing exception.

    A PostProcessor's .run() method may raise this to signal
    that the postprocessing task failed.
    """
5f6a1245	1046
5f6a1245	1047
class DownloadCancelled(YoutubeDLError):
    """ Raised to interrupt the download queue """
    msg = 'The download was cancelled'
8b0d7497	1051
8b0d7497	1052
class ExistingVideoReached(DownloadCancelled):
    """ Raised when --break-on-existing is triggered """
    msg = 'Encountered a video that is already in the archive, stopping due to --break-on-existing'
8b0d7497	1056
48f79687	1057
class RejectedVideoReached(DownloadCancelled):
    """ Raised when --break-match-filter is triggered """
    msg = 'Encountered a video that did not match filter, stopping due to --break-match-filter'
51d9739f	1061
51d9739f	1062
class MaxDownloadsReached(DownloadCancelled):
    """ Raised when the --max-downloads limit has been reached """
    msg = 'Maximum number of downloads reached, stopping due to --max-downloads'
	1066
	1067
class ReExtractInfo(YoutubeDLError):
    """ Raised when the video info has to be extracted again """

    def __init__(self, msg, expected=False):
        self.expected = expected
        super().__init__(msg)
	1074
	1075
	1076	class ThrottledDownload(ReExtractInfo):
48f79687	1077	""" Download speed below --throttled-rate. """
aa9369a2	1078	msg = 'The download speed is below throttle limit'
d77c3dfd	1079
43b22906	1080	def __init__(self):
43b22906	1081	super().__init__(self.msg, expected=False)
f2ebc5c7	1082
d77c3dfd	1083
class UnavailableVideoError(YoutubeDLError):
    """Unavailable Format exception.

    Raised when a video is requested in a format
    that is not available for that video.
    """
    msg = 'Unable to download video'

    def __init__(self, err=None):
        """err, if given, is appended to the message"""
        if err is not None:
            self.msg = f'{self.msg}: {err}'
        super().__init__(self.msg)
d77c3dfd FV	1096
d77c3dfd FV	1097
class ContentTooShortError(YoutubeDLError):
    """Content Too Short exception.

    FileDownloader objects may raise this when a downloaded file is smaller
    than what the server announced first, which indicates that the connection
    was probably interrupted.
    """

    def __init__(self, downloaded, expected):
        message = f'Downloaded {downloaded} bytes, expected {expected} bytes'
        super().__init__(message)
        # Both in bytes
        self.downloaded, self.expected = downloaded, expected
d77c3dfd	1111
5f6a1245	1112
class XAttrMetadataError(YoutubeDLError):
    """Extended attribute error; `reason` classifies it from the error code and message"""

    def __init__(self, code=None, msg='Unknown error'):
        super().__init__(msg)
        self.code = code
        self.msg = msg

        # Parsing code and msg
        if (code in (errno.ENOSPC, errno.EDQUOT)
                or 'No space left' in msg or 'Disk quota exceeded' in msg):
            reason = 'NO_SPACE'
        elif code == errno.E2BIG or 'Argument list too long' in msg:
            reason = 'VALUE_TOO_LONG'
        else:
            reason = 'NOT_SUPPORTED'
        self.reason = reason
	1127
	1128
class XAttrUnavailableError(YoutubeDLError):
    """YoutubeDLError subclass with no behavior of its own"""
	1131
	1132
def is_path_like(f):
    """Whether f is a str, bytes or os.PathLike object"""
    path_types = (str, bytes, os.PathLike)
    return isinstance(f, path_types)
	1135
	1136
def extract_timezone(date_str, default=None):
    """Split a trailing timezone off a date string.

    @param date_str  Date string that may end in "Z", a numeric UTC offset
                     or a timezone name listed in TIMEZONE_NAMES
    @param default   Timezone used when none is found. A falsy value means a
                     zero offset; NO_DEFAULT leaves the timezone as None
    @returns         (timezone, date_str) where timezone is a timedelta (or None)
                     and date_str has the recognized timezone removed
    """
    m = re.search(
        r'''(?x)
            ^.{8,}?                                              # >=8 char non-TZ prefix, if present
            (?P<tz>Z|                                            # just the UTC Z, or
                (?:(?<=.\b\d{4}|\b\d{2}:\d\d)|                   # preceded by 4 digits or hh:mm or
                   (?<!.\b[a-zA-Z]{3}|[a-zA-Z]{4}|..\b\d\d))     # not preceded by 3 alpha word or >= 4 alpha or 2 digits
                   [ ]?                                          # optional space
                (?P<sign>\+|-)                                   # +/-
                (?P<hours>[0-9]{2}):?(?P<minutes>[0-9]{2})       # hh[:]mm
            $)
        ''', date_str)
    timezone = None

    if not m:
        # No "Z"/numeric offset: look for a timezone name following a time
        m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
        timezone = TIMEZONE_NAMES.get(m and m.group('tz').strip())
        if timezone is not None:
            date_str = date_str[:-len(m.group('tz'))]
            timezone = dt.timedelta(hours=timezone)
    else:
        date_str = date_str[:-len(m.group('tz'))]
        # A bare "Z" has no sign group; the default below then applies
        if m.group('sign'):
            sign = 1 if m.group('sign') == '+' else -1
            timezone = dt.timedelta(
                hours=sign * int(m.group('hours')),
                minutes=sign * int(m.group('minutes')))

    if timezone is None and default is not NO_DEFAULT:
        timezone = default or dt.timedelta()

    return timezone, date_str
	1169
	1170
def parse_iso8601(date_str, delimiter='T', timezone=None):
    """ Return a UNIX timestamp from the given date

    @param delimiter  Separator between the date and the time part
    @param timezone   Default timezone, passed on to extract_timezone
    @returns          The timestamp, or None if date_str is None or cannot be parsed
    """
    if date_str is None:
        return None

    # Fractional seconds are dropped
    without_fraction = re.sub(r'\.[0-9]+', '', date_str)
    timezone, naive_str = extract_timezone(without_fraction, timezone)

    try:
        date_format = f'%Y-%m-%d{delimiter}%H:%M:%S'
        utc_time = dt.datetime.strptime(naive_str, date_format) - timezone
        return calendar.timegm(utc_time.timetuple())
    except (ValueError, TypeError):
        return None
912b38b4 PH	1185
912b38b4 PH	1186
def date_formats(day_first=True):
    """Return the day-first or the month-first list of date formats"""
    if day_first:
        return DATE_FORMATS_DAY_FIRST
    return DATE_FORMATS_MONTH_FIRST
	1189
	1190
def unified_strdate(date_str, day_first=True):
    """Return a string with the date in the format YYYYMMDD

    @param day_first  Whether ambiguous dates are read as day-first
    @returns          The date, or None if date_str is None or cannot be parsed
    """

    if date_str is None:
        return None
    upload_date = None
    # Replace commas
    date_str = date_str.replace(',', ' ')
    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)
    _, date_str = extract_timezone(date_str)

    # Every format is tried; the last one that parses wins
    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            upload_date = dt.datetime.strptime(date_str, expression).strftime('%Y%m%d')
    if upload_date is None:
        # Fall back to RFC 2822 style dates
        timetuple = email.utils.parsedate_tz(date_str)
        if timetuple:
            with contextlib.suppress(ValueError):
                upload_date = dt.datetime(*timetuple[:6]).strftime('%Y%m%d')
    if upload_date is not None:
        return str(upload_date)
bf50b038	1213
5f6a1245	1214
def unified_timestamp(date_str, day_first=True):
    """Return a UNIX timestamp parsed from a free-form date string

    @param day_first  Whether ambiguous dates are read as day-first
    @returns          The timestamp, or None if date_str is not a string
                      or cannot be parsed
    """
    if not isinstance(date_str, str):
        return None

    # Drop commas, pipes and weekday names, then collapse whitespace
    date_str = re.sub(r'\s+', ' ', re.sub(
        r'(?i)[,|]|(mon|tues?|wed(nes)?|thu(rs)?|fri|sat(ur)?)(day)?', '', date_str))

    pm_delta = 12 if re.search(r'(?i)PM', date_str) else 0
    timezone, date_str = extract_timezone(date_str)

    # Remove AM/PM + timezone
    date_str = re.sub(r'(?i)\s*(?:AM|PM)(?:\s+[A-Z]+)?', '', date_str)

    # Remove unrecognized timezones from ISO 8601 alike timestamps
    m = re.search(r'\d{1,2}:\d{1,2}(?:\.\d+)?(?P<tz>\s*[A-Z]+)$', date_str)
    if m:
        date_str = date_str[:-len(m.group('tz'))]

    # Python only supports microseconds, so remove nanoseconds
    m = re.search(r'^([0-9]{4,}-[0-9]{1,2}-[0-9]{1,2}T[0-9]{1,2}:[0-9]{1,2}:[0-9]{1,2}\.[0-9]{6})[0-9]+$', date_str)
    if m:
        date_str = m.group(1)

    # The first format that parses wins
    for expression in date_formats(day_first):
        with contextlib.suppress(ValueError):
            dt_ = dt.datetime.strptime(date_str, expression) - timezone + dt.timedelta(hours=pm_delta)
            return calendar.timegm(dt_.timetuple())

    # Fall back to RFC 2822 style dates
    timetuple = email.utils.parsedate_tz(date_str)
    if timetuple:
        return calendar.timegm(timetuple) + pm_delta * 3600 - timezone.total_seconds()
46f59e89 S	1246
46f59e89 S	1247
def determine_ext(url, default_ext='unknown_video'):
    """Guess the file extension from a URL

    @param default_ext  Returned when no extension can be determined
    """
    if url is None or '.' not in url:
        return default_ext

    without_query = url.partition('?')[0]
    candidate = without_query.rpartition('.')[2]
    if re.match(r'^[A-Za-z0-9]+$', candidate):
        return candidate

    # Try extract ext from URLs like http://example.com/foo/bar.mp4/?download
    trimmed = candidate.rstrip('/')
    return trimmed if trimmed in KNOWN_EXTENSIONS else default_ext
73e79f2a	1259
5f6a1245	1260
def subtitles_filename(filename, sub_lang, sub_format, expected_real_ext=None):
    """Return filename with its extension replaced by "<sub_lang>.<sub_format>" """
    subtitle_ext = sub_lang + '.' + sub_format
    return replace_extension(filename, subtitle_ext, expected_real_ext)
d4051a8e	1263
5f6a1245	1264
def datetime_from_str(date_str, precision='auto', format='%Y%m%d'):
    R"""
    Return a datetime object from a string.
    Supported format:
        (now|today|yesterday|DATE)([+-]\d+(microsecond|second|minute|hour|day|week|month|year)s?)?

    @param format       strftime format of DATE
    @param precision    Round the datetime object: auto|microsecond|second|minute|hour|day
                        auto: round to the unit provided in date_str (if applicable).
    """
    auto_precision = False
    if precision == 'auto':
        auto_precision = True
        precision = 'microsecond'
    today = datetime_round(dt.datetime.now(dt.timezone.utc), precision)
    if date_str in ('now', 'today'):
        return today
    if date_str == 'yesterday':
        return today - dt.timedelta(days=1)
    match = re.match(
        r'(?P<start>.+)(?P<sign>[+-])(?P<time>\d+)(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?',
        date_str)
    if match is not None:
        # The part before the offset is resolved recursively
        start_time = datetime_from_str(match.group('start'), precision, format)
        amount = int(match.group('time')) * (-1 if match.group('sign') == '-' else 1)
        unit = match.group('unit')
        if unit in ('month', 'year'):
            # timedelta has no months/years; use calendar-aware arithmetic
            new_date = datetime_add_months(start_time, amount * 12 if unit == 'year' else amount)
            unit = 'day'
        else:
            if unit == 'week':
                unit = 'day'
                amount *= 7
            delta = dt.timedelta(**{unit + 's': amount})
            new_date = start_time + delta
        if auto_precision:
            return datetime_round(new_date, unit)
        return new_date

    return datetime_round(dt.datetime.strptime(date_str, format), precision)
9e62f283	1305
9e62f283	1306
def date_from_str(date_str, format='%Y%m%d', strict=False):
    R"""
    Return a date object from a string using datetime_from_str

    @param format   strftime format of the date, passed to datetime_from_str
    @param strict   Restrict allowed patterns to "YYYYMMDD" and
                    (now|today|yesterday)(-\d+(day|week|month|year)s?)?

    Raises ValueError if strict is set and date_str does not match these patterns
    """
    if strict and not re.fullmatch(r'\d{8}|(now|today|yesterday)(-\d+(day|week|month|year)s?)?', date_str):
        raise ValueError(f'Invalid date format "{date_str}"')
    return datetime_from_str(date_str, precision='microsecond', format=format).date()
	1317
	1318
def datetime_add_months(dt_, months):
    """Increment/Decrement a datetime object by months.

    The day is clamped to the last day of the resulting month.
    """
    year_offset, month_index = divmod(dt_.month + months - 1, 12)
    target_year = dt_.year + year_offset
    target_month = month_index + 1
    last_day = calendar.monthrange(target_year, target_month)[1]
    return dt_.replace(target_year, target_month, min(dt_.day, last_day))
9e62f283	1326
9e62f283	1327
def datetime_round(dt_, precision='day'):
    """
    Round a datetime object's time to a specific precision

    @param precision  One of microsecond|second|minute|hour|day.
                      'microsecond' returns dt_ unchanged
    """
    if precision == 'microsecond':
        return dt_

    unit_seconds = {
        'day': 86400,
        'hour': 3600,
        'minute': 60,
        'second': 1,
    }
    seconds = calendar.timegm(dt_.timetuple())
    step = unit_seconds[precision]
    # Round to the nearest multiple of `step`
    timestamp = ((seconds + step / 2) // step) * step
    return dt.datetime.fromtimestamp(timestamp, dt.timezone.utc)
5f6a1245 JW	1344
5f6a1245 JW	1345
def hyphenate_date(date_str):
    """
    Convert a date in 'YYYYMMDD' format to 'YYYY-MM-DD' format.
    Any other string is returned unchanged"""
    match = re.match(r'^(\d\d\d\d)(\d\d)(\d\d)$', date_str)
    if match is None:
        return date_str
    return '-'.join(match.groups())
	1354
5f6a1245	1355
class DateRange:
    """Represents a time interval between two dates"""

    def __init__(self, start=None, end=None):
        """start and end must be strings in the format accepted by date.
        A missing bound defaults to the earliest/latest representable date"""
        self.start = dt.datetime.min.date() if start is None else date_from_str(start, strict=True)
        self.end = dt.datetime.max.date() if end is None else date_from_str(end, strict=True)
        if self.start > self.end:
            raise ValueError(f'Date range: "{self}" , the start date must be before the end date')

    @classmethod
    def day(cls, day):
        """Returns a range that only contains the given day"""
        return cls(day, day)

    def __contains__(self, date):
        """Check if the date is in the range"""
        candidate = date if isinstance(date, dt.date) else date_from_str(date)
        return self.start <= candidate <= self.end

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.start.isoformat()!r}, {self.end.isoformat()!r})'

    def __str__(self):
        return f'{self.start} to {self.end}'

    def __eq__(self, other):
        if not isinstance(other, DateRange):
            return False
        return self.start == other.start and self.end == other.end
	1392
c496ca96	1393
@functools.cache
def system_identifier():
    """Return a one-line description of the Python runtime, platform, OpenSSL and libc"""
    implementation = platform.python_implementation()
    if implementation == 'PyPy' and hasattr(sys, 'pypy_version_info'):
        implementation += ' version %d.%d.%d' % sys.pypy_version_info[:3]

    libc_ver = []
    with contextlib.suppress(OSError):  # We may not have access to the executable
        libc_ver = platform.libc_ver()
    libc_info = format_field(join_nonempty(*libc_ver, delim=' '), None, ', %s')

    return 'Python {} ({} {} {}) - {} ({}{})'.format(
        platform.python_version(),
        implementation,
        platform.machine(),
        platform.architecture()[0],
        platform.platform(),
        ssl.OPENSSL_VERSION,
        libc_info,
    )
c257baff PH	1412
c257baff PH	1413
@functools.cache
def get_windows_version():
    """ Get Windows version. returns () if it's not running on Windows """
    if compat_os_name != 'nt':
        return ()
    return version_tuple(platform.win32_ver()[1])
49fa4d9a N	1421
49fa4d9a N	1422
def write_string(s, out=None, encoding=None):
    """Write the string s to out (sys.stderr by default) and flush it

    @param encoding  Encoding used when the text has to be encoded to bytes
    """
    assert isinstance(s, str)
    out = out or sys.stderr
    # `sys.stderr` might be `None` (Ref: https://github.com/pyinstaller/pyinstaller/pull/7217)
    if not out:
        return

    if compat_os_name == 'nt' and supports_terminal_sequences(out):
        s = re.sub(r'([\r\n]+)', r' \1', s)

    # `mode` might be `None` (Ref: https://github.com/yt-dlp/yt-dlp/issues/8816)
    stream_mode = getattr(out, 'mode', None) or ''
    if 'b' in stream_mode:
        # Binary stream: encode and write to it directly
        target, enc = out, encoding or preferredencoding()
    elif hasattr(out, 'buffer'):
        # Text stream wrapping a binary buffer: write encoded bytes to the buffer
        target = out.buffer
        enc = encoding or getattr(out, 'encoding', None) or preferredencoding()
    else:
        target, enc = out, None

    payload = s.encode(enc, 'ignore') if enc else s
    target.write(payload)
    out.flush()
	1443
	1444
3d2623a8	1445	# TODO: Use global logger
def deprecation_warning(msg, *, printer=None, stacklevel=0, **kwargs):
    """Report the use of a deprecated feature.

    In the CLI, each distinct message is printed only once, through printer if
    given and write_string otherwise. Outside the CLI, a DeprecationWarning
    is issued instead.

    @param printer     Callable used to print the message in the CLI
    @param stacklevel  Extra stack levels to skip when issuing the warning
    @param kwargs      Passed on to printer/write_string
    """
    from .. import _IN_CLI
    if _IN_CLI:
        if msg in deprecation_warning._cache:
            return
        deprecation_warning._cache.add(msg)
        if printer:
            return printer(f'{msg}{bug_reports_message()}', **kwargs)
        return write_string(f'ERROR: {msg}{bug_reports_message()}\n', **kwargs)
    else:
        import warnings
        warnings.warn(DeprecationWarning(msg), stacklevel=stacklevel + 3)


# Messages that were already reported in the CLI
deprecation_warning._cache = set()
	1461
	1462
def bytes_to_intlist(bs):
    """Convert a byte string (or a sequence of ints/characters) into a list of ints"""
    if not bs:
        return []
    # Indexing bytes yields ints (Python 3); otherwise the items are characters
    return list(bs) if isinstance(bs[0], int) else list(map(ord, bs))
	1470
c257baff	1471
def intlist_to_bytes(xs):
    """Pack a sequence of ints (0-255) into a byte string"""
    if xs:
        return struct.pack('%dB' % len(xs), *xs)
    return b''
c38b1e77 PH	1476
c38b1e77 PH	1477
class LockingUnsupportedError(OSError):
    """OSError raised with a fixed message when file locking is not supported"""
    msg = 'File locking is not supported'

    def __init__(self):
        OSError.__init__(self, self.msg)
	1483
	1484
c1c9a79c PH	1485	# Cross-platform file locking
if sys.platform == 'win32':
    import ctypes
    import ctypes.wintypes
    import msvcrt

    class OVERLAPPED(ctypes.Structure):
        _fields_ = [
            ('Internal', ctypes.wintypes.LPVOID),
            ('InternalHigh', ctypes.wintypes.LPVOID),
            ('Offset', ctypes.wintypes.DWORD),
            ('OffsetHigh', ctypes.wintypes.DWORD),
            ('hEvent', ctypes.wintypes.HANDLE),
        ]

    kernel32 = ctypes.WinDLL('kernel32')
    LockFileEx = kernel32.LockFileEx
    LockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwFlags
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED),  # Overlapped
    ]
    LockFileEx.restype = ctypes.wintypes.BOOL
    UnlockFileEx = kernel32.UnlockFileEx
    UnlockFileEx.argtypes = [
        ctypes.wintypes.HANDLE,     # hFile
        ctypes.wintypes.DWORD,      # dwReserved
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockLow
        ctypes.wintypes.DWORD,      # nNumberOfBytesToLockHigh
        ctypes.POINTER(OVERLAPPED),  # Overlapped
    ]
    UnlockFileEx.restype = ctypes.wintypes.BOOL
    # Low/high DWORDs of the byte count passed to LockFileEx/UnlockFileEx
    whole_low = 0xffffffff
    whole_high = 0x7fffffff

    def _lock_file(f, exclusive, block):
        """Lock the open file f; raises BlockingIOError if LockFileEx fails"""
        overlapped = OVERLAPPED()
        overlapped.Offset = 0
        overlapped.OffsetHigh = 0
        overlapped.hEvent = 0
        # Kept on the file object so that _unlock_file can pass the same structure
        f._lock_file_overlapped_p = ctypes.pointer(overlapped)

        if not LockFileEx(msvcrt.get_osfhandle(f.fileno()),
                          (0x2 if exclusive else 0x0) | (0x0 if block else 0x1),
                          0, whole_low, whole_high, f._lock_file_overlapped_p):
            # NB: No argument form of "ctypes.FormatError" does not work on PyPy
            raise BlockingIOError(f'Locking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')

    def _unlock_file(f):
        """Unlock a file locked by _lock_file; raises OSError if UnlockFileEx fails"""
        assert f._lock_file_overlapped_p
        handle = msvcrt.get_osfhandle(f.fileno())
        if not UnlockFileEx(handle, 0, whole_low, whole_high, f._lock_file_overlapped_p):
            # NB: No argument form of "ctypes.FormatError" does not work on PyPy
            raise OSError(f'Unlocking file failed: {ctypes.FormatError(ctypes.GetLastError())!r}')

else:
    try:
        import fcntl

        def _lock_file(f, exclusive, block):
            """Lock the open file f with flock(), falling back to lockf()"""
            flags = fcntl.LOCK_EX if exclusive else fcntl.LOCK_SH
            if not block:
                flags |= fcntl.LOCK_NB
            try:
                fcntl.flock(f, flags)
            except BlockingIOError:
                raise
            except OSError:  # AOSP does not have flock()
                fcntl.lockf(f, flags)

        def _unlock_file(f):
            """Unlock the file f, trying each unlocking method in turn"""
            with contextlib.suppress(OSError):
                return fcntl.flock(f, fcntl.LOCK_UN)
            with contextlib.suppress(OSError):
                return fcntl.lockf(f, fcntl.LOCK_UN)  # AOSP does not have flock()
            return fcntl.flock(f, fcntl.LOCK_UN | fcntl.LOCK_NB)  # virtiofs needs LOCK_NB on unlocking

    except ImportError:

        def _lock_file(f, exclusive, block):
            raise LockingUnsupportedError

        def _unlock_file(f):
            raise LockingUnsupportedError
c1c9a79c PH	1571
c1c9a79c PH	1572
class locked_file:
    """File wrapper that holds a lock on the file while it is open.

    Can be used as a context manager or through `open`/`close`. Attributes
    that are not defined here are looked up on the underlying file object.
    """
    # Whether the lock is currently held
    locked = False

    def __init__(self, filename, mode, block=True, encoding=None):
        """Open filename without locking it yet.

        @param mode      One of r, rb, a, ab, w, wb; anything else raises NotImplementedError
        @param block     Whether acquiring the lock may block
        @param encoding  Passed to os.fdopen
        """
        if mode not in {'r', 'rb', 'a', 'ab', 'w', 'wb'}:
            raise NotImplementedError(mode)
        self.mode, self.block = mode, block

        writable = any(f in mode for f in 'wax+')
        readable = any(f in mode for f in 'r+')
        # Flags that the running platform's `os` module lacks contribute 0
        flags = functools.reduce(operator.ior, (
            getattr(os, 'O_CLOEXEC', 0),  # UNIX only
            getattr(os, 'O_BINARY', 0),  # Windows only
            getattr(os, 'O_NOINHERIT', 0),  # Windows only
            os.O_CREAT if writable else 0,  # O_TRUNC only after locking
            os.O_APPEND if 'a' in mode else 0,
            os.O_EXCL if 'x' in mode else 0,
            os.O_RDONLY if not writable else os.O_RDWR if readable else os.O_WRONLY,
        ))

        self.f = os.fdopen(os.open(filename, flags, 0o666), mode, encoding=encoding)

    def __enter__(self):
        # Read modes are locked non-exclusively; all other modes exclusively
        exclusive = 'r' not in self.mode
        try:
            _lock_file(self.f, exclusive, self.block)
            self.locked = True
        except OSError:
            # The file is closed again if the lock could not be acquired
            self.f.close()
            raise
        if 'w' in self.mode:
            # Truncation happens only now that the lock is held
            try:
                self.f.truncate()
            except OSError as e:
                if e.errno not in (
                    errno.ESPIPE,  # Illegal seek - expected for FIFO
                    errno.EINVAL,  # Invalid argument - expected for /dev/null
                ):
                    raise
        return self

    def unlock(self):
        """Release the lock if it is held; it is marked as released even if unlocking fails"""
        if not self.locked:
            return
        try:
            _unlock_file(self.f)
        finally:
            self.locked = False

    def __exit__(self, *_):
        # The file is closed even if unlocking raises
        try:
            self.unlock()
        finally:
            self.f.close()

    # Aliases for use without a `with` statement
    open = __enter__
    close = __exit__

    def __getattr__(self, attr):
        # Only called for attributes not found on this object
        return getattr(self.f, attr)

    def __iter__(self):
        return iter(self.f)
a3125791	1636
4eb7f1d1	1637
@functools.cache
def get_filesystem_encoding():
    """Return the filesystem encoding, or 'utf-8' if none is reported"""
    reported = sys.getfilesystemencoding()
    if reported is None:
        return 'utf-8'
    return reported
	1642
	1643
# Translation table used by `shell_quote` when not quoting for the shell: `"` becomes `\"`
_WINDOWS_QUOTE_TRANS = str.maketrans({'"': R'\"'})
# Translation table used by `shell_quote` when quoting for the shell (`cmd.exe`)
_CMD_QUOTE_TRANS = str.maketrans({
    # Keep quotes balanced by replacing them with `""` instead of `\\"`
    '"': '""',
    # These require an env-variable `=` containing `"^\n\n"` (set in `utils.Popen`)
    # `=` should be unique since variables containing `=` cannot be set using cmd
    '\n': '%=%',
    '\r': '%=%',
    # Use zero length variable replacement so `%` doesn't get expanded
    # `cd` is always set as long as extensions are enabled (`/E:ON` in `utils.Popen`)
    '%': '%%cd:~,%',
})
	1656
	1657
	1658	def shell_quote(args, *, shell=False):
	1659	args = list(variadic(args))
ff077926 SS	1660
	1661	if compat_os_name != 'nt':
	1662	return shlex.join(args)
	1663
	1664	trans = _CMD_QUOTE_TRANS if shell else _WINDOWS_QUOTE_TRANS
	1665	return ' '.join(
64766459 SS	1666	s if re.fullmatch(r'[\w#$*\-+./:?@\\]+', s, re.ASCII)
64766459 SS	1667	else re.sub(r'(\\+)("\|$)', r'\1\1\2', s).translate(trans).join('""')
ff077926	1668	for s in args)
9d4660ca PH	1669
	1670
	1671	def smuggle_url(url, data):
	1672	""" Pass additional data in a URL for internal use. """
	1673
81953d1a RA	1674	url, idata = unsmuggle_url(url, {})
81953d1a RA	1675	data.update(idata)
14f25df2	1676	sdata = urllib.parse.urlencode(
28e614de PH	1677	{'__youtubedl_smuggle': json.dumps(data)})
28e614de PH	1678	return url + '#' + sdata
9d4660ca PH	1679
9d4660ca PH	1680
def unsmuggle_url(smug_url, default=None):
    """Split a smuggled URL into `(url, data)`

    Returns `(smug_url, default)` unchanged when the URL carries no smuggled data.
    """
    if '#__youtubedl_smuggle' in smug_url:
        url, _, fragment = smug_url.rpartition('#')
        payload = urllib.parse.parse_qs(fragment)['__youtubedl_smuggle'][0]
        return url, json.loads(payload)
    return smug_url, default
02dbf93f PH	1688
02dbf93f PH	1689
def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
    """ Formats numbers with decimal suffixes like K, M, etc

    @param num     The number to format; anything `float_or_none` accepts
    @param fmt     %-format string taking the scaled number and the suffix
    @param factor  Ratio between consecutive suffixes (1000, or 1024 for Ki, Mi, ...)
    @returns       The formatted str, or None if `num` is invalid or negative
    """
    num, factor = float_or_none(num), float(factor)
    if num is None or num < 0:
        return None
    POSSIBLE_SUFFIXES = 'kMGTPEZY'
    if num == 0:
        exponent = 0
    else:
        # Clamp to [0, len(suffixes)]: numbers smaller than 1/factor have a negative
        # logarithm, which would otherwise index the suffix list from its end
        exponent = min(max(int(math.log(num, factor)), 0), len(POSSIBLE_SUFFIXES))
    suffix = ['', *POSSIBLE_SUFFIXES][exponent]
    if factor == 1024:
        suffix = {'k': 'Ki', '': ''}.get(suffix, f'{suffix}i')
    converted = num / (factor ** exponent)
    return fmt % (converted, suffix)
e0fd9573	1702
e0fd9573	1703
def format_bytes(bytes):
    """Format a byte count with binary suffixes (e.g. `KiB`); 'N/A' if it cannot be formatted"""
    formatted = format_decimal_suffix(bytes, '%.2f%sB', factor=1024)
    return formatted or 'N/A'
f53c966a	1706
1c088fa8	1707
def lookup_unit_table(unit_table, s, strict=False):
    """Parse "<number><unit>" using a table of unit multipliers

    @param unit_table  Mapping of unit string to its multiplier
    @param s           The string to parse
    @param strict      If True, the whole string must match and only `.` is accepted
                       as decimal separator. Otherwise a match at the start of the
                       string suffices and `,` is accepted as well
    @returns           number * multiplier rounded to an int, or None if nothing matched
    """
    num_re = NUMBER_RE if strict else NUMBER_RE.replace(R'\.', '[,.]')
    # Alternation of all the known units
    units_re = '|'.join(re.escape(u) for u in unit_table)
    m = (re.fullmatch if strict else re.match)(
        rf'(?P<num>{num_re})\s*(?P<unit>{units_re})\b', s)
    if not m:
        return None

    num = float(m.group('num').replace(',', '.'))
    mult = unit_table[m.group('unit')]
    return round(num * mult)
	1719
	1720
	1721	def parse_bytes(s):
	1722	"""Parse a string indicating a byte quantity into an integer"""
	1723	return lookup_unit_table(
	1724	{u: 1024*i for i, u in enumerate(['', 'KMGTPEZY'])},
	1725	s.upper(), strict=True)
fb47597b S	1726
fb47597b S	1727
def parse_filesize(s):
    """Parse a human-readable file size such as "1.5 MiB" or "5 GB" into bytes

    Returns None if `s` is None or cannot be parsed.
    """
    if s is None:
        return None

    _UNIT_TABLE = {
        'B': 1,
        'b': 1,
        'bytes': 1,
    }
    _PREFIXES = (
        ('K', 'kilo', 'kibi'),
        ('M', 'mega', 'mebi'),
        ('G', 'giga', 'gibi'),
        ('T', 'tera', 'tebi'),
        ('P', 'peta', 'pebi'),
        ('E', 'exa', 'exbi'),
        ('Z', 'zetta', 'zebi'),
        ('Y', 'yotta', 'yobi'),
    )
    for power, (letter, decimal_name, binary_name) in enumerate(_PREFIXES, 1):
        decimal, binary = 1000 ** power, 1024 ** power
        lower = letter.lower()
        # The lower-case forms are of course incorrect and unofficial,
        # but we support those too
        _UNIT_TABLE.update({
            f'{letter}iB': binary,
            f'{letter}B': decimal,
            f'{lower}B': binary,
            f'{letter}b': decimal,
            f'{lower}b': decimal,
            f'{decimal_name}bytes': decimal,
            f'{binary_name}bytes': binary,
        })

    return lookup_unit_table(_UNIT_TABLE, s)
	1797
	1798
	1799	def parse_count(s):
	1800	if s is None:
be64b5b0 PH	1801	return None
be64b5b0 PH	1802
352d5da8	1803	s = re.sub(r'^[^\d]+\s', '', s).strip()
fb47597b S	1804
	1805	if re.match(r'^[\d,.]+$', s):
	1806	return str_to_int(s)
	1807
	1808	_UNIT_TABLE = {
	1809	'k': 1000,
	1810	'K': 1000,
	1811	'm': 1000 ** 2,
	1812	'M': 1000 ** 2,
	1813	'kk': 1000 ** 2,
	1814	'KK': 1000 ** 2,
352d5da8	1815	'b': 1000 ** 3,
352d5da8	1816	'B': 1000 ** 3,
fb47597b	1817	}
be64b5b0	1818
352d5da8	1819	ret = lookup_unit_table(_UNIT_TABLE, s)
	1820	if ret is not None:
	1821	return ret
	1822
	1823	mobj = re.match(r'([\d,.]+)(?:$\|\s)', s)
	1824	if mobj:
	1825	return str_to_int(mobj.group(1))
be64b5b0	1826
2f7ae819	1827
def parse_resolution(s, *, lenient=False):
    """Extract a video resolution from a string

    Recognizes, in order of preference: "<width>x<height>" (`x`, `X`, `×` or `,`
    as separator, optionally surrounded by whitespace), "<height>p"/"<height>i",
    and "4k"/"8k".

    @param s        The string to search, or None
    @param lenient  If True, "<width>x<height>" may be directly adjacent to other
                    alphanumeric characters
    @returns        A dict with 'width' and/or 'height'; empty if nothing was found
    """
    if s is None:
        return {}

    if lenient:
        mobj = re.search(r'(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)', s)
    else:
        mobj = re.search(r'(?<![a-zA-Z0-9])(?P<w>\d+)\s*[xX×,]\s*(?P<h>\d+)(?![a-zA-Z0-9])', s)
    if mobj:
        return {
            'width': int(mobj.group('w')),
            'height': int(mobj.group('h')),
        }

    mobj = re.search(r'(?<![a-zA-Z0-9])(\d+)[pPiI](?![a-zA-Z0-9])', s)
    if mobj:
        return {'height': int(mobj.group(1))}

    mobj = re.search(r'\b([48])[kK]\b', s)
    if mobj:
        # 4k -> 2160, 8k -> 4320
        return {'height': int(mobj.group(1)) * 540}

    return {}
	1851
	1852
def parse_bitrate(s):
    """Extract the integer from the first "<number> kbps" found in `s`

    Returns None if `s` is not a str or contains no such bitrate.
    """
    if isinstance(s, str):
        found = re.search(r'\b(\d+)\s*kbps', s)
        if found:
            return int(found.group(1))
    return None
	1859
	1860
def month_by_name(name, lang='en'):
    """ Return the number of a month by (locale-independently) English name

    Month names are looked up in `MONTH_NAMES[lang]`, falling back to the
    English names for an unknown `lang`. Returns None for an unknown name.
    """
    names = MONTH_NAMES.get(lang, MONTH_NAMES['en'])

    with contextlib.suppress(ValueError):
        return names.index(name) + 1
    return None
	1870
	1871
	1872	def month_by_abbreviation(abbrev):
	1873	""" Return the number of a month by (locale-independently) English
	1874	abbreviations """
	1875
	1876	try:
	1877	return [s[:3] for s in ENGLISH_MONTH_NAMES].index(abbrev) + 1
caefb1de PH	1878	except ValueError:
caefb1de PH	1879	return None
18258362 JMF	1880
18258362 JMF	1881
def fix_xml_ampersands(xml_str):
    """Replace all the '&' by '&amp;' in XML

    Ampersands that already start one of the predefined entities (`&amp;`, `&lt;`,
    `&gt;`, `&apos;`, `&quot;`) or a numeric character reference are left untouched.
    """
    return re.sub(
        r'&(?!amp;|lt;|gt;|apos;|quot;|#x[0-9a-fA-F]{,4};|#[0-9]{,4};)',
        '&amp;',
        xml_str)
e3946f98 PH	1888
	1889
	1890	def setproctitle(title):
14f25df2	1891	assert isinstance(title, str)
c1c05c67	1892
fe0918bb	1893	# Workaround for https://github.com/yt-dlp/yt-dlp/issues/4541
	1894	try:
	1895	import ctypes
	1896	except ImportError:
c1c05c67 YCH	1897	return
c1c05c67 YCH	1898
e3946f98	1899	try:
611c1dd9	1900	libc = ctypes.cdll.LoadLibrary('libc.so.6')
e3946f98 PH	1901	except OSError:
e3946f98 PH	1902	return
2f49bcd6 RC	1903	except TypeError:
	1904	# LoadLibrary in Windows Python 2.7.13 only expects
	1905	# a bytestring, but since unicode_literals turns
	1906	# every string into a unicode string, it fails.
	1907	return
0f06bcd7	1908	title_bytes = title.encode()
6eefe533 PH	1909	buf = ctypes.create_string_buffer(len(title_bytes))
6eefe533 PH	1910	buf.value = title_bytes
e3946f98	1911	try:
f9fb3ce8	1912	# PR_SET_NAME = 15 Ref: /usr/include/linux/prctl.h
6eefe533	1913	libc.prctl(15, buf, 0, 0, 0)
e3946f98 PH	1914	except AttributeError:
e3946f98 PH	1915	return # Strange libc, just skip this
d7dda168 PH	1916
	1917
	1918	def remove_start(s, start):
46bc9b7d	1919	return s[len(start):] if s is not None and s.startswith(start) else s
29eb5174 PH	1920
29eb5174 PH	1921
def remove_end(s, end):
    """Return `s` without the suffix `end`; `s` is returned as-is if it is None or lacks the suffix"""
    if s is None or not s.endswith(end):
        return s
    # Slice by absolute position: `s[:-len(end)]` would return an empty
    # string for an empty `end`, since `s[:-0]` is `s[:0]`
    return s[:len(s) - len(end)]
2b9faf55 PH	1924
2b9faf55 PH	1925
def remove_quotes(s):
    """Strip one pair of matching single or double quotes surrounding `s`, if present"""
    if s is None or len(s) < 2:
        return s
    first, last = s[0], s[-1]
    if first == last and first in ('"', "'"):
        return s[1:-1]
    return s
	1933
	1934
def get_domain(url):
    """
    This implementation is inconsistent, but is kept for compatibility.
    Use this only for "webpage_url_domain"

    Returns the network location of `url` without a leading "www.",
    or None if that is empty.
    """
    netloc = urllib.parse.urlparse(url).netloc
    return remove_start(netloc, 'www.') or None
b6e0c7d2 U	1941
b6e0c7d2 U	1942
def url_basename(url):
    """Return the last segment of the URL's path, ignoring leading/trailing slashes"""
    trimmed_path = urllib.parse.urlparse(url).path.strip('/')
    return trimmed_path.rpartition('/')[2]
aa94a6d3 PH	1946
aa94a6d3 PH	1947
def base_url(url):
    """Return the http(s) URL up to and including the last `/` before any query or fragment

    Raises AttributeError if `url` does not match.
    """
    mobj = re.match(r'https?://[^?#]+/', url)
    return mobj.group()
02dc0a36 S	1950
02dc0a36 S	1951
def urljoin(base, path):
    """Lenient variant of `urllib.parse.urljoin`

    bytes arguments are decoded first. Returns None if `path` is not a non-empty
    string; returns `path` itself if it already has a scheme or is
    protocol-relative; returns None unless `base` is an http(s) or
    protocol-relative URL.
    """
    if isinstance(path, bytes):
        path = path.decode()
    if not (isinstance(path, str) and path):
        return None
    if re.match(r'^(?:[a-zA-Z][a-zA-Z0-9+-.]*:)?//', path):
        return path

    if isinstance(base, bytes):
        base = base.decode()
    base_is_usable = isinstance(base, str) and re.match(r'^(?:https?:)?//', base)
    if not base_is_usable:
        return None
    return urllib.parse.urljoin(base, path)
e34c3361 S	1965
e34c3361 S	1966
def int_or_none(v, scale=1, default=None, get_attr=None, invscale=1):
    """Convert `v` to int, multiplied by `invscale` and floor-divided by `scale`

    If `get_attr` is given, the attribute of that name is read from `v` first.
    Returns `default` when the conversion fails.
    """
    if get_attr and v is not None:
        v = getattr(v, get_attr, None)
    try:
        converted = int(v) * invscale // scale
    except (ValueError, TypeError, OverflowError):
        return default
    return converted
9732d77e	1974
9572013d	1975
def str_or_none(v, default=None):
    """Return `str(v)`, or `default` if `v` is None"""
    if v is None:
        return default
    return str(v)
40a90862	1978
9732d77e PH	1979
def str_to_int(int_str):
    """ A more relaxed version of int_or_none

    ints are returned as-is; in strings, any `,`, `.` and `+` are removed
    before the conversion.
    """
    if isinstance(int_str, int):
        return int_str
    if isinstance(int_str, str):
        int_str = re.sub(r'[,\.\+]', '', int_str)
    return int_or_none(int_str)
608d11f5 PH	1987
608d11f5 PH	1988
def float_or_none(v, scale=1, invscale=1, default=None):
    """Convert `v` to float, multiplied by `invscale` and divided by `scale`

    Returns `default` if `v` is None or the conversion fails.
    """
    if v is not None:
        try:
            return float(v) * invscale / scale
        except (ValueError, TypeError):
            pass
    return default
43f775e4 PH	1996
43f775e4 PH	1997
def bool_or_none(v, default=None):
    """Return `v` if it is a bool, else `default`"""
    if isinstance(v, bool):
        return v
    return default
	2000
	2001
def strip_or_none(v, default=None):
    """Return `v` stripped of surrounding whitespace if it is a str, else `default`"""
    if isinstance(v, str):
        return v.strip()
    return default
b72b4431 S	2004
b72b4431 S	2005
def url_or_none(url):
    """Return the stripped `url` if it looks like a supported URL, else None

    Accepted are protocol-relative URLs (`//...`) and the schemes http(s),
    rtmp and its variants, rtsp/rtsps/rtspu, mms and ftp(s).
    """
    if not url or not isinstance(url, str):
        return None
    url = url.strip()
    return url if re.match(r'^(?:(?:https?|rt(?:m(?:pt?[es]?|fp)|sp[su]?)|mms|ftps?):)?//', url) else None
af03000a S	2011
af03000a S	2012
def strftime_or_none(timestamp, date_format='%Y%m%d', default=None):
    """Format a UNIX timestamp (int/float) or a "YYYYMMDD" string with `date_format`

    `%s` in `date_format` is expanded to the integer UNIX timestamp.
    Returns `default` if `timestamp` has another type or cannot be formatted.
    """
    try:
        if isinstance(timestamp, (int, float)):  # unix timestamp
            # Using naive datetime here can break timestamp() in Windows
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/5185, https://github.com/python/cpython/issues/94414
            # Also, dt.datetime.fromtimestamp breaks for negative timestamps
            # Ref: https://github.com/yt-dlp/yt-dlp/issues/6706#issuecomment-1496842642
            epoch = dt.datetime.fromtimestamp(0, dt.timezone.utc)
            moment = epoch + dt.timedelta(seconds=timestamp)
        elif isinstance(timestamp, str):  # assume YYYYMMDD
            moment = dt.datetime.strptime(timestamp, '%Y%m%d')
        else:
            # Unsupported type: the attribute access below raises AttributeError
            moment = None
        unix_seconds = int(moment.timestamp())
        # Support %s on windows
        expanded_format = re.sub(r'(?<!%)(%%)*%s', rf'\g<1>{unix_seconds}', date_format)
        return moment.strftime(expanded_format)
    except (ValueError, TypeError, AttributeError):
        return default
	2030
	2031
def parse_duration(s):
    """Parse a duration string into a number of seconds (float)

    Supported are clock-like strings ("[[DD:]HH:]MM:SS[.ms]"), unit-based and
    ISO 8601-like strings ("3h11m53s", "3 hours 11 minutes", "PT1H0.040S") and
    fractional hours/minutes ("2.5 hours", "87 Min.").
    Returns None if `s` is not a str, is blank, or cannot be parsed.
    """
    if not isinstance(s, str):
        return None
    s = s.strip()
    if not s:
        return None

    days, hours, mins, secs, ms = [None] * 5
    # Clock-like format. When any "MM:" part precedes the seconds, the seconds
    # are limited to two digits; otherwise any number of digits is accepted
    m = re.match(r'''(?x)
            (?P<before_secs>
                (?:(?:(?P<days>[0-9]+):)?(?P<hours>[0-9]+):)?(?P<mins>[0-9]+):)?
            (?P<secs>(?(before_secs)[0-9]{1,2}|[0-9]+))
            (?P<ms>[.:][0-9]+)?Z?$
        ''', s)
    if m:
        days, hours, mins, secs, ms = m.group('days', 'hours', 'mins', 'secs', 'ms')
    else:
        # Unit-based format. Years, months and weeks are accepted but not
        # captured, so they do not contribute to the result
        m = re.match(
            r'''(?ix)(?:P?
                (?:
                    [0-9]+\s*y(?:ears?)?,?\s*
                )?
                (?:
                    [0-9]+\s*m(?:onths?)?,?\s*
                )?
                (?:
                    [0-9]+\s*w(?:eeks?)?,?\s*
                )?
                (?:
                    (?P<days>[0-9]+)\s*d(?:ays?)?,?\s*
                )?
                T)?
                (?:
                    (?P<hours>[0-9]+)\s*h(?:(?:ou)?rs?)?,?\s*
                )?
                (?:
                    (?P<mins>[0-9]+)\s*m(?:in(?:ute)?s?)?,?\s*
                )?
                (?:
                    (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*s(?:ec(?:ond)?s?)?\s*
                )?Z?$''', s)
        if m:
            days, hours, mins, secs, ms = m.groups()
        else:
            # Fractional hours or minutes
            m = re.match(r'(?i)(?:(?P<hours>[0-9.]+)\s*(?:hours?)|(?P<mins>[0-9.]+)\s*(?:mins?\.?|minutes?)\s*)Z?$', s)
            if m:
                hours, mins = m.groups()
            else:
                return None

    if ms:
        # The fraction may have been separated by `:`; make it parsable by float()
        ms = ms.replace(':', '.')
    return sum(float(part or 0) * mult for part, mult in (
        (days, 86400), (hours, 3600), (mins, 60), (secs, 1), (ms, 1)))
91d7d0b3 JMF	2086
91d7d0b3 JMF	2087
def _change_extension(prepend, filename, ext, expected_real_ext=None):
    """Replace the extension of `filename` with `ext`, or insert `ext` before it

    The current extension is only dropped if `expected_real_ext` is not given or
    equals it. With `prepend` and an existing extension, `ext` is inserted in
    front of that extension instead of replacing it. `ext` is passed through
    `_UnsafeExtensionError.sanitize_extension` first.
    """
    stem, real_ext = os.path.splitext(filename)

    extension_matches = not expected_real_ext or real_ext[1:] == expected_real_ext
    if extension_matches and prepend and real_ext:
        _UnsafeExtensionError.sanitize_extension(ext, prepend=True)
        return f'{stem}.{ext}{real_ext}'

    base = stem if extension_matches else filename
    return f'{base}.{_UnsafeExtensionError.sanitize_extension(ext)}'


prepend_extension = functools.partial(_change_extension, True)
replace_extension = functools.partial(_change_extension, False)
b3ed15b7 S	2102
b3ed15b7 S	2103
def check_executable(exe, args=[]):
    """ Checks if the given binary is installed somewhere in PATH, and returns its name.
    args can be a list of arguments for a short output (like -version)
    Returns False if running the binary fails with an OSError """
    command = [exe, *args]
    try:
        Popen.run(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    except OSError:
        return False
    else:
        return exe
b7ab0590 PH	2112
b7ab0590 PH	2113
def _get_exe_version_output(exe, args):
    """Run `exe` with `args` and return its combined stdout/stderr as text

    Returns False if the process could not be run (OSError) and None if it
    exited with a non-zero return code.
    """
    command = [encodeArgument(exe), *args]
    try:
        # STDIN should be redirected too. On UNIX-like systems, ffmpeg triggers
        # SIGTTOU if yt-dlp is run in the background.
        # See https://github.com/ytdl-org/youtube-dl/issues/955#issuecomment-209789656
        stdout, _, returncode = Popen.run(
            command, text=True,
            stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
    except OSError:
        return False
    return None if returncode else stdout
cae97f65 PH	2126
	2127
	2128	def detect_exe_version(output, version_re=None, unrecognized='present'):
14f25df2	2129	assert isinstance(output, str)
cae97f65 PH	2130	if version_re is None:
	2131	version_re = r'version\s+([-0-9._a-zA-Z]+)'
	2132	m = re.search(version_re, output)
95807118 PH	2133	if m:
	2134	return m.group(1)
	2135	else:
	2136	return unrecognized
	2137
	2138
def get_exe_version(exe, args=['--version'],
                    version_re=None, unrecognized=('present', 'broken')):
    """ Returns the version of the specified executable,
    or False if the executable is not present

    `unrecognized` holds one or two values: the first is returned when the
    output contains no recognizable version, the last when the executable
    exits with an error """
    unrecognized = variadic(unrecognized)
    assert len(unrecognized) in (1, 2)
    output = _get_exe_version_output(exe, args)
    if output is None:
        return unrecognized[-1]
    if not output:
        # False (could not be run) or empty output are passed through as-is
        return output
    return detect_exe_version(output, version_re, unrecognized[0])
9af98e17	2149
9af98e17	2150
def frange(start=0, stop=None, step=1):
    """Float range

    Like `range`, but also accepts float arguments. With a single argument,
    it is taken as `stop`. A zero `step` yields nothing.
    """
    if stop is None:
        start, stop = 0, start
    if not step:
        direction = 0
    elif step > 0:
        direction = 1
    else:
        direction = -1
    current = start
    # Multiplying by the direction lets one comparison serve both ascending
    # and descending ranges
    while direction * current < direction * stop:
        yield current
        current += step
	2159
	2160
class LazyList(collections.abc.Sequence):
    """Lazy immutable list from an iterable
    Note that slices of a LazyList are lists and not LazyList"""

    class IndexError(IndexError):  # noqa: A001
        # Raised by __getitem__ in place of the builtin IndexError
        pass

    def __init__(self, iterable, *, reverse=False, _cache=None):
        """
        @param iterable  The iterable to evaluate lazily
        @param reverse   Whether the list presents the items in reverse order
        @param _cache    List of already evaluated items to use as the cache
        """
        self._iterable = iter(iterable)
        self._cache = [] if _cache is None else _cache
        self._reversed = reverse

    def __iter__(self):
        if self._reversed:
            # We need to consume the entire iterable to iterate in reverse
            yield from self.exhaust()
            return
        # Replay what has already been evaluated, then continue consuming
        # the iterable, caching each new item as it is yielded
        yield from self._cache
        for item in self._iterable:
            self._cache.append(item)
            yield item

    def _exhaust(self):
        """Move all remaining items into the cache and return the cache (in forward order)"""
        self._cache.extend(self._iterable)
        self._iterable = []  # Discard the emptied iterable to make it pickle-able
        return self._cache

    def exhaust(self):
        """Evaluate the entire iterable"""
        return self._exhaust()[::-1 if self._reversed else 1]

    @staticmethod
    def _reverse_index(x):
        """Map an index to the one addressing the same position from the other end (`~x`); None stays None"""
        return None if x is None else ~x

    def __getitem__(self, idx):
        """Return the item (int index) or list of items (slice) at `idx`

        Only as much of the iterable as is needed gets evaluated.
        Raises TypeError for other index types and LazyList.IndexError
        when an int index is out of range.
        """
        if isinstance(idx, slice):
            if self._reversed:
                idx = slice(self._reverse_index(idx.start), self._reverse_index(idx.stop), -(idx.step or 1))
            start, stop, step = idx.start, idx.stop, idx.step or 1
        elif isinstance(idx, int):
            if self._reversed:
                idx = self._reverse_index(idx)
            start, stop, step = idx, idx, 0
        else:
            raise TypeError('indices must be integers or slices')
        if ((start or 0) < 0 or (stop or 0) < 0
                or (start is None and step < 0)
                or (stop is None and step > 0)):
            # We need to consume the entire iterable to be able to slice from the end
            # Obviously, never use this with infinite iterables
            self._exhaust()
            try:
                return self._cache[idx]
            except IndexError as e:
                raise self.IndexError(e) from e
        # Number of items still missing from the cache to reach the highest index needed
        n = max(start or 0, stop or 0) - len(self._cache) + 1
        if n > 0:
            self._cache.extend(itertools.islice(self._iterable, n))
        try:
            return self._cache[idx]
        except IndexError as e:
            raise self.IndexError(e) from e

    def __bool__(self):
        """True if there is at least one item; looks up a single item instead of computing the length"""
        try:
            self[-1] if self._reversed else self[0]
        except self.IndexError:
            return False
        return True

    def __len__(self):
        # The length is only known once the whole iterable has been evaluated
        self._exhaust()
        return len(self._cache)

    def __reversed__(self):
        """Return a LazyList with the opposite direction, built from the same iterable and cache list"""
        return type(self)(self._iterable, reverse=not self._reversed, _cache=self._cache)

    def __copy__(self):
        """Return a LazyList with the same direction, built from the same iterable and cache list"""
        return type(self)(self._iterable, reverse=self._reversed, _cache=self._cache)

    def __repr__(self):
        # repr and str should mimic a list. So we exhaust the iterable
        return repr(self.exhaust())

    def __str__(self):
        return repr(self.exhaust())
	2248
483336e7	2249
class PagedList:
    """Base class for lists whose items are fetched one page at a time

    Subclasses must implement `_getslice(start, end)`.
    """

    class IndexError(IndexError):  # noqa: A001
        pass

    def __init__(self, pagefunc, pagesize, use_cache=True):
        """
        @param pagefunc   Callable taking a page number and returning an iterable of that page's items
        @param pagesize   Number of items in a full page
        @param use_cache  Whether fetched pages are remembered (required for indexing)
        """
        self._pagefunc = pagefunc
        self._pagesize = pagesize
        self._pagecount = float('inf')
        self._use_cache = use_cache
        self._cache = {}

    def __len__(self):
        # This is only useful for tests
        return len(self.getslice())

    def getpage(self, pagenum):
        """Return the items of page `pagenum` as a list, using the cache if possible"""
        cached = self._cache.get(pagenum)
        if cached is not None:
            return cached
        if pagenum > self._pagecount:
            # Pages beyond the known page count are empty
            fetched = []
        else:
            fetched = list(self._pagefunc(pagenum))
        if self._use_cache:
            self._cache[pagenum] = fetched
        return fetched

    def getslice(self, start=0, end=None):
        """Return the items in [start, end) as a list"""
        return list(self._getslice(start, end))

    def _getslice(self, start, end):
        raise NotImplementedError('This method must be implemented by subclasses')

    def __getitem__(self, idx):
        assert self._use_cache, 'Indexing PagedList requires cache'
        if not isinstance(idx, int) or idx < 0:
            raise TypeError('indices must be non-negative integers')
        entries = self.getslice(idx, idx + 1)
        if entries:
            return entries[0]
        raise self.IndexError

    def __bool__(self):
        return bool(self.getslice(0, 1))
	2291
9c44d242 PH	2292
class OnDemandPagedList(PagedList):
    """Download pages until a page with less than maximum results"""

    def _getslice(self, start, end):
        for pagenum in itertools.count(start // self._pagesize):
            page_first = pagenum * self._pagesize
            next_page_first = pagenum * self._pagesize + self._pagesize
            if start >= next_page_first:
                continue

            # Offset of the first wanted item within this page
            if page_first <= start < next_page_first:
                offset = start % self._pagesize
            else:
                offset = 0
            # Position within this page at which to stop, if the slice ends here
            if end is not None and page_first <= end <= next_page_first:
                limit = ((end - 1) % self._pagesize) + 1
            else:
                limit = None

            try:
                page = self.getpage(pagenum)
            except Exception:
                self._pagecount = pagenum - 1
                raise
            if offset != 0 or limit is not None:
                page = page[offset:limit]
            yield from page

            # A little optimization - if current page is not "full", ie. does
            # not contain page_size videos then we can assume that this page
            # is the last one - there are no more ids on further pages -
            # i.e. no need to query again.
            page_is_short = len(page) + offset < self._pagesize
            # If we got the whole page, but the next page is not interesting,
            # break out early as well
            if page_is_short or end == next_page_first:
                break
81c2f20b PH	2332
81c2f20b PH	2333
class InAdvancePagedList(PagedList):
    """PagedList with total number of pages known in advance"""

    def __init__(self, pagefunc, pagecount, pagesize):
        super().__init__(pagefunc, pagesize, True)
        self._pagecount = pagecount

    def _getslice(self, start, end):
        # Page containing `start`, and the number of items to skip on that page
        first_page, skip = divmod(start, self._pagesize)
        if end is None:
            last_page = self._pagecount
            remaining = None
        else:
            last_page = min(self._pagecount, end // self._pagesize + 1)
            remaining = end - start

        for pagenum in range(first_page, last_page):
            page = self.getpage(pagenum)
            if skip:
                page = page[skip:]
                skip = None
            if remaining is not None:
                if len(page) >= remaining:
                    # This page completes the requested slice
                    yield from page[:remaining]
                    break
                remaining -= len(page)
            yield from page
9c44d242 PH	2358
9c44d242 PH	2359
class PlaylistEntries:
    """Access to the entries of a playlist using 1-based playlist indices

    Indexing or slicing an instance yields `(playlist_index, entry)` pairs.
    """
    MissingEntry = object()  # Placeholder for positions of entries that were not requested
    is_exhausted = False

    def __init__(self, ydl, info_dict):
        """
        @param ydl        Object providing `params`, `report_warning` and `_match_entry`
        @param info_dict  Playlist info dict; must contain 'entries' and may
                          contain 'requested_entries'
        Raises EntryNotInPlaylist if there are no entries
        """
        self.ydl = ydl

        # _entries must be assigned now since infodict can change during iteration
        entries = info_dict.get('entries')
        if entries is None:
            raise EntryNotInPlaylist('There are no entries')
        elif isinstance(entries, list):
            self.is_exhausted = True

        requested_entries = info_dict.get('requested_entries')
        self.is_incomplete = requested_entries is not None
        if self.is_incomplete:
            assert self.is_exhausted
            # Place each entry at its requested (1-based) index and mark the rest as missing
            self._entries = [self.MissingEntry] * max(requested_entries or [0])
            for i, entry in zip(requested_entries, entries):
                self._entries[i - 1] = entry
        elif isinstance(entries, (list, PagedList, LazyList)):
            self._entries = entries
        else:
            self._entries = LazyList(entries)

    # One item specification: "START", "START:END", "START-END" or "START:END:STEP",
    # where END may also be "inf" or "infinite"
    PLAYLIST_ITEMS_RE = re.compile(r'''(?x)
        (?P<start>[+-]?\d+)?
        (?P<range>[:-]
            (?P<end>[+-]?\d+|inf(?:inite)?)?
            (?::(?P<step>[+-]?\d+))?
        )?''')

    @classmethod
    def parse_playlist_items(cls, string):
        """Yield an int or a slice for each comma-separated item specification in `string`

        Raises ValueError for an invalid specification
        """
        for segment in string.split(','):
            if not segment:
                raise ValueError('There is two or more consecutive commas')
            mobj = cls.PLAYLIST_ITEMS_RE.fullmatch(segment)
            if not mobj:
                raise ValueError(f'{segment!r} is not a valid specification')
            start, end, step, has_range = mobj.group('start', 'end', 'step', 'range')
            if int_or_none(step) == 0:
                raise ValueError(f'Step in {segment!r} cannot be zero')
            yield slice(int_or_none(start), float_or_none(end), int_or_none(step)) if has_range else int(start)

    def get_requested_items(self):
        """Yield `(playlist_index, entry)` for the entries selected by the
        'playlist_items', 'playliststart' and 'playlistend' params"""
        playlist_items = self.ydl.params.get('playlist_items')
        playlist_start = self.ydl.params.get('playliststart', 1)
        playlist_end = self.ydl.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlist_end in (-1, None):
            playlist_end = ''
        if not playlist_items:
            playlist_items = f'{playlist_start}:{playlist_end}'
        elif playlist_start != 1 or playlist_end:
            self.ydl.report_warning('Ignoring playliststart and playlistend because playlistitems was given', only_once=True)

        for index in self.parse_playlist_items(playlist_items):
            for i, entry in self[index]:
                yield i, entry
                if not entry:
                    continue
                try:
                    # The item may have just been added to archive. Don't break due to it
                    if not self.ydl.params.get('lazy_playlist'):
                        # TODO: Add auto-generated fields
                        self.ydl._match_entry(entry, incomplete=True, silent=True)
                except (ExistingVideoReached, RejectedVideoReached):
                    return

    def get_full_count(self):
        """Return the total number of entries if it can be determined here, else None"""
        if self.is_exhausted and not self.is_incomplete:
            return len(self)
        elif isinstance(self._entries, InAdvancePagedList):
            if self._entries._pagesize == 1:
                return self._entries._pagecount

    @functools.cached_property
    def _getter(self):
        """Function returning the entry at a 0-based index

        It raises `self.IndexError` when the index is out of range, and
        EntryNotInPlaylist for a position marked as missing
        """
        if isinstance(self._entries, list):
            def get_entry(i):
                try:
                    entry = self._entries[i]
                except IndexError:
                    entry = self.MissingEntry
                    if not self.is_incomplete:
                        raise self.IndexError
                if entry is self.MissingEntry:
                    raise EntryNotInPlaylist(f'Entry {i + 1} cannot be found')
                return entry
        else:
            def get_entry(i):
                try:
                    return type(self.ydl)._handle_extraction_exceptions(lambda _, i: self._entries[i])(self.ydl, i)
                except (LazyList.IndexError, PagedList.IndexError):
                    raise self.IndexError
        return get_entry

    def __getitem__(self, idx):
        """Yield `(playlist_index, entry)` for the 1-based index or slice `idx`"""
        if isinstance(idx, int):
            idx = slice(idx, idx)

        # NB: PlaylistEntries[1:10] => (0, 1, ... 9)
        step = 1 if idx.step is None else idx.step
        if idx.start is None:
            start = 0 if step > 0 else len(self) - 1
        else:
            start = idx.start - 1 if idx.start >= 0 else len(self) + idx.start

        # NB: Do not call len(self) when idx == [:]
        if idx.stop is None:
            stop = 0 if step < 0 else float('inf')
        else:
            stop = idx.stop - 1 if idx.stop >= 0 else len(self) + idx.stop
        # Make the stop inclusive in the direction of iteration
        stop += [-1, 1][step > 0]

        for i in frange(start, stop, step):
            if i < 0:
                continue
            try:
                entry = self._getter(i)
            except self.IndexError:
                self.is_exhausted = True
                if step > 0:
                    break
                continue
            yield i + 1, entry

    def __len__(self):
        return len(tuple(self[:]))

    class IndexError(IndexError):  # noqa: A001
        pass
	2494
	2495
def uppercase_escape(s):
    r"""Decode all `\UXXXXXXXX` escape sequences (8 hex digits) in `s`"""
    decode = codecs.getdecoder('unicode_escape')

    def _unescape(mobj):
        return decode(mobj.group(0))[0]

    return re.sub(r'\\U[0-9a-fA-F]{8}', _unescape, s)
0fe2ff78 YCH	2502
	2503
	2504	def lowercase_escape(s):
	2505	unicode_escape = codecs.getdecoder('unicode_escape')
	2506	return re.sub(
	2507	r'\\u[0-9a-fA-F]{4}',
	2508	lambda m: unicode_escape(m.group(0))[0],
	2509	s)
b53466e1	2510
d05cfe06	2511
def parse_qs(url, **kwargs):
    """Parse the query string of `url` into a dict of lists

    Keyword arguments are forwarded to `urllib.parse.parse_qs`.
    """
    query = urllib.parse.urlparse(url).query
    return urllib.parse.parse_qs(query, **kwargs)
4dfbf869	2514
4dfbf869	2515
def read_batch_urls(batch_fd):
    """Read the URLs from a batch file object, one per line, and close it

    Blank lines and lines starting with `#`, `;` or `]` are skipped. A leading
    BOM and any comment following whitespace (` #...`) are removed.
    """
    def clean_line(line):
        if not isinstance(line, str):
            line = line.decode('utf-8', 'replace')
        for bom in ('\xef\xbb\xbf', '\ufeff'):
            if line.startswith(bom):
                line = line[len(bom):]
        line = line.lstrip()
        if not line or line.startswith(('#', ';', ']')):
            return False
        # "#" cannot be stripped out since it is part of the URI
        # However, it can be safely stripped out if following a whitespace
        url, *_ = re.split(r'\s#', line, maxsplit=1)
        return url.rstrip()

    with contextlib.closing(batch_fd) as fd:
        return list(filter(None, map(clean_line, fd)))
b74fa8cd JMF	2533
	2534
	2535	def urlencode_postdata(args, *kargs):
14f25df2	2536	return urllib.parse.urlencode(args, *kargs).encode('ascii')
bcf89ce6 PH	2537
bcf89ce6 PH	2538
def update_url(url, *, query_update=None, **kwargs):
    """Replace URL components specified by kwargs
       @param url           str or parse url tuple
       @param query_update  update query
       @param kwargs        URL components to replace, named as in the result
                            of `urllib.parse.urlparse` (scheme, netloc, path, ...)
       @returns             str
    """
    if isinstance(url, str):
        if not kwargs and not query_update:
            # Nothing to change; avoid a parse/unparse round trip
            return url
        else:
            url = urllib.parse.urlparse(url)
    if query_update:
        assert 'query' not in kwargs, 'query_update and query cannot be specified at the same time'
        # Merge into the existing query; keys in query_update take precedence
        kwargs['query'] = urllib.parse.urlencode({
            **urllib.parse.parse_qs(url.query),
            **query_update,
        }, True)
    return urllib.parse.urlunparse(url._replace(**kwargs))
	2557
	2558
def update_url_query(url, query):
    """Return `url` with its query string updated from the mapping `query`"""
    return update_url(url, query_update=query)
16392824	2561
8e60dc75	2562
def _multipart_encode_impl(data, boundary):
    """Encode `data` as multipart/form-data using the given boundary string.

    @returns  Tuple (body bytes, Content-Type header value)
    @raises   ValueError if the boundary occurs inside any encoded part
    """
    content_type = f'multipart/form-data; boundary={boundary}'
    boundary_bytes = boundary.encode('ascii')
    delimiter = b'--' + boundary_bytes + b'\r\n'

    chunks = []
    for name, value in data.items():
        chunks.append(delimiter)
        if isinstance(name, str):
            name = name.encode()
        if isinstance(value, str):
            value = value.encode()
        # RFC 2047 requires non-ASCII field names to be encoded, while RFC 7578
        # suggests sending UTF-8 directly. Firefox sends UTF-8, too
        part = b'Content-Disposition: form-data; name="' + name + b'"\r\n\r\n' + value + b'\r\n'
        if boundary_bytes in part:
            raise ValueError('Boundary overlaps with data')
        chunks.append(part)

    chunks.append(b'--' + boundary_bytes + b'--\r\n')

    return b''.join(chunks), content_type
	2583
	2584
	2585	def multipart_encode(data, boundary=None):
add96eb9	2586	"""
0c265486 YCH	2587	Encode a dict to RFC 7578-compliant form-data
	2588
	2589	data:
	2590	A dict where keys and values can be either Unicode or bytes-like
	2591	objects.
	2592	boundary:
	2593	If specified a Unicode object, it's used as the boundary. Otherwise
	2594	a random boundary is generated.
	2595
	2596	Reference: https://tools.ietf.org/html/rfc7578
add96eb9	2597	"""
0c265486 YCH	2598	has_specified_boundary = boundary is not None
	2599
	2600	while True:
	2601	if boundary is None:
	2602	boundary = '---------------' + str(random.randrange(0x0fffffff, 0xffffffff))
	2603
	2604	try:
10c87c15	2605	out, content_type = _multipart_encode_impl(data, boundary)
0c265486 YCH	2606	break
	2607	except ValueError:
	2608	if has_specified_boundary:
	2609	raise
	2610	boundary = None
	2611
	2612	return out, content_type
	2613
	2614
def is_iterable_like(x, allowed_types=collections.abc.Iterable, blocked_types=NO_DEFAULT):
    """Whether `x` is an instance of `allowed_types` but not of `blocked_types`.

    When `blocked_types` is not given, str, bytes and mappings are blocked
    """
    if blocked_types is NO_DEFAULT:
        blocked_types = (str, bytes, collections.abc.Mapping)
    if not isinstance(x, allowed_types):
        return False
    return not isinstance(x, blocked_types)
	2619
	2620
	2621	def variadic(x, allowed_types=NO_DEFAULT):
4823ec9f	2622	if not isinstance(allowed_types, (tuple, type)):
	2623	deprecation_warning('allowed_types should be a tuple or a type')
	2624	allowed_types = tuple(allowed_types)
6f2287cb	2625	return x if is_iterable_like(x, blocked_types=allowed_types) else (x, )
304ad45a	2626
304ad45a	2627
def try_call(*funcs, expected_type=None, args=[], kwargs={}):
    """Call each function in turn and return the first acceptable result.

    @param funcs          Callables, each invoked as f(*args, **kwargs)
    @param expected_type  If given, results that are not instances of it are skipped
    @param args           Positional arguments for every call (never mutated here)
    @param kwargs         Keyword arguments for every call (never mutated here)
    @returns              The first accepted value, or None if there is none
    """
    for f in funcs:
        try:
            val = f(*args, **kwargs)
        except (AttributeError, KeyError, TypeError, IndexError, ValueError, ZeroDivisionError):
            # Failures of these common lookup/conversion kinds mean "try the next one"
            pass
        else:
            if expected_type is None or isinstance(val, expected_type):
                return val
	2637
	2638
	2639	def try_get(src, getter, expected_type=None):
	2640	return try_call(*variadic(getter), args=(src,), expected_type=expected_type)
329ca3be S	2641
329ca3be S	2642
def filter_dict(dct, cndn=lambda _, v: v is not None):
    """Return a new dict with the items of `dct` for which cndn(key, value) is truthy.

    By default, items whose value is None are dropped
    """
    return dict(item for item in dct.items() if cndn(*item))
	2645
	2646
def merge_dicts(*dicts):
    """Merge dicts, giving precedence to the earlier ones.

    A key is taken from the first dict that has a non-None value for it,
    except that an empty string may be replaced by a later string value
    """
    merged = {}
    for a_dict in dicts:
        for key, value in a_dict.items():
            if value is not None and key not in merged:
                merged[key] = value
                continue
            if isinstance(value, str) and merged[key] == '':
                merged[key] = value
    return merged
	2655
	2656
def encode_compat_str(string, encoding=preferredencoding(), errors='strict'):
    """Return `string` as str, decoding it with `encoding`/`errors` if it is not one already"""
    if isinstance(string, str):
        return string
    return str(string, encoding, errors)
8e60dc75	2659
16392824	2660
# Minimum viewer age associated with each US movie rating
US_RATINGS = {
    'G': 0,
    'PG': 10,
    'PG-13': 13,
    'R': 16,
    'NC': 18,
}


# Minimum viewer age associated with each US TV parental guideline
TV_PARENTAL_GUIDELINES = {
    'TV-Y': 0,
    'TV-Y7': 7,
    'TV-G': 0,
    'TV-PG': 0,
    'TV-14': 14,
    'TV-MA': 17,
}


def parse_age_limit(s):
    """Convert an age rating into a numeric age limit.

    @param s  An int age, a string like "18" or "18+", a US movie rating
              (see US_RATINGS) or a TV guideline such as "TV-14", "TV_14"
              or "TV14" (see TV_PARENTAL_GUIDELINES); ratings are matched
              case-insensitively
    @returns  The age as int, or None if `s` cannot be interpreted
    """
    # isinstance(False, int) is True. So type() must be used instead
    if type(s) is int:  # noqa: E721
        return s if 0 <= s <= 21 else None
    elif not isinstance(s, str):
        return None
    m = re.match(r'^(?P<age>\d{1,2})\+?$', s)
    if m:
        return int(m.group('age'))
    s = s.upper()
    if s in US_RATINGS:
        return US_RATINGS[s]
    # k[3:] strips the "TV-" prefix so that the separator can be made optional
    m = re.match(r'^TV[_-]?({})$'.format('|'.join(k[3:] for k in TV_PARENTAL_GUIDELINES)), s)
    if m:
        return TV_PARENTAL_GUIDELINES['TV-' + m.group(1)]
    return None
146c80e2 S	2696
146c80e2 S	2697
def strip_jsonp(code):
    """Strip a JSONP wrapper and return the payload passed to the callback.

    Handles an optional "window." prefix, a "name && name(...)" guard,
    a trailing semicolon and trailing "//" comments. Input that does not
    look like JSONP is returned unchanged
    """
    return re.sub(
        r'''(?sx)^
            (?:window\.)?(?P<func_name>[a-zA-Z0-9_.$]*)
            (?:\s*&&\s*(?P=func_name))?
            \s*\(\s*(?P<callback_data>.*)\);?
            \s*?(?://[^\n]*)*$''',
        r'\g<callback_data>', code)
478c2c61 PH	2706
478c2c61 PH	2707
def js_to_json(code, vars={}, *, strict=False):
    """Convert a JavaScript object/array literal into a JSON string.

    @param code    JavaScript source of the literal
    @param vars    dict of var, val pairs to substitute; each value is a
                   string holding the replacement (never mutated here)
    @param strict  If True, raise ValueError on identifiers that are neither
                   literals nor in `vars` instead of quoting them, and skip
                   the lenient rewrites of "new X(...)", parseInt and IIFEs
    @returns       str that is intended to be parseable by json.loads
    """
    STRING_QUOTES = '\'"`'
    STRING_RE = '|'.join(rf'{q}(?:\\.|[^\\{q}])*{q}' for q in STRING_QUOTES)
    COMMENT_RE = r'/\*(?:(?!\*/).)*?\*/|//[^\n]*\n'
    SKIP_RE = fr'\s*(?:{COMMENT_RE})?\s*'
    INTEGER_TABLE = (
        (fr'(?s)^(0[xX][0-9a-fA-F]+){SKIP_RE}:?$', 16),
        (fr'(?s)^(0+[0-7]+){SKIP_RE}:?$', 8),
    )

    def process_escape(match):
        # Group 1 is a bare double quote, group 2 the character after a backslash
        JSON_PASSTHROUGH_ESCAPES = R'"\bfnrtu'
        escape = match.group(1) or match.group(2)

        return (Rf'\{escape}' if escape in JSON_PASSTHROUGH_ESCAPES
                else R'\u00' if escape == 'x'
                else '' if escape == '\n'
                else escape)

    def template_substitute(match):
        # Evaluate the expression inside ${...} of a template string
        evaluated = js_to_json(match.group(1), vars, strict=strict)
        if evaluated[0] == '"':
            return json.loads(evaluated)
        return evaluated

    def fix_kv(m):
        v = m.group(0)
        if v in ('true', 'false', 'null'):
            return v
        elif v in ('undefined', 'void 0'):
            return 'null'
        elif v.startswith(('/*', '//', '!')) or v == ',':
            # Comments, negation operators and trailing commas are dropped
            return ''

        if v[0] in STRING_QUOTES:
            v = re.sub(r'(?s)\${([^}]+)}', template_substitute, v[1:-1]) if v[0] == '`' else v[1:-1]
            escaped = re.sub(r'(?s)(")|\\(.)', process_escape, v)
            return f'"{escaped}"'

        for regex, base in INTEGER_TABLE:
            im = re.match(regex, v)
            if im:
                i = int(im.group(1), base)
                # A trailing ":" means the number is an object key and must be quoted
                return f'"{i}":' if v.endswith(':') else str(i)

        if v in vars:
            try:
                if not strict:
                    json.loads(vars[v])
            except json.JSONDecodeError:
                return json.dumps(vars[v])
            else:
                return vars[v]

        if not strict:
            return f'"{v}"'

        raise ValueError(f'Unknown value: {v}')

    def create_map(mobj):
        return json.dumps(dict(json.loads(js_to_json(mobj.group(1) or '[]', vars=vars))))

    code = re.sub(r'(?:new\s+)?Array\((.*?)\)', r'[\g<1>]', code)
    code = re.sub(r'new Map\((\[.*?\])?\)', create_map, code)
    if not strict:
        code = re.sub(rf'new Date\(({STRING_RE})\)', r'\g<1>', code)
        code = re.sub(r'new \w+\((.*?)\)', lambda m: json.dumps(m.group(0)), code)
        code = re.sub(r'parseInt\([^\d]+(\d+)[^\d]+\)', r'\1', code)
        code = re.sub(r'\(function\([^)]*\)\s*\{[^}]*\}\s*\)\s*\(\s*(["\'][^)]*["\'])\s*\)', r'\1', code)

    return re.sub(rf'''(?sx)
        {STRING_RE}|
        {COMMENT_RE}|,(?={SKIP_RE}[\]}}])|
        void\s0|(?:(?<![0-9])[eE]|[a-df-zA-DF-Z_$])[.a-zA-Z_$0-9]*|
        \b(?:0[xX][0-9a-fA-F]+|0+[0-7]+)(?:{SKIP_RE}:)?|
        [0-9]+(?={SKIP_RE}:)|
        !+
        ''', fix_kv, code)
e05f6939 PH	2787
e05f6939 PH	2788
def qualities(quality_ids):
    """ Get a numeric quality value out of a list of possible values

    Returns a function mapping a quality id to its index in `quality_ids`,
    or to -1 if the id is not listed """
    def q(qid):
        try:
            position = quality_ids.index(qid)
        except ValueError:
            position = -1
        return position
    return q
	2797
acd69589	2798
# Stage names accepted for the "when" of a postprocessor, in the order listed here
POSTPROCESS_WHEN = (
    'pre_process',
    'after_filter',
    'video',
    'before_dl',
    'post_process',
    'after_move',
    'after_video',
    'playlist',
)
1e43a6f7	2800
1e43a6f7	2801
# Output templates keyed by template type
DEFAULT_OUTTMPL = {
    'default': '%(title)s [%(id)s].%(ext)s',
    'chapter': '%(title)s - %(section_number)03d %(section_title)s [%(id)s].%(ext)s',
}

# Known output template types, each mapped to a string or None
# NOTE(review): the strings look like the filename suffix used for that type - confirm
OUTTMPL_TYPES = {
    'chapter': None,
    'subtitle': None,
    'thumbnail': None,
    'description': 'description',
    'annotation': 'annotations.xml',
    'infojson': 'info.json',
    'link': None,
    'pl_video': None,
    'pl_thumbnail': None,
    'pl_description': 'description',
    'pl_infojson': 'info.json',
}
0a871f68	2819
# As of [1] format syntax is:
#  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
# 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
#
# Template for a regex matching one %-format specifier. Fill it in with
# str.format: {0} is the pattern for the mapping key inside the
# parentheses, {1} the pattern for the conversion type
STR_FORMAT_RE_TMPL = r'''(?x)
    (?<!%)(?P<prefix>(?:%%)*)
    %
    (?P<has_key>\((?P<key>{0})\))?
    (?P<format>
        (?P<conversion>[#0\-+ ]+)?
        (?P<min_width>\d+)?
        (?P<precision>\.\d+)?
        (?P<len_mod>[hlL])?  # unused in python
        {1}  # conversion type
    )
'''


# All conversion type characters of %-formatting
STR_FORMAT_TYPES = 'diouxXeEfFgGcrsa'
a020a0dc	2838
7d1eb38a	2839
def limit_length(s, length):
    """ Add ellipses to overly long strings

    Strings longer than `length` are cut so that, including the trailing
    "...", they are `length` characters long. None is passed through """
    if s is None:
        return None
    ELLIPSES = '...'
    if len(s) <= length:
        return s
    return s[:length - len(ELLIPSES)] + ELLIPSES
48844745 PH	2848
	2849
	2850	def version_tuple(v):
5f9b8394	2851	return tuple(int(e) for e in re.split(r'[-.]', v))
48844745 PH	2852
	2853
	2854	def is_outdated_version(version, limit, assume_new=True):
	2855	if not version:
	2856	return not assume_new
	2857	try:
	2858	return version_tuple(version) < version_tuple(limit)
	2859	except ValueError:
	2860	return not assume_new
732ea2f0 PH	2861
	2862
	2863	def ytdl_is_updateable():
7a5c1cfe	2864	""" Returns if yt-dlp can be updated with -U """
735d865e	2865
69bec673	2866	from ..update import is_non_updateable
732ea2f0	2867
5d535b4a	2868	return not is_non_updateable()
7d4111ed PH	2869
	2870
	2871	def args_to_str(args):
	2872	# Get a short string representation for a subprocess command
ff077926	2873	return shell_quote(args)
2ccd1b10 PH	2874
2ccd1b10 PH	2875
def error_to_str(err):
    """Format an exception as "<ExceptionClassName>: <message>" """
    return '{}: {}'.format(type(err).__name__, err)
	2878
	2879
def mimetype2ext(mt, default=NO_DEFAULT):
    """Map a MIME type (optionally with parameters) to a file extension.

    @param mt       MIME type such as "video/mp4; codecs=..."
    @param default  Returned when `mt` is not a string or has no known
                    mapping. If not given, non-string input yields None and
                    an unknown type yields its subtype with "+" replaced by "."
    @returns        The extension without a leading dot
    """
    if not isinstance(mt, str):
        return None if default is NO_DEFAULT else default

    # Keys are either a full "type/subtype" or a bare subtype
    MAP = {
        # video
        '3gpp': '3gp',
        'mp2t': 'ts',
        'mp4': 'mp4',
        'mpeg': 'mpeg',
        'mpegurl': 'm3u8',
        'quicktime': 'mov',
        'webm': 'webm',
        'vp9': 'vp9',
        'video/ogg': 'ogv',
        'x-flv': 'flv',
        'x-m4v': 'm4v',
        'x-matroska': 'mkv',
        'x-mng': 'mng',
        'x-mp4-fragmented': 'mp4',
        'x-ms-asf': 'asf',
        'x-ms-wmv': 'wmv',
        'x-msvideo': 'avi',

        # application (streaming playlists)
        'dash+xml': 'mpd',
        'f4m+xml': 'f4m',
        'hds+xml': 'f4m',
        'vnd.apple.mpegurl': 'm3u8',
        'vnd.ms-sstr+xml': 'ism',
        'x-mpegurl': 'm3u8',

        # audio
        'audio/mp4': 'm4a',
        # Per RFC 3003, audio/mpeg can be .mp1, .mp2 or .mp3.
        # Using .mp3 as it's the most popular one
        'audio/mpeg': 'mp3',
        'audio/webm': 'webm',
        'audio/x-matroska': 'mka',
        'audio/x-mpegurl': 'm3u',
        'midi': 'mid',
        'ogg': 'ogg',
        'wav': 'wav',
        'wave': 'wav',
        'x-aac': 'aac',
        'x-flac': 'flac',
        'x-m4a': 'm4a',
        'x-realaudio': 'ra',
        'x-wav': 'wav',

        # image
        'avif': 'avif',
        'bmp': 'bmp',
        'gif': 'gif',
        'jpeg': 'jpg',
        'png': 'png',
        'svg+xml': 'svg',
        'tiff': 'tif',
        'vnd.wap.wbmp': 'wbmp',
        'webp': 'webp',
        'x-icon': 'ico',
        'x-jng': 'jng',
        'x-ms-bmp': 'bmp',

        # caption
        'filmstrip+json': 'fs',
        'smptett+xml': 'tt',
        'ttaf+xml': 'dfxp',
        'ttml+xml': 'ttml',
        'x-ms-sami': 'sami',

        # misc
        'gzip': 'gz',
        'json': 'json',
        'xml': 'xml',
        'zip': 'zip',
    }

    # Drop parameters such as "; charset=..." and normalize the case
    mimetype = mt.partition(';')[0].strip().lower()
    subtype = mimetype.rpartition('/')[2]

    # Keys are tried from the most to the least specific
    ext = traversal.traverse_obj(MAP, mimetype, subtype, subtype.rsplit('+')[-1])
    if ext:
        return ext
    if default is NO_DEFAULT:
        return subtype.replace('+', '.')
    return default
c460bdd5 PH	2969
c460bdd5 PH	2970
def ext2mimetype(ext_or_url):
    """Guess the MIME type for a bare extension or for a filename/URL.

    Returns None for empty input or when the type cannot be guessed
    """
    if not ext_or_url:
        return None
    # A value without a dot is a bare extension; give it a dummy filename
    filename = ext_or_url if '.' in ext_or_url else f'file.{ext_or_url}'
    mime_type, _ = mimetypes.guess_type(filename)
    return mime_type
	2977
	2978
def parse_codecs(codecs_str):
    """Split an RFC 6381 "codecs" string into video, audio and subtitle codecs.

    @returns  {} for empty input. Otherwise a dict with "vcodec", "acodec"
              ("none" when absent), "dynamic_range" and, if one was found,
              "scodec". If no codec is recognized but exactly two are
              given, they are taken as video and audio, in that order
    """
    # http://tools.ietf.org/html/rfc6381
    if not codecs_str:
        return {}

    VIDEO_CODECS = ('avc1', 'avc2', 'avc3', 'avc4', 'vp9', 'vp8', 'hev1', 'hev2',
                    'h263', 'h264', 'mp4v', 'hvc1', 'av1', 'theora', 'dvh1', 'dvhe')
    AUDIO_CODECS = ('flac', 'mp4a', 'opus', 'vorbis', 'mp3', 'aac', 'ac-4',
                    'ac-3', 'ec-3', 'eac3', 'dtsc', 'dtse', 'dtsh', 'dtsl')
    SUBTITLE_CODECS = ('stpp', 'wvtt')

    stripped = map(str.strip, codecs_str.strip().strip(',').split(','))
    split_codecs = [codec for codec in stripped if codec]

    vcodec = acodec = scodec = hdr = None
    for full_codec in split_codecs:
        # Leading zeros of numeric fields are removed before comparing
        parts = re.sub(r'0+(?=\d)', '', full_codec).split('.')
        family = parts[0]
        if family in VIDEO_CODECS:
            if vcodec:
                # Only the first video codec is used
                continue
            vcodec = full_codec
            if family in ('dvh1', 'dvhe'):
                hdr = 'DV'
            elif family == 'av1' and traversal.traverse_obj(parts, 3) == '10':
                hdr = 'HDR10'
            elif parts[:2] == ['vp9', '2']:
                hdr = 'HDR10'
        elif family in AUDIO_CODECS:
            acodec = acodec or full_codec
        elif family in SUBTITLE_CODECS:
            scodec = scodec or full_codec
        else:
            write_string(f'WARNING: Unknown codec {full_codec}\n')

    if vcodec or acodec or scodec:
        result = {
            'vcodec': vcodec or 'none',
            'acodec': acodec or 'none',
            'dynamic_range': hdr,
        }
        if scodec is not None:
            result['scodec'] = scodec
        return result
    if len(split_codecs) == 2:
        return {
            'vcodec': split_codecs[0],
            'acodec': split_codecs[1],
        }
    return {}
	3019
	3020
def get_compatible_ext(*, vcodecs, acodecs, vexts, aexts, preferences=None):
    """Choose a container extension that can hold the given streams.

    @param vcodecs      Video codecs, one per video stream
    @param acodecs      Audio codecs, one per audio stream
    @param vexts        Extensions of the video streams (same length as vcodecs)
    @param aexts        Extensions of the audio streams (same length as acodecs)
    @param preferences  Optional sequence of extensions to choose from, in
                        order of preference
    @returns            The selected extension
    """
    assert len(vcodecs) == len(vexts) and len(acodecs) == len(aexts)

    allow_mkv = not preferences or 'mkv' in preferences

    if allow_mkv and max(len(acodecs), len(vcodecs)) > 1:
        return 'mkv'  # TODO: any other format allows this?

    # TODO: All codecs supported by parse_codecs isn't handled here
    COMPATIBLE_CODECS = {
        'mp4': {
            'av1', 'hevc', 'avc1', 'mp4a', 'ac-4',  # fourcc (m3u8, mpd)
            'h264', 'aacl', 'ec-3',  # Set in ISM
        },
        'webm': {
            'av1', 'vp9', 'vp8', 'opus', 'vrbs',
            'vp9x', 'vp8x',  # in the webm spec
        },
    }

    # Reduce the first codec to its family name, e.g. "avc1.64001f" -> "avc1".
    # Yields None when the list is empty or its first entry is not a string
    sanitize_codec = functools.partial(
        try_get, getter=lambda x: x[0].split('.')[0].replace('0', '').lower())
    vcodec, acodec = sanitize_codec(vcodecs), sanitize_codec(acodecs)

    for ext in preferences or COMPATIBLE_CODECS.keys():
        codec_set = COMPATIBLE_CODECS.get(ext, set())
        if ext == 'mkv' or codec_set.issuperset((vcodec, acodec)):
            return ext

    # No match by codec; fall back to comparing the extensions of the inputs
    COMPATIBLE_EXTS = (
        {'mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v', 'ismv', 'isma', 'mov'},
        {'webm', 'weba'},
    )
    for ext in preferences or vexts:
        current_exts = {ext, *vexts, *aexts}
        if ext == 'mkv' or current_exts == {ext} or any(
                ext_sets.issuperset(current_exts) for ext_sets in COMPATIBLE_EXTS):
            return ext
    return 'mkv' if allow_mkv else preferences[-1]
	3060
	3061
def urlhandle_detect_ext(url_handle, default=NO_DEFAULT):
    """Detect the file extension of a response from its headers.

    Checks, in order: the filename of an attachment Content-Disposition,
    the "x-amz-meta-name" header and finally the Content-Type.
    `default` is passed on to mimetype2ext
    """
    headers = url_handle.headers

    disposition = headers.get('Content-Disposition')
    if disposition:
        m = re.match(r'attachment;\s*filename="(?P<filename>[^"]+)"', disposition)
        if m:
            ext = determine_ext(m.group('filename'), default_ext=None)
            if ext:
                return ext

    meta_name = headers.get('x-amz-meta-name')
    if meta_name:
        # Everything after the last dot, or the whole value if there is none
        ext = meta_name.rpartition('.')[2]
        if ext:
            return ext

    return mimetype2ext(headers.get('Content-Type'), default=default)
05900629 PH	3080
05900629 PH	3081
def encode_data_uri(data, mime_type):
    """Build a base64 "data:" URI for the bytes `data` with the given MIME type"""
    payload = base64.b64encode(data).decode('ascii')
    return f'data:{mime_type};base64,{payload}'
1e399778 YCH	3084
1e399778 YCH	3085
def age_restricted(content_limit, age_limit):
    """ Returns True iff the content should be blocked

    content_limit: minimum age required by the content, None if available for everyone
    age_limit:     age of the viewer, None if no limit is set """

    if age_limit is None or content_limit is None:
        # Either no limit is set, or the content is available for everyone
        return False
    return age_limit < content_limit
61ca9a80 PH	3094
61ca9a80 PH	3095
# List of known byte-order-marks (BOM)
# The UTF-32 marks are listed before the UTF-16 ones since the UTF-32-LE
# mark starts with the UTF-16-LE mark
BOMS = [
    (b'\xef\xbb\xbf', 'utf-8'),
    (b'\x00\x00\xfe\xff', 'utf-32-be'),
    (b'\xff\xfe\x00\x00', 'utf-32-le'),
    (b'\xff\xfe', 'utf-16-le'),
    (b'\xfe\xff', 'utf-16-be'),
]


def is_html(first_bytes):
    """ Detect whether a file contains HTML by examining its first bytes.

    Leading byte-order-marks are removed and select the encoding (UTF-8 by
    default). Returns a match object if the text starts with optional
    whitespace followed by "<", else None """

    encoding = 'utf-8'
    for bom, bom_encoding in BOMS:
        while first_bytes.startswith(bom):
            first_bytes = first_bytes[len(bom):]
            encoding = bom_encoding

    text = first_bytes.decode(encoding, 'replace')
    return re.match(r'^\s*<', text)
a055469f PH	3115
	3116
	3117	def determine_protocol(info_dict):
	3118	protocol = info_dict.get('protocol')
	3119	if protocol is not None:
	3120	return protocol
	3121
7de837a5	3122	url = sanitize_url(info_dict['url'])
a055469f PH	3123	if url.startswith('rtmp'):
	3124	return 'rtmp'
	3125	elif url.startswith('mms'):
	3126	return 'mms'
	3127	elif url.startswith('rtsp'):
	3128	return 'rtsp'
	3129
	3130	ext = determine_ext(url)
	3131	if ext == 'm3u8':
deae7c17	3132	return 'm3u8' if info_dict.get('is_live') else 'm3u8_native'
a055469f PH	3133	elif ext == 'f4m':
	3134	return 'f4m'
	3135
14f25df2	3136	return urllib.parse.urlparse(url).scheme
cfb56d1a PH	3137
cfb56d1a PH	3138
def render_table(header_row, data, delim=False, extra_gap=0, hide_empty=False):
    """ Render a list of rows, each as a list of values.
    Text after a tab character will be right aligned

    header_row: list of column titles
    data:       list of rows
    delim:      if set, a string repeated to form a separator line under the header
    extra_gap:  additional spaces between columns
    hide_empty: drop the columns in which every data cell has zero width """
    def width(string):
        return len(remove_terminal_sequences(string).replace('\t', ''))

    def get_max_lens(rows):
        return [max(width(str(cell)) for cell in column) for column in zip(*rows)]

    def filter_using_list(row, filter_array):
        paired = itertools.zip_longest(filter_array, row, fillvalue=True)
        return [cell for keep, cell in paired if keep]

    # With hide_empty, a column width of 0 filters that column out
    column_filter = get_max_lens(data) if hide_empty else []
    header_row = filter_using_list(header_row, column_filter)
    data = [filter_using_list(row, column_filter) for row in data]

    table = [header_row, *data]
    max_lens = get_max_lens(table)
    extra_gap += 1
    if delim:
        separator = [delim * (ml + extra_gap) for ml in max_lens]
        separator[-1] = separator[-1][:-extra_gap * len(delim)]  # Remove extra_gap from end of delimiter
        table = [header_row, separator, *data]
    for row in table:
        for pos, text in enumerate(map(str, row)):
            padding = max_lens[pos] - width(text)
            if '\t' in text:
                # The padding goes where the tab was, right-aligning what follows it
                row[pos] = text.replace('\t', ' ' * padding) + ' ' * extra_gap
            else:
                row[pos] = text + ' ' * (padding + extra_gap)
    return '\n'.join(''.join(row).rstrip() for row in table)
347de493 PH	3168
347de493 PH	3169
def _match_one(filter_part, dct, incomplete):
    """Evaluate a single filter condition against `dct`.

    @param filter_part  Either "<key> [!]<op>[?] <value>" with one of the
                        comparison operators below, or "[!]<key>"
    @param dct          The dictionary to test
    @param incomplete   Set of keys that may be missing from dct, or
                        True/False for all/none of the keys
    @returns            Truthy if the condition passes
    @raises             ValueError for a malformed condition or when a
                        string operator is applied to a numeric field
    """
    # TODO: Generalize code with YoutubeDL._build_format_filter
    STRING_OPERATORS = {
        '*=': operator.contains,
        '^=': lambda attr, value: attr.startswith(value),
        '$=': lambda attr, value: attr.endswith(value),
        '~=': lambda attr, value: re.search(value, attr),
    }
    COMPARISON_OPERATORS = {
        **STRING_OPERATORS,
        '<=': operator.le,  # "<=" must be defined above "<"
        '<': operator.lt,
        '>=': operator.ge,
        '>': operator.gt,
        '=': operator.eq,
    }

    if isinstance(incomplete, bool):
        is_incomplete = lambda _: incomplete
    else:
        is_incomplete = lambda k: k in incomplete

    operator_rex = re.compile(r'''(?x)
        (?P<key>[a-z_]+)
        \s*(?P<negation>!\s*)?(?P<op>{})(?P<none_inclusive>\s*\?)?\s*
        (?:
            (?P<quote>["\'])(?P<quotedstrval>.+?)(?P=quote)|
            (?P<strval>.+?)
        )
        '''.format('|'.join(map(re.escape, COMPARISON_OPERATORS.keys()))))
    m = operator_rex.fullmatch(filter_part.strip())
    if m:
        m = m.groupdict()
        unnegated_op = COMPARISON_OPERATORS[m['op']]
        if m['negation']:
            op = lambda attr, value: not unnegated_op(attr, value)
        else:
            op = unnegated_op
        # Exactly one of the two value groups is non-empty after a match
        comparison_value = m['quotedstrval'] or m['strval']
        if m['quote']:
            # Unescape the quote character inside a quoted value
            comparison_value = comparison_value.replace(r'\{}'.format(m['quote']), m['quote'])
        actual_value = dct.get(m['key'])
        numeric_comparison = None
        if isinstance(actual_value, (int, float)):
            # If the original field is a string and matching comparisonvalue is
            # a number we should respect the origin of the original field
            # and process comparison value as a string (see
            # https://github.com/ytdl-org/youtube-dl/issues/11082)
            try:
                numeric_comparison = int(comparison_value)
            except ValueError:
                numeric_comparison = parse_filesize(comparison_value)
                if numeric_comparison is None:
                    numeric_comparison = parse_filesize(f'{comparison_value}B')
                if numeric_comparison is None:
                    numeric_comparison = parse_duration(comparison_value)
        if numeric_comparison is not None and m['op'] in STRING_OPERATORS:
            raise ValueError('Operator {} only supports string values!'.format(m['op']))
        if actual_value is None:
            return is_incomplete(m['key']) or m['none_inclusive']
        return op(actual_value, comparison_value if numeric_comparison is None else numeric_comparison)

    UNARY_OPERATORS = {
        '': lambda v: (v is True) if isinstance(v, bool) else (v is not None),
        '!': lambda v: (v is False) if isinstance(v, bool) else (v is None),
    }
    operator_rex = re.compile(r'''(?x)
        (?P<op>{})\s*(?P<key>[a-z_]+)
        '''.format('|'.join(map(re.escape, UNARY_OPERATORS.keys()))))
    m = operator_rex.fullmatch(filter_part.strip())
    if m:
        op = UNARY_OPERATORS[m.group('op')]
        actual_value = dct.get(m.group('key'))
        if is_incomplete(m.group('key')) and actual_value is None:
            return True
        return op(actual_value)

    raise ValueError(f'Invalid filter part {filter_part!r}')
347de493 PH	3248
347de493 PH	3249
def match_str(filter_str, dct, incomplete=False):
    """ Filter a dictionary with a simple string syntax.

    Conditions are separated by "&" and must all pass; a literal "&"
    inside a condition is written as "\\&".
    @returns           Whether the filter passes
    @param incomplete  Set of keys that is expected to be missing from dct.
                       Can be True/False to indicate all/none of the keys may be missing.
                       All conditions on incomplete keys pass if the key is missing
    """
    conditions = re.split(r'(?<!\\)&', filter_str)
    return all(
        _match_one(condition.replace(r'\&', '&'), dct, incomplete)
        for condition in conditions)
347de493 PH	3260
347de493 PH	3261
def match_filter_func(filters, breaking_filters=None):
    """Build a match-filter callable from filter strings.

    @param filters           A filter string or an iterable of them; an entry
                             passes if any one filter matches. The special
                             filter "-" enables interactive mode
    @param breaking_filters  Filters that raise RejectedVideoReached when an
                             entry does not pass them
    @returns                 None if no filters are given at all. Otherwise a
                             function (info_dict, incomplete=False) returning
                             None if the entry passes (NO_DEFAULT in
                             interactive mode with complete info), or a
                             message explaining why it is skipped
    """
    if not filters and not breaking_filters:
        return None
    repr_ = f'{match_filter_func.__module__}.{match_filter_func.__qualname__}({filters}, {breaking_filters})'

    breaking_filters = match_filter_func(breaking_filters) or (lambda _, __: None)
    filters = set(variadic(filters or []))

    interactive = '-' in filters
    if interactive:
        filters.remove('-')

    @function_with_repr.set_repr(repr_)
    def _match_func(info_dict, incomplete=False):
        ret = breaking_filters(info_dict, incomplete)
        if ret is not None:
            raise RejectedVideoReached(ret)

        if not filters or any(match_str(f, info_dict, incomplete) for f in filters):
            return NO_DEFAULT if interactive and not incomplete else None
        else:
            video_title = info_dict.get('title') or info_dict.get('id') or 'entry'
            filter_str = ') | ('.join(map(str.strip, filters))
            return f'{video_title} does not pass filter ({filter_str}), skipping ..'
    return _match_func
91410c9b PH	3287
91410c9b PH	3288
class download_range_func:
    """Callable that yields the sections of a video to download.

    chapters:  regexes; every chapter whose title matches one is yielded
    ranges:    (start, end) pairs in seconds; negative values count from
               the end when the duration is known
    from_info: also yield the section given by the info dict's own
               start_time/end_time
    """

    def __init__(self, chapters, ranges, from_info=False):
        self.chapters = chapters
        self.ranges = ranges
        self.from_info = from_info

    def __call__(self, info_dict, ydl):
        if info_dict.get('chapters'):
            warning = 'There are no chapters matching the regex'
        else:
            warning = 'Cannot match chapters since chapter information is unavailable'

        for regex in self.chapters or []:
            for index, chapter in enumerate(info_dict.get('chapters') or []):
                if re.search(regex, chapter['title']):
                    warning = None
                    yield dict(chapter, index=index)
        if self.chapters and warning:
            ydl.to_screen(f'[info] {info_dict["id"]}: {warning}')

        for start, end in self.ranges or []:
            yield {
                'start_time': self._handle_negative_timestamp(start, info_dict),
                'end_time': self._handle_negative_timestamp(end, info_dict),
            }

        if self.from_info and (info_dict.get('start_time') or info_dict.get('end_time')):
            yield {
                'start_time': info_dict.get('start_time') or 0,
                'end_time': info_dict.get('end_time') or float('inf'),
            }
        elif not self.ranges and not self.chapters:
            # Nothing was requested; an empty dict is yielded instead
            yield {}

    @staticmethod
    def _handle_negative_timestamp(time, info):
        # Negative times are relative to the end, clamped at 0
        if info.get('duration') and time < 0:
            return max(info['duration'] + time, 0)
        return time

    def __eq__(self, other):
        if not isinstance(other, download_range_func):
            return False
        return self.chapters == other.chapters and self.ranges == other.ranges

    def __repr__(self):
        return f'{__name__}.{type(self).__name__}({self.chapters}, {self.ranges})'
71df9b7f	3329
5ec1b6b7	3330
def parse_dfxp_time_expr(time_expr):
    """Parse a DFXP/TTML time expression into seconds.

    Supports an offset in seconds with an optional "s" suffix (e.g. "12.5s")
    and clock times "H:MM:SS", optionally followed by "." or ":" and further
    digits, which are read as the fraction of a second.
    @returns  float/int seconds, or None if `time_expr` is empty or not understood
    """
    if not time_expr:
        return

    mobj = re.match(rf'^(?P<time_offset>{NUMBER_RE})s?$', time_expr)
    if mobj:
        return float(mobj.group('time_offset'))

    mobj = re.match(r'^(\d+):(\d\d):(\d\d(?:(?:\.|:)\d+)?)$', time_expr)
    if mobj:
        # A ":" before the last field is read like a decimal point
        return 3600 * int(mobj.group(1)) + 60 * int(mobj.group(2)) + float(mobj.group(3).replace(':', '.'))
bf6427d2 YCH	3342
bf6427d2 YCH	3343
def srt_subtitles_timecode(seconds):
    """Format a time in seconds as an SRT timecode (HH:MM:SS,mmm)"""
    timecode = timetuple_from_msec(seconds * 1000)
    return '%02d:%02d:%02d,%03d' % timecode
	3346
	3347
	3348	def ass_subtitles_timecode(seconds):
	3349	time = timetuple_from_msec(seconds * 1000)
	3350	return '%01d:%02d:%02d.%02d' % (*time[:-1], time.milliseconds / 10)
bf6427d2 YCH	3351
	3352
	3353	def dfxp2srt(dfxp_data):
add96eb9	3354	"""
3869028f YCH	3355	@param dfxp_data A bytes-like object containing DFXP data
3869028f YCH	3356	@returns A unicode object containing converted SRT data
add96eb9	3357	"""
5b995f71	3358	LEGACY_NAMESPACES = (
3869028f YCH	3359	(b'http://www.w3.org/ns/ttml', [
	3360	b'http://www.w3.org/2004/11/ttaf1',
	3361	b'http://www.w3.org/2006/04/ttaf1',
	3362	b'http://www.w3.org/2006/10/ttaf1',
5b995f71	3363	]),
3869028f YCH	3364	(b'http://www.w3.org/ns/ttml#styling', [
3869028f YCH	3365	b'http://www.w3.org/ns/ttml#style',
5b995f71 RA	3366	]),
	3367	)
	3368
	3369	SUPPORTED_STYLING = [
	3370	'color',
	3371	'fontFamily',
	3372	'fontSize',
	3373	'fontStyle',
	3374	'fontWeight',
add96eb9	3375	'textDecoration',
5b995f71 RA	3376	]
5b995f71 RA	3377
4e335771	3378	_x = functools.partial(xpath_with_ns, ns_map={
261f4730	3379	'xml': 'http://www.w3.org/XML/1998/namespace',
4e335771	3380	'ttml': 'http://www.w3.org/ns/ttml',
5b995f71	3381	'tts': 'http://www.w3.org/ns/ttml#styling',
4e335771	3382	})
bf6427d2	3383
5b995f71 RA	3384	styles = {}
	3385	default_style = {}
	3386
86e5f3ed	3387	class TTMLPElementParser:
5b995f71 RA	3388	_out = ''
	3389	_unclosed_elements = []
	3390	_applied_styles = []
bf6427d2	3391
2b14cb56	3392	def start(self, tag, attrib):
5b995f71 RA	3393	if tag in (_x('ttml:br'), 'br'):
	3394	self._out += '\n'
	3395	else:
	3396	unclosed_elements = []
	3397	style = {}
	3398	element_style_id = attrib.get('style')
	3399	if default_style:
	3400	style.update(default_style)
	3401	if element_style_id:
	3402	style.update(styles.get(element_style_id, {}))
	3403	for prop in SUPPORTED_STYLING:
	3404	prop_val = attrib.get(_x('tts:' + prop))
	3405	if prop_val:
	3406	style[prop] = prop_val
	3407	if style:
	3408	font = ''
	3409	for k, v in sorted(style.items()):
	3410	if self._applied_styles and self._applied_styles[-1].get(k) == v:
	3411	continue
	3412	if k == 'color':
add96eb9	3413	font += f' color="{v}"'
5b995f71	3414	elif k == 'fontSize':
add96eb9	3415	font += f' size="{v}"'
5b995f71	3416	elif k == 'fontFamily':
add96eb9	3417	font += f' face="{v}"'
5b995f71 RA	3418	elif k == 'fontWeight' and v == 'bold':
	3419	self._out += '<b>'
	3420	unclosed_elements.append('b')
	3421	elif k == 'fontStyle' and v == 'italic':
	3422	self._out += '<i>'
	3423	unclosed_elements.append('i')
	3424	elif k == 'textDecoration' and v == 'underline':
	3425	self._out += '<u>'
	3426	unclosed_elements.append('u')
	3427	if font:
	3428	self._out += '<font' + font + '>'
	3429	unclosed_elements.append('font')
	3430	applied_style = {}
	3431	if self._applied_styles:
	3432	applied_style.update(self._applied_styles[-1])
	3433	applied_style.update(style)
	3434	self._applied_styles.append(applied_style)
	3435	self._unclosed_elements.append(unclosed_elements)
bf6427d2	3436
2b14cb56	3437	def end(self, tag):
5b995f71 RA	3438	if tag not in (_x('ttml:br'), 'br'):
	3439	unclosed_elements = self._unclosed_elements.pop()
	3440	for element in reversed(unclosed_elements):
add96eb9	3441	self._out += f'</{element}>'
5b995f71 RA	3442	if unclosed_elements and self._applied_styles:
5b995f71 RA	3443	self._applied_styles.pop()
bf6427d2	3444
2b14cb56	3445	def data(self, data):
5b995f71	3446	self._out += data
2b14cb56	3447
2b14cb56	3448	def close(self):
5b995f71	3449	return self._out.strip()
2b14cb56	3450
6a765f13	3451	# Fix UTF-8 encoded file wrongly marked as UTF-16. See https://github.com/yt-dlp/yt-dlp/issues/6543#issuecomment-1477169870
	3452	# This will not trigger false positives since only UTF-8 text is being replaced
	3453	dfxp_data = dfxp_data.replace(b'encoding=\'UTF-16\'', b'encoding=\'UTF-8\'')
	3454
2b14cb56	3455	def parse_node(node):
	3456	target = TTMLPElementParser()
	3457	parser = xml.etree.ElementTree.XMLParser(target=target)
	3458	parser.feed(xml.etree.ElementTree.tostring(node))
	3459	return parser.close()
bf6427d2	3460
5b995f71 RA	3461	for k, v in LEGACY_NAMESPACES:
	3462	for ns in v:
	3463	dfxp_data = dfxp_data.replace(ns, k)
	3464
3869028f	3465	dfxp = compat_etree_fromstring(dfxp_data)
bf6427d2	3466	out = []
5b995f71	3467	paras = dfxp.findall(_x('.//ttml:p')) or dfxp.findall('.//p')
1b0427e6 YCH	3468
	3469	if not paras:
	3470	raise ValueError('Invalid dfxp/TTML subtitle')
bf6427d2	3471
5b995f71 RA	3472	repeat = False
	3473	while True:
	3474	for style in dfxp.findall(_x('.//ttml:style')):
261f4730 RA	3475	style_id = style.get('id') or style.get(_x('xml:id'))
	3476	if not style_id:
	3477	continue
5b995f71 RA	3478	parent_style_id = style.get('style')
	3479	if parent_style_id:
	3480	if parent_style_id not in styles:
	3481	repeat = True
	3482	continue
	3483	styles[style_id] = styles[parent_style_id].copy()
	3484	for prop in SUPPORTED_STYLING:
	3485	prop_val = style.get(_x('tts:' + prop))
	3486	if prop_val:
	3487	styles.setdefault(style_id, {})[prop] = prop_val
	3488	if repeat:
	3489	repeat = False
	3490	else:
	3491	break
	3492
	3493	for p in ('body', 'div'):
	3494	ele = xpath_element(dfxp, [_x('.//ttml:' + p), './/' + p])
	3495	if ele is None:
	3496	continue
	3497	style = styles.get(ele.get('style'))
	3498	if not style:
	3499	continue
	3500	default_style.update(style)
	3501
bf6427d2	3502	for para, index in zip(paras, itertools.count(1)):
d631d5f9	3503	begin_time = parse_dfxp_time_expr(para.attrib.get('begin'))
7dff0363	3504	end_time = parse_dfxp_time_expr(para.attrib.get('end'))
d631d5f9 YCH	3505	dur = parse_dfxp_time_expr(para.attrib.get('dur'))
	3506	if begin_time is None:
	3507	continue
7dff0363	3508	if not end_time:
d631d5f9 YCH	3509	if not dur:
	3510	continue
	3511	end_time = begin_time + dur
bf6427d2 YCH	3512	out.append('%d\n%s --> %s\n%s\n\n' % (
bf6427d2 YCH	3513	index,
c1c924ab YCH	3514	srt_subtitles_timecode(begin_time),
c1c924ab YCH	3515	srt_subtitles_timecode(end_time),
bf6427d2 YCH	3516	parse_node(para)))
	3517
	3518	return ''.join(out)
	3519
	3520
def cli_option(params, command_option, param, separator=None):
    """Build the argument list for an option that carries a value.

    The value is looked up as `params[param]`.  Returns an empty list when
    the value is missing or None, `[command_option, str(value)]` when no
    separator is given, and a single joined `option<separator>value` item
    otherwise.
    """
    value = params.get(param)
    if value is None:
        return []
    if separator is None:
        return [command_option, str(value)]
    return [f'{command_option}{separator}{value}']
66e289ba S	3526
	3527
	3528	def cli_bool_option(params, command_option, param, true_value='true', false_value='false', separator=None):
	3529	param = params.get(param)
c487cf00	3530	assert param in (True, False, None)
c487cf00	3531	return cli_option({True: true_value, False: false_value}, command_option, param, separator)
66e289ba S	3532
	3533
	3534	def cli_valueless_option(params, command_option, param, expected_value=True):
c487cf00	3535	return [command_option] if params.get(param) == expected_value else []
66e289ba S	3536
66e289ba S	3537
def cli_configuration_args(argdict, keys, default=[], use_compat=True):
    """Pick the argument list for the first matching key group.

    @param argdict      dict mapping lower-case keys to argument lists.
                        A plain list/tuple is accepted for backward
                        compatibility and returned as-is when use_compat is
                        set (otherwise it is ignored)
    @param keys         list/tuple of keys; an entry may itself be a group
                        of keys whose argument lists are concatenated
    @param default      returned when nothing matches
    @returns            the argument list of the first entry in `keys` for
                        which at least one key is present in `argdict`
    """
    if isinstance(argdict, (list, tuple)):  # for backward compatibility
        if use_compat:
            return argdict
        argdict = None
    if argdict is None:
        return default
    assert isinstance(argdict, dict)

    assert isinstance(keys, (list, tuple))
    for key_group in keys:
        candidates = (argdict.get(key.lower()) for key in variadic(key_group))
        present = [args for args in candidates if args is not None]
        if present:
            return list(itertools.chain.from_iterable(present))
    return default
66e289ba	3556
6251555f	3557
def _configuration_args(main_key, argdict, exe, keys=None, default=[], use_compat=True):
    """Resolve the configured arguments for `exe` as used by `main_key`.

    Builds the lookup keys (`exe`, or `main_key+exe` when they differ, each
    followed by the suffixes in `keys`) and delegates to
    cli_configuration_args().  The `(main_key, exe)` group and 'default'
    are only used as fallbacks when the bare root key is among the lookup
    keys; otherwise the legacy list/tuple form of `argdict` is disabled.
    """
    main_key = main_key.lower()
    exe = exe.lower()
    if main_key == exe:
        root_key = exe
    else:
        root_key = f'{main_key}+{exe}'

    lookup_keys = [f'{root_key}{suffix}' for suffix in (keys or [''])]
    if root_key not in lookup_keys:
        use_compat = False
    else:
        if main_key != exe:
            lookup_keys.append((main_key, exe))
        lookup_keys.append('default')
    return cli_configuration_args(argdict, lookup_keys, default, use_compat)
	3569
66e289ba	3570
class ISO639Utils:
    """Conversions between two-letter and three-letter language codes"""

    # See http://www.loc.gov/standards/iso639-2/ISO-639-2_utf-8.txt
    _lang_map = {
        'aa': 'aar',
        'ab': 'abk',
        'ae': 'ave',
        'af': 'afr',
        'ak': 'aka',
        'am': 'amh',
        'an': 'arg',
        'ar': 'ara',
        'as': 'asm',
        'av': 'ava',
        'ay': 'aym',
        'az': 'aze',
        'ba': 'bak',
        'be': 'bel',
        'bg': 'bul',
        'bh': 'bih',
        'bi': 'bis',
        'bm': 'bam',
        'bn': 'ben',
        'bo': 'bod',
        'br': 'bre',
        'bs': 'bos',
        'ca': 'cat',
        'ce': 'che',
        'ch': 'cha',
        'co': 'cos',
        'cr': 'cre',
        'cs': 'ces',
        'cu': 'chu',
        'cv': 'chv',
        'cy': 'cym',
        'da': 'dan',
        'de': 'deu',
        'dv': 'div',
        'dz': 'dzo',
        'ee': 'ewe',
        'el': 'ell',
        'en': 'eng',
        'eo': 'epo',
        'es': 'spa',
        'et': 'est',
        'eu': 'eus',
        'fa': 'fas',
        'ff': 'ful',
        'fi': 'fin',
        'fj': 'fij',
        'fo': 'fao',
        'fr': 'fra',
        'fy': 'fry',
        'ga': 'gle',
        'gd': 'gla',
        'gl': 'glg',
        'gn': 'grn',
        'gu': 'guj',
        'gv': 'glv',
        'ha': 'hau',
        'he': 'heb',
        'iw': 'heb',  # Replaced by he in 1989 revision
        'hi': 'hin',
        'ho': 'hmo',
        'hr': 'hrv',
        'ht': 'hat',
        'hu': 'hun',
        'hy': 'hye',
        'hz': 'her',
        'ia': 'ina',
        'id': 'ind',
        'in': 'ind',  # Replaced by id in 1989 revision
        'ie': 'ile',
        'ig': 'ibo',
        'ii': 'iii',
        'ik': 'ipk',
        'io': 'ido',
        'is': 'isl',
        'it': 'ita',
        'iu': 'iku',
        'ja': 'jpn',
        'jv': 'jav',
        'ka': 'kat',
        'kg': 'kon',
        'ki': 'kik',
        'kj': 'kua',
        'kk': 'kaz',
        'kl': 'kal',
        'km': 'khm',
        'kn': 'kan',
        'ko': 'kor',
        'kr': 'kau',
        'ks': 'kas',
        'ku': 'kur',
        'kv': 'kom',
        'kw': 'cor',
        'ky': 'kir',
        'la': 'lat',
        'lb': 'ltz',
        'lg': 'lug',
        'li': 'lim',
        'ln': 'lin',
        'lo': 'lao',
        'lt': 'lit',
        'lu': 'lub',
        'lv': 'lav',
        'mg': 'mlg',
        'mh': 'mah',
        'mi': 'mri',
        'mk': 'mkd',
        'ml': 'mal',
        'mn': 'mon',
        'mr': 'mar',
        'ms': 'msa',
        'mt': 'mlt',
        'my': 'mya',
        'na': 'nau',
        'nb': 'nob',
        'nd': 'nde',
        'ne': 'nep',
        'ng': 'ndo',
        'nl': 'nld',
        'nn': 'nno',
        'no': 'nor',
        'nr': 'nbl',
        'nv': 'nav',
        'ny': 'nya',
        'oc': 'oci',
        'oj': 'oji',
        'om': 'orm',
        'or': 'ori',
        'os': 'oss',
        'pa': 'pan',
        'pe': 'per',
        'pi': 'pli',
        'pl': 'pol',
        'ps': 'pus',
        'pt': 'por',
        'qu': 'que',
        'rm': 'roh',
        'rn': 'run',
        'ro': 'ron',
        'ru': 'rus',
        'rw': 'kin',
        'sa': 'san',
        'sc': 'srd',
        'sd': 'snd',
        'se': 'sme',
        'sg': 'sag',
        'si': 'sin',
        'sk': 'slk',
        'sl': 'slv',
        'sm': 'smo',
        'sn': 'sna',
        'so': 'som',
        'sq': 'sqi',
        'sr': 'srp',
        'ss': 'ssw',
        'st': 'sot',
        'su': 'sun',
        'sv': 'swe',
        'sw': 'swa',
        'ta': 'tam',
        'te': 'tel',
        'tg': 'tgk',
        'th': 'tha',
        'ti': 'tir',
        'tk': 'tuk',
        'tl': 'tgl',
        'tn': 'tsn',
        'to': 'ton',
        'tr': 'tur',
        'ts': 'tso',
        'tt': 'tat',
        'tw': 'twi',
        'ty': 'tah',
        'ug': 'uig',
        'uk': 'ukr',
        'ur': 'urd',
        'uz': 'uzb',
        've': 'ven',
        'vi': 'vie',
        'vo': 'vol',
        'wa': 'wln',
        'wo': 'wol',
        'xh': 'xho',
        'yi': 'yid',
        'ji': 'yid',  # Replaced by yi in 1989 revision
        'yo': 'yor',
        'za': 'zha',
        'zh': 'zho',
        'zu': 'zul',
    }

    @classmethod
    def short2long(cls, code):
        """Convert language code from ISO 639-1 to ISO 639-2/T

        Only the first two characters of `code` are looked up, so a longer
        tag such as 'en-US' resolves like 'en'.  Returns None if unknown.
        """
        two_letter = code[:2]
        return cls._lang_map.get(two_letter)

    @classmethod
    def long2short(cls, code):
        """Convert language code from ISO 639-2/T to ISO 639-1

        Returns the first two-letter code (in table order) mapped to `code`,
        or None if there is none.
        """
        matches = (short for short, long_name in cls._lang_map.items() if long_name == code)
        return next(matches, None)
	3775
	3776
class ISO3166Utils:
    """Lookup of country names by two-letter country code"""

    # From http://data.okfn.org/data/core/country-list
    _country_map = {
        'AF': 'Afghanistan',
        'AX': 'Åland Islands',
        'AL': 'Albania',
        'DZ': 'Algeria',
        'AS': 'American Samoa',
        'AD': 'Andorra',
        'AO': 'Angola',
        'AI': 'Anguilla',
        'AQ': 'Antarctica',
        'AG': 'Antigua and Barbuda',
        'AR': 'Argentina',
        'AM': 'Armenia',
        'AW': 'Aruba',
        'AU': 'Australia',
        'AT': 'Austria',
        'AZ': 'Azerbaijan',
        'BS': 'Bahamas',
        'BH': 'Bahrain',
        'BD': 'Bangladesh',
        'BB': 'Barbados',
        'BY': 'Belarus',
        'BE': 'Belgium',
        'BZ': 'Belize',
        'BJ': 'Benin',
        'BM': 'Bermuda',
        'BT': 'Bhutan',
        'BO': 'Bolivia, Plurinational State of',
        'BQ': 'Bonaire, Sint Eustatius and Saba',
        'BA': 'Bosnia and Herzegovina',
        'BW': 'Botswana',
        'BV': 'Bouvet Island',
        'BR': 'Brazil',
        'IO': 'British Indian Ocean Territory',
        'BN': 'Brunei Darussalam',
        'BG': 'Bulgaria',
        'BF': 'Burkina Faso',
        'BI': 'Burundi',
        'KH': 'Cambodia',
        'CM': 'Cameroon',
        'CA': 'Canada',
        'CV': 'Cape Verde',
        'KY': 'Cayman Islands',
        'CF': 'Central African Republic',
        'TD': 'Chad',
        'CL': 'Chile',
        'CN': 'China',
        'CX': 'Christmas Island',
        'CC': 'Cocos (Keeling) Islands',
        'CO': 'Colombia',
        'KM': 'Comoros',
        'CG': 'Congo',
        'CD': 'Congo, the Democratic Republic of the',
        'CK': 'Cook Islands',
        'CR': 'Costa Rica',
        'CI': 'Côte d\'Ivoire',
        'HR': 'Croatia',
        'CU': 'Cuba',
        'CW': 'Curaçao',
        'CY': 'Cyprus',
        'CZ': 'Czech Republic',
        'DK': 'Denmark',
        'DJ': 'Djibouti',
        'DM': 'Dominica',
        'DO': 'Dominican Republic',
        'EC': 'Ecuador',
        'EG': 'Egypt',
        'SV': 'El Salvador',
        'GQ': 'Equatorial Guinea',
        'ER': 'Eritrea',
        'EE': 'Estonia',
        'ET': 'Ethiopia',
        'FK': 'Falkland Islands (Malvinas)',
        'FO': 'Faroe Islands',
        'FJ': 'Fiji',
        'FI': 'Finland',
        'FR': 'France',
        'GF': 'French Guiana',
        'PF': 'French Polynesia',
        'TF': 'French Southern Territories',
        'GA': 'Gabon',
        'GM': 'Gambia',
        'GE': 'Georgia',
        'DE': 'Germany',
        'GH': 'Ghana',
        'GI': 'Gibraltar',
        'GR': 'Greece',
        'GL': 'Greenland',
        'GD': 'Grenada',
        'GP': 'Guadeloupe',
        'GU': 'Guam',
        'GT': 'Guatemala',
        'GG': 'Guernsey',
        'GN': 'Guinea',
        'GW': 'Guinea-Bissau',
        'GY': 'Guyana',
        'HT': 'Haiti',
        'HM': 'Heard Island and McDonald Islands',
        'VA': 'Holy See (Vatican City State)',
        'HN': 'Honduras',
        'HK': 'Hong Kong',
        'HU': 'Hungary',
        'IS': 'Iceland',
        'IN': 'India',
        'ID': 'Indonesia',
        'IR': 'Iran, Islamic Republic of',
        'IQ': 'Iraq',
        'IE': 'Ireland',
        'IM': 'Isle of Man',
        'IL': 'Israel',
        'IT': 'Italy',
        'JM': 'Jamaica',
        'JP': 'Japan',
        'JE': 'Jersey',
        'JO': 'Jordan',
        'KZ': 'Kazakhstan',
        'KE': 'Kenya',
        'KI': 'Kiribati',
        'KP': 'Korea, Democratic People\'s Republic of',
        'KR': 'Korea, Republic of',
        'KW': 'Kuwait',
        'KG': 'Kyrgyzstan',
        'LA': 'Lao People\'s Democratic Republic',
        'LV': 'Latvia',
        'LB': 'Lebanon',
        'LS': 'Lesotho',
        'LR': 'Liberia',
        'LY': 'Libya',
        'LI': 'Liechtenstein',
        'LT': 'Lithuania',
        'LU': 'Luxembourg',
        'MO': 'Macao',
        'MK': 'Macedonia, the Former Yugoslav Republic of',
        'MG': 'Madagascar',
        'MW': 'Malawi',
        'MY': 'Malaysia',
        'MV': 'Maldives',
        'ML': 'Mali',
        'MT': 'Malta',
        'MH': 'Marshall Islands',
        'MQ': 'Martinique',
        'MR': 'Mauritania',
        'MU': 'Mauritius',
        'YT': 'Mayotte',
        'MX': 'Mexico',
        'FM': 'Micronesia, Federated States of',
        'MD': 'Moldova, Republic of',
        'MC': 'Monaco',
        'MN': 'Mongolia',
        'ME': 'Montenegro',
        'MS': 'Montserrat',
        'MA': 'Morocco',
        'MZ': 'Mozambique',
        'MM': 'Myanmar',
        'NA': 'Namibia',
        'NR': 'Nauru',
        'NP': 'Nepal',
        'NL': 'Netherlands',
        'NC': 'New Caledonia',
        'NZ': 'New Zealand',
        'NI': 'Nicaragua',
        'NE': 'Niger',
        'NG': 'Nigeria',
        'NU': 'Niue',
        'NF': 'Norfolk Island',
        'MP': 'Northern Mariana Islands',
        'NO': 'Norway',
        'OM': 'Oman',
        'PK': 'Pakistan',
        'PW': 'Palau',
        'PS': 'Palestine, State of',
        'PA': 'Panama',
        'PG': 'Papua New Guinea',
        'PY': 'Paraguay',
        'PE': 'Peru',
        'PH': 'Philippines',
        'PN': 'Pitcairn',
        'PL': 'Poland',
        'PT': 'Portugal',
        'PR': 'Puerto Rico',
        'QA': 'Qatar',
        'RE': 'Réunion',
        'RO': 'Romania',
        'RU': 'Russian Federation',
        'RW': 'Rwanda',
        'BL': 'Saint Barthélemy',
        'SH': 'Saint Helena, Ascension and Tristan da Cunha',
        'KN': 'Saint Kitts and Nevis',
        'LC': 'Saint Lucia',
        'MF': 'Saint Martin (French part)',
        'PM': 'Saint Pierre and Miquelon',
        'VC': 'Saint Vincent and the Grenadines',
        'WS': 'Samoa',
        'SM': 'San Marino',
        'ST': 'Sao Tome and Principe',
        'SA': 'Saudi Arabia',
        'SN': 'Senegal',
        'RS': 'Serbia',
        'SC': 'Seychelles',
        'SL': 'Sierra Leone',
        'SG': 'Singapore',
        'SX': 'Sint Maarten (Dutch part)',
        'SK': 'Slovakia',
        'SI': 'Slovenia',
        'SB': 'Solomon Islands',
        'SO': 'Somalia',
        'ZA': 'South Africa',
        'GS': 'South Georgia and the South Sandwich Islands',
        'SS': 'South Sudan',
        'ES': 'Spain',
        'LK': 'Sri Lanka',
        'SD': 'Sudan',
        'SR': 'Suriname',
        'SJ': 'Svalbard and Jan Mayen',
        'SZ': 'Swaziland',
        'SE': 'Sweden',
        'CH': 'Switzerland',
        'SY': 'Syrian Arab Republic',
        'TW': 'Taiwan, Province of China',
        'TJ': 'Tajikistan',
        'TZ': 'Tanzania, United Republic of',
        'TH': 'Thailand',
        'TL': 'Timor-Leste',
        'TG': 'Togo',
        'TK': 'Tokelau',
        'TO': 'Tonga',
        'TT': 'Trinidad and Tobago',
        'TN': 'Tunisia',
        'TR': 'Turkey',
        'TM': 'Turkmenistan',
        'TC': 'Turks and Caicos Islands',
        'TV': 'Tuvalu',
        'UG': 'Uganda',
        'UA': 'Ukraine',
        'AE': 'United Arab Emirates',
        'GB': 'United Kingdom',
        'US': 'United States',
        'UM': 'United States Minor Outlying Islands',
        'UY': 'Uruguay',
        'UZ': 'Uzbekistan',
        'VU': 'Vanuatu',
        'VE': 'Venezuela, Bolivarian Republic of',
        'VN': 'Viet Nam',
        'VG': 'Virgin Islands, British',
        'VI': 'Virgin Islands, U.S.',
        'WF': 'Wallis and Futuna',
        'EH': 'Western Sahara',
        'YE': 'Yemen',
        'ZM': 'Zambia',
        'ZW': 'Zimbabwe',
        # Not ISO 3166 codes, but used for IP blocks
        'AP': 'Asia/Pacific Region',
        'EU': 'Europe',
    }

    @classmethod
    def short2full(cls, code):
        """Convert a two-letter (ISO 3166-1 alpha-2) country code to the full name

        The lookup is case-insensitive; returns None for an unknown code.
        """
        normalized = code.upper()
        return cls._country_map.get(normalized)
	4038
	4039
class GeoUtils:
    """Helpers for picking IPv4 addresses by country"""

    # Major IPv4 address blocks per country
    _country_ip_map = {
        'AD': '46.172.224.0/19',
        'AE': '94.200.0.0/13',
        'AF': '149.54.0.0/17',
        'AG': '209.59.64.0/18',
        'AI': '204.14.248.0/21',
        'AL': '46.99.0.0/16',
        'AM': '46.70.0.0/15',
        'AO': '105.168.0.0/13',
        'AP': '182.50.184.0/21',
        'AQ': '23.154.160.0/24',
        'AR': '181.0.0.0/12',
        'AS': '202.70.112.0/20',
        'AT': '77.116.0.0/14',
        'AU': '1.128.0.0/11',
        'AW': '181.41.0.0/18',
        'AX': '185.217.4.0/22',
        'AZ': '5.197.0.0/16',
        'BA': '31.176.128.0/17',
        'BB': '65.48.128.0/17',
        'BD': '114.130.0.0/16',
        'BE': '57.0.0.0/8',
        'BF': '102.178.0.0/15',
        'BG': '95.42.0.0/15',
        'BH': '37.131.0.0/17',
        'BI': '154.117.192.0/18',
        'BJ': '137.255.0.0/16',
        'BL': '185.212.72.0/23',
        'BM': '196.12.64.0/18',
        'BN': '156.31.0.0/16',
        'BO': '161.56.0.0/16',
        'BQ': '161.0.80.0/20',
        'BR': '191.128.0.0/12',
        'BS': '24.51.64.0/18',
        'BT': '119.2.96.0/19',
        'BW': '168.167.0.0/16',
        'BY': '178.120.0.0/13',
        'BZ': '179.42.192.0/18',
        'CA': '99.224.0.0/11',
        'CD': '41.243.0.0/16',
        'CF': '197.242.176.0/21',
        'CG': '160.113.0.0/16',
        'CH': '85.0.0.0/13',
        'CI': '102.136.0.0/14',
        'CK': '202.65.32.0/19',
        'CL': '152.172.0.0/14',
        'CM': '102.244.0.0/14',
        'CN': '36.128.0.0/10',
        'CO': '181.240.0.0/12',
        'CR': '201.192.0.0/12',
        'CU': '152.206.0.0/15',
        'CV': '165.90.96.0/19',
        'CW': '190.88.128.0/17',
        'CY': '31.153.0.0/16',
        'CZ': '88.100.0.0/14',
        'DE': '53.0.0.0/8',
        'DJ': '197.241.0.0/17',
        'DK': '87.48.0.0/12',
        'DM': '192.243.48.0/20',
        'DO': '152.166.0.0/15',
        'DZ': '41.96.0.0/12',
        'EC': '186.68.0.0/15',
        'EE': '90.190.0.0/15',
        'EG': '156.160.0.0/11',
        'ER': '196.200.96.0/20',
        'ES': '88.0.0.0/11',
        'ET': '196.188.0.0/14',
        'EU': '2.16.0.0/13',
        'FI': '91.152.0.0/13',
        'FJ': '144.120.0.0/16',
        'FK': '80.73.208.0/21',
        'FM': '119.252.112.0/20',
        'FO': '88.85.32.0/19',
        'FR': '90.0.0.0/9',
        'GA': '41.158.0.0/15',
        'GB': '25.0.0.0/8',
        'GD': '74.122.88.0/21',
        'GE': '31.146.0.0/16',
        'GF': '161.22.64.0/18',
        'GG': '62.68.160.0/19',
        'GH': '154.160.0.0/12',
        'GI': '95.164.0.0/16',
        'GL': '88.83.0.0/19',
        'GM': '160.182.0.0/15',
        'GN': '197.149.192.0/18',
        'GP': '104.250.0.0/19',
        'GQ': '105.235.224.0/20',
        'GR': '94.64.0.0/13',
        'GT': '168.234.0.0/16',
        'GU': '168.123.0.0/16',
        'GW': '197.214.80.0/20',
        'GY': '181.41.64.0/18',
        'HK': '113.252.0.0/14',
        'HN': '181.210.0.0/16',
        'HR': '93.136.0.0/13',
        'HT': '148.102.128.0/17',
        'HU': '84.0.0.0/14',
        'ID': '39.192.0.0/10',
        'IE': '87.32.0.0/12',
        'IL': '79.176.0.0/13',
        'IM': '5.62.80.0/20',
        'IN': '117.192.0.0/10',
        'IO': '203.83.48.0/21',
        'IQ': '37.236.0.0/14',
        'IR': '2.176.0.0/12',
        'IS': '82.221.0.0/16',
        'IT': '79.0.0.0/10',
        'JE': '87.244.64.0/18',
        'JM': '72.27.0.0/17',
        'JO': '176.29.0.0/16',
        'JP': '133.0.0.0/8',
        'KE': '105.48.0.0/12',
        'KG': '158.181.128.0/17',
        'KH': '36.37.128.0/17',
        'KI': '103.25.140.0/22',
        'KM': '197.255.224.0/20',
        'KN': '198.167.192.0/19',
        'KP': '175.45.176.0/22',
        'KR': '175.192.0.0/10',
        'KW': '37.36.0.0/14',
        'KY': '64.96.0.0/15',
        'KZ': '2.72.0.0/13',
        'LA': '115.84.64.0/18',
        'LB': '178.135.0.0/16',
        'LC': '24.92.144.0/20',
        'LI': '82.117.0.0/19',
        'LK': '112.134.0.0/15',
        'LR': '102.183.0.0/16',
        'LS': '129.232.0.0/17',
        'LT': '78.56.0.0/13',
        'LU': '188.42.0.0/16',
        'LV': '46.109.0.0/16',
        'LY': '41.252.0.0/14',
        'MA': '105.128.0.0/11',
        'MC': '88.209.64.0/18',
        'MD': '37.246.0.0/16',
        'ME': '178.175.0.0/17',
        'MF': '74.112.232.0/21',
        'MG': '154.126.0.0/17',
        'MH': '117.103.88.0/21',
        'MK': '77.28.0.0/15',
        'ML': '154.118.128.0/18',
        'MM': '37.111.0.0/17',
        'MN': '49.0.128.0/17',
        'MO': '60.246.0.0/16',
        'MP': '202.88.64.0/20',
        'MQ': '109.203.224.0/19',
        'MR': '41.188.64.0/18',
        'MS': '208.90.112.0/22',
        'MT': '46.11.0.0/16',
        'MU': '105.16.0.0/12',
        'MV': '27.114.128.0/18',
        'MW': '102.70.0.0/15',
        'MX': '187.192.0.0/11',
        'MY': '175.136.0.0/13',
        'MZ': '197.218.0.0/15',
        'NA': '41.182.0.0/16',
        'NC': '101.101.0.0/18',
        'NE': '197.214.0.0/18',
        'NF': '203.17.240.0/22',
        'NG': '105.112.0.0/12',
        'NI': '186.76.0.0/15',
        'NL': '145.96.0.0/11',
        'NO': '84.208.0.0/13',
        'NP': '36.252.0.0/15',
        'NR': '203.98.224.0/19',
        'NU': '49.156.48.0/22',
        'NZ': '49.224.0.0/14',
        'OM': '5.36.0.0/15',
        'PA': '186.72.0.0/15',
        'PE': '186.160.0.0/14',
        'PF': '123.50.64.0/18',
        'PG': '124.240.192.0/19',
        'PH': '49.144.0.0/13',
        'PK': '39.32.0.0/11',
        'PL': '83.0.0.0/11',
        'PM': '70.36.0.0/20',
        'PR': '66.50.0.0/16',
        'PS': '188.161.0.0/16',
        'PT': '85.240.0.0/13',
        'PW': '202.124.224.0/20',
        'PY': '181.120.0.0/14',
        'QA': '37.210.0.0/15',
        'RE': '102.35.0.0/16',
        'RO': '79.112.0.0/13',
        'RS': '93.86.0.0/15',
        'RU': '5.136.0.0/13',
        'RW': '41.186.0.0/16',
        'SA': '188.48.0.0/13',
        'SB': '202.1.160.0/19',
        'SC': '154.192.0.0/11',
        'SD': '102.120.0.0/13',
        'SE': '78.64.0.0/12',
        'SG': '8.128.0.0/10',
        'SI': '188.196.0.0/14',
        'SK': '78.98.0.0/15',
        'SL': '102.143.0.0/17',
        'SM': '89.186.32.0/19',
        'SN': '41.82.0.0/15',
        'SO': '154.115.192.0/18',
        'SR': '186.179.128.0/17',
        'SS': '105.235.208.0/21',
        'ST': '197.159.160.0/19',
        'SV': '168.243.0.0/16',
        'SX': '190.102.0.0/20',
        'SY': '5.0.0.0/16',
        'SZ': '41.84.224.0/19',
        'TC': '65.255.48.0/20',
        'TD': '154.68.128.0/19',
        'TG': '196.168.0.0/14',
        'TH': '171.96.0.0/13',
        'TJ': '85.9.128.0/18',
        'TK': '27.96.24.0/21',
        'TL': '180.189.160.0/20',
        'TM': '95.85.96.0/19',
        'TN': '197.0.0.0/11',
        'TO': '175.176.144.0/21',
        'TR': '78.160.0.0/11',
        'TT': '186.44.0.0/15',
        'TV': '202.2.96.0/19',
        'TW': '120.96.0.0/11',
        'TZ': '156.156.0.0/14',
        'UA': '37.52.0.0/14',
        'UG': '102.80.0.0/13',
        'US': '6.0.0.0/8',
        'UY': '167.56.0.0/13',
        'UZ': '84.54.64.0/18',
        'VA': '212.77.0.0/19',
        'VC': '207.191.240.0/21',
        'VE': '186.88.0.0/13',
        'VG': '66.81.192.0/20',
        'VI': '146.226.0.0/16',
        'VN': '14.160.0.0/11',
        'VU': '202.80.32.0/20',
        'WF': '117.20.32.0/21',
        'WS': '202.4.32.0/19',
        'YE': '134.35.0.0/16',
        'YT': '41.242.116.0/22',
        'ZA': '41.0.0.0/11',
        'ZM': '102.144.0.0/13',
        'ZW': '102.177.192.0/18',
    }

    @classmethod
    def random_ipv4(cls, code_or_block):
        """Return a random IPv4 address (dotted string) from a block.

        `code_or_block` is either a two-character country code, resolved
        case-insensitively through `_country_ip_map`, or an `address/prefix`
        block given directly.  Returns None for an unknown country code.
        """
        block = code_or_block
        if len(code_or_block) == 2:
            block = cls._country_ip_map.get(code_or_block.upper())
            if not block:
                return None

        network, prefix_len = block.split('/')
        lowest, = struct.unpack('!L', socket.inet_aton(network))
        # Setting all host bits gives the highest address of the block
        highest = lowest | (0xffffffff >> int(prefix_len))
        chosen = random.randint(lowest, highest)
        return str(socket.inet_ntoa(struct.pack('!L', chosen)))
773f291d S	4298
773f291d S	4299
0a5445dd YCH	4300	# Both long_to_bytes and bytes_to_long are adapted from PyCrypto, which is
	4301	# released into Public Domain
	4302	# https://github.com/dlitz/pycrypto/blob/master/lib/Crypto/Util/number.py#L387
	4303
	4304	def long_to_bytes(n, blocksize=0):
	4305	"""long_to_bytes(n:long, blocksize:int) : string
	4306	Convert a long integer to a byte string.
	4307
	4308	If optional blocksize is given and greater than zero, pad the front of the
	4309	byte string with binary zeros so that the length is a multiple of
	4310	blocksize.
	4311	"""
	4312	# after much testing, this algorithm was deemed to be the fastest
	4313	s = b''
	4314	n = int(n)
	4315	while n > 0:
ac668111	4316	s = struct.pack('>I', n & 0xffffffff) + s
0a5445dd YCH	4317	n = n >> 32
	4318	# strip off leading zeros
	4319	for i in range(len(s)):
	4320	if s[i] != b'\000'[0]:
	4321	break
	4322	else:
	4323	# only happens when n == 0
	4324	s = b'\000'
	4325	i = 0
	4326	s = s[i:]
	4327	# add back some pad bytes. this could be done more efficiently w.r.t. the
	4328	# de-padding being done above, but sigh...
	4329	if blocksize > 0 and len(s) % blocksize:
	4330	s = (blocksize - len(s) % blocksize) * b'\000' + s
	4331	return s
	4332
	4333
	4334	def bytes_to_long(s):
	4335	"""bytes_to_long(string) : long
	4336	Convert a byte string to a long integer.
	4337
	4338	This is (essentially) the inverse of long_to_bytes().
	4339	"""
	4340	acc = 0
	4341	length = len(s)
	4342	if length % 4:
	4343	extra = (4 - length % 4)
	4344	s = b'\000' * extra + s
	4345	length = length + extra
	4346	for i in range(0, length, 4):
ac668111	4347	acc = (acc << 32) + struct.unpack('>I', s[i:i + 4])[0]
0a5445dd YCH	4348	return acc
	4349
	4350
def ohdave_rsa_encrypt(data, exponent, modulus):
    """
    Implement OHDave's RSA algorithm. See http://www.ohdave.com/rsa/

    Input:
        data: data to encrypt, bytes-like object
        exponent, modulus: parameter e and N of RSA algorithm, both integer
    Output: hex string of encrypted data

    Limitation: supports one block encryption only
    """
    # The data is read as a little-endian number, hence the reversal
    reversed_hex = binascii.hexlify(data[::-1])
    plain_number = int(reversed_hex, 16)
    cipher_number = pow(plain_number, exponent, modulus)
    return format(cipher_number, 'x')
81bdc8fd YCH	4366
81bdc8fd YCH	4367
def pkcs1pad(data, length):
    """
    Padding input data with PKCS#1 scheme

    The result has the layout `00 02 <padding> 00 <data>` and is exactly
    `length` items long.

    @param {int[]} data        input data
    @param {int}   length      target length
    @returns {int[]}           padded data (flat list of ints)
    @raises ValueError         if data leaves less than 8 bytes for padding
    """
    if len(data) > length - 11:
        raise ValueError('Input data too long for PKCS#1 padding')

    # The padding must not contain zero bytes: the first zero after the
    # `00 02` header marks where the padding ends and the data begins
    pseudo_random = [random.randint(1, 255) for _ in range(length - len(data) - 3)]
    # Unpack both lists so that the result is one flat list of byte values
    return [0, 2, *pseudo_random, 0, *data]
f48409c7 YCH	4381
f48409c7 YCH	4382
7b2c3f47	4383	def _base_n_table(n, table):
	4384	if not table and not n:
	4385	raise ValueError('Either table or n must be specified')
612f2be5	4386	table = (table or '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ')[:n]
612f2be5	4387
44f14eb4	4388	if n and n != len(table):
612f2be5	4389	raise ValueError(f'base {n} exceeds table length {len(table)}')
612f2be5	4390	return table
59f898b7	4391
5eb6bdce	4392
def encode_base_n(num, n=None, table=None):
    """Convert given int to a base-n string

    @param num      non-negative integer to encode
    @param n        base; selects the first n symbols of the table
    @param table    digit symbols (defaults to the table of _base_n_table)
    @raises ValueError  for a negative `num`, or a base below 2 with a
                        non-zero `num` (neither can ever be reduced to
                        zero, so the conversion would not terminate)
    """
    table = _base_n_table(n, table)
    if not num:
        return table[0]

    base = len(table)
    if num < 0:
        raise ValueError(f'cannot encode negative number {num}')
    if base < 2:
        raise ValueError(f'cannot encode non-zero number in base {base}')

    digits = []
    while num:
        num, remainder = divmod(num, base)
        digits.append(table[remainder])
    # Digits were produced least-significant first
    return ''.join(reversed(digits))
	4404
	4405
	4406	def decode_base_n(string, n=None, table=None):
	4407	"""Convert given base-n string to int"""
	4408	table = {char: index for index, char in enumerate(_base_n_table(n, table))}
	4409	result, base = 0, len(table)
	4410	for char in string:
	4411	result = result * base + table[char]
	4412	return result
	4413
	4414
def decode_packed_codes(code):
    """Substitute the symbols back into code matched by PACKED_CODES_RE.

    The match yields the obfuscated payload, the numeric base, the symbol
    count and the `|`-separated symbol list.  Every word in the payload is
    the base-n encoding of an index into that list; it is replaced by the
    symbol at that index, or kept as-is when the symbol is empty.
    """
    match = re.search(PACKED_CODES_RE, code)
    obfuscated_code, base, count, symbols = match.groups()
    base = int(base)
    count = int(count)
    # Symbols are separated by a plain pipe character
    symbols = symbols.split('|')

    symbol_table = {}
    for index in reversed(range(count)):
        encoded_index = encode_base_n(index, base)
        # An empty symbol means the word stands for itself
        symbol_table[encoded_index] = symbols[index] or encoded_index

    return re.sub(
        r'\b(\w+)\b', lambda word: symbol_table[word.group(0)],
        obfuscated_code)
e154c651	4431
e154c651	4432
def caesar(s, alphabet, shift):
    """Shift every character of `s` that occurs in `alphabet` by `shift` positions.

    Positions wrap around the end of the alphabet; characters that are not
    part of the alphabet are kept unchanged.  A shift of 0 returns `s` as is.
    """
    if shift == 0:
        return s
    size = len(alphabet)
    shifted = []
    for char in s:
        if char in alphabet:
            char = alphabet[(alphabet.index(char) + shift) % size]
        shifted.append(char)
    return ''.join(shifted)
	4440
	4441
	4442	def rot47(s):
	4443	return caesar(s, r'''!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{\|}~''', 47)
	4444
	4445
def parse_m3u8_attributes(attrib):
    """Parse an m3u8 attribute list (`KEY=value,KEY="quoted value",...`) into a dict.

    Quoted values may contain commas; the surrounding quotes are removed.
    All values are returned as strings.
    """
    info = {}
    # A value is either a quoted string or a run without quotes and commas,
    # and must be followed by a comma or the end of the input
    for key, val in re.findall(r'(?P<key>[A-Z0-9-]+)=(?P<val>"[^"]+"|[^",]+)(?:,|$)', attrib):
        if val.startswith('"'):
            val = val[1:-1]
        info[key] = val
    return info
1143535d YCH	4453
	4454
	4455	def urshift(val, n):
	4456	return val >> n if val >= 0 else (val + 0x100000000) >> n
d3f8e038 YCH	4457
d3f8e038 YCH	4458
def write_xattr(path, key, value):
    """Set the extended attribute `key` of the file at `path` to `value`.

    `value` is a bytes object: it is written in binary mode on Windows and
    decoded to str before being handed to an external tool.

    Strategies, in order:
        1. Windows: write to the NTFS Alternate Data Stream `path:key`
        2. os.setxattr, or the pyxattr (>= 0.5.0) / xattr Python modules
        3. the `setfattr` or `xattr` command line tools

    @raises XAttrMetadataError      if writing fails (OSError, or a non-zero
                                    exit code of the external tool)
    @raises XAttrUnavailableError   if no module or tool is available
    """
    # Windows: Write xattrs to NTFS Alternate Data Streams:
    # http://en.wikipedia.org/wiki/NTFS#Alternate_data_streams_.28ADS.29
    if compat_os_name == 'nt':
        assert ':' not in key
        assert os.path.exists(path)

        try:
            with open(f'{path}:{key}', 'wb') as f:
                f.write(value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 1. Use os.setxattr/xattrs/pyxattrs modules

    setxattr = None
    if callable(getattr(os, 'setxattr', None)):
        setxattr = os.setxattr
    elif getattr(xattr, '_yt_dlp__identifier', None) == 'pyxattr':
        # Unicode arguments are not supported in pyxattr until version 0.5.0
        # See https://github.com/ytdl-org/youtube-dl/issues/5498
        if version_tuple(xattr.__version__) >= (0, 5, 0):
            setxattr = xattr.set
    elif xattr:
        setxattr = xattr.setxattr

    if setxattr:
        try:
            setxattr(path, key, value)
        except OSError as e:
            raise XAttrMetadataError(e.errno, e.strerror)
        return

    # UNIX Method 2. Use setfattr/xattr executables
    exe = ('setfattr' if check_executable('setfattr', ['--version'])
           else 'xattr' if check_executable('xattr', ['-h']) else None)
    if not exe:
        raise XAttrUnavailableError(
            'Couldn\'t find a tool to set the xattrs. Install either the "xattr" or "pyxattr" Python modules or the '
            + ('"xattr" binary' if sys.platform != 'linux' else 'GNU "attr" package (which contains the "setfattr" tool)'))

    # The value is passed as a command line argument, which must be text
    value = value.decode()
    try:
        _, stderr, returncode = Popen.run(
            [exe, '-w', key, value, path] if exe == 'xattr' else [exe, '-n', key, '-v', value, path],
            text=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE, stdin=subprocess.PIPE)
    except OSError as e:
        raise XAttrMetadataError(e.errno, e.strerror)
    if returncode:
        raise XAttrMetadataError(returncode, stderr)
0c265486 YCH	4510
	4511
	4512	def random_birthday(year_field, month_field, day_field):
c305a25c	4513	start_date = dt.date(1950, 1, 1)
c305a25c	4514	end_date = dt.date(1995, 12, 31)
aa374bc7	4515	offset = random.randint(0, (end_date - start_date).days)
c305a25c	4516	random_date = start_date + dt.timedelta(offset)
0c265486	4517	return {
aa374bc7 AS	4518	year_field: str(random_date.year),
	4519	month_field: str(random_date.month),
	4520	day_field: str(random_date.day),
0c265486	4521	}
732044af	4522
c76eb41b	4523
def find_available_port(interface=''):
    """Bind a temporary socket to port 0 on `interface` and return the port the OS assigned.

    Returns None if the socket could not be created or bound.
    """
    try:
        with socket.socket() as probe:
            probe.bind((interface, 0))
            address = probe.getsockname()
            return address[1]
    except OSError:
        return None
	4531
	4532
# Templates for internet shortcut files, which are plain text files.
# They are %-format templates: each has a `%(url)s` placeholder, and the
# .desktop template additionally has `%(filename)s`.

# Windows-style ".url" shortcut
DOT_URL_LINK_TEMPLATE = '''\
[InternetShortcut]
URL=%(url)s
'''

# Apple property-list ".webloc" shortcut
DOT_WEBLOC_LINK_TEMPLATE = '''\
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
\t<key>URL</key>
\t<string>%(url)s</string>
</dict>
</plist>
'''

# ".desktop" entry of type Link
DOT_DESKTOP_LINK_TEMPLATE = '''\
[Desktop Entry]
Encoding=UTF-8
Name=%(filename)s
Type=Link
URL=%(url)s
Icon=text-html
'''

# Lookup of the templates above by link type
LINK_TEMPLATES = {
    'url': DOT_URL_LINK_TEMPLATE,
    'desktop': DOT_DESKTOP_LINK_TEMPLATE,
    'webloc': DOT_WEBLOC_LINK_TEMPLATE,
}
	4564
732044af	4565
	4566	def iri_to_uri(iri):
	4567	"""
	4568	Converts an IRI (Internationalized Resource Identifier, allowing Unicode characters) to a URI (Uniform Resource Identifier, ASCII-only).
	4569
	4570	The function doesn't add an additional layer of escaping; e.g., it doesn't escape `%3C` as `%253C`. Instead, it percent-escapes characters with an underlying UTF-8 encoding besides those already escaped, leaving the URI intact.
	4571	"""
	4572
14f25df2	4573	iri_parts = urllib.parse.urlparse(iri)
732044af	4574
	4575	if '[' in iri_parts.netloc:
	4576	raise ValueError('IPv6 URIs are not, yet, supported.')
	4577	# Querying `.netloc`, when there's only one bracket, also raises a ValueError.
	4578
	4579	# The `safe` argument values, that the following code uses, contain the characters that should not be percent-encoded. Everything else but letters, digits and '_.-' will be percent-encoded with an underlying UTF-8 encoding. Everything already percent-encoded will be left as is.
	4580
	4581	net_location = ''
	4582	if iri_parts.username:
f9934b96	4583	net_location += urllib.parse.quote(iri_parts.username, safe=r"!$%&'()*+,~")
732044af	4584	if iri_parts.password is not None:
f9934b96	4585	net_location += ':' + urllib.parse.quote(iri_parts.password, safe=r"!$%&'()*+,~")
732044af	4586	net_location += '@'
732044af	4587
0f06bcd7	4588	net_location += iri_parts.hostname.encode('idna').decode() # Punycode for Unicode hostnames.
732044af	4589	# The 'idna' encoding produces ASCII text.
	4590	if iri_parts.port is not None and iri_parts.port != 80:
	4591	net_location += ':' + str(iri_parts.port)
	4592
f9934b96	4593	return urllib.parse.urlunparse(
732044af	4594	(iri_parts.scheme,
	4595	net_location,
	4596
f9934b96	4597	urllib.parse.quote_plus(iri_parts.path, safe=r"!$%&'()*+,/:;=@\|~"),
732044af	4598
732044af	4599	# Unsure about the `safe` argument, since this is a legacy way of handling parameters.
f9934b96	4600	urllib.parse.quote_plus(iri_parts.params, safe=r"!$%&'()*+,/:;=@\|~"),
732044af	4601
732044af	4602	# Not totally sure about the `safe` argument, since the source does not explicitly mention the query URI component.
f9934b96	4603	urllib.parse.quote_plus(iri_parts.query, safe=r"!$%&'()*+,/:;=?@{\|}~"),
732044af	4604
f9934b96	4605	urllib.parse.quote_plus(iri_parts.fragment, safe=r"!#$%&'()*+,/:;=?@{\|}~")))
732044af	4606
	4607	# Source for `safe` arguments: https://url.spec.whatwg.org/#percent-encoded-bytes.
	4608
	4609
	4610	def to_high_limit_path(path):
	4611	if sys.platform in ['win32', 'cygwin']:
	4612	# Work around MAX_PATH limitation on Windows. The maximum allowed length for the individual path segments may still be quite limited.
e5a998f3	4613	return '\\\\?\\' + os.path.abspath(path)
732044af	4614
732044af	4615	return path
76d321f6	4616
c76eb41b	4617
def format_field(obj, field=None, template='%s', ignore=NO_DEFAULT, default='', func=IDENTITY):
    """Look up `field` in `obj` and render it through `template`.

    The looked-up value is passed through `func` before %-formatting.
    `default` is returned instead when the value is falsy (if `ignore` is not
    given) or when it is one of the values in `ignore`.
    """
    val = traversal.traverse_obj(obj, *variadic(field))
    if ignore is NO_DEFAULT:
        skip = not val
    else:
        skip = val in variadic(ignore)
    if skip:
        return default
    return template % func(val)
00dd0cd5	4623
	4624
	4625	def clean_podcast_url(url):
91302ed3	4626	url = re.sub(r'''(?x)
00dd0cd5	4627	(?:
	4628	(?:
	4629	chtbl\.com/track\|
	4630	media\.blubrry\.com\| # https://create.blubrry.com/resources/podcast-media-download-statistics/getting-started/
2af4eeb7 MAF	4631	play\.podtrac\.com\|
	4632	chrt\.fm/track\|
	4633	mgln\.ai/e
	4634	)(?:/[^/.]+)?\|
00dd0cd5	4635	(?:dts\|www)\.podtrac\.com/(?:pts/)?redirect\.[0-9a-z]{3,4}\| # http://analytics.podtrac.com/how-to-measure
	4636	flex\.acast\.com\|
	4637	pd(?:
	4638	cn\.co\| # https://podcorn.com/analytics-prefix/
	4639	st\.fm # https://podsights.com/docs/
2af4eeb7 MAF	4640	)/e\|
	4641	[0-9]\.gum\.fm\|
	4642	pscrb\.fm/rss/p
00dd0cd5	4643	)/''', '', url)
91302ed3	4644	return re.sub(r'^\w+://(\w+://)', r'\1', url)
ffcb8191 THD	4645
	4646
	4647	_HEX_TABLE = '0123456789abcdef'
	4648
	4649
	4650	def random_uuidv4():
	4651	return re.sub(r'[xy]', lambda x: _HEX_TABLE[random.randint(0, 15)], 'xxxxxxxx-xxxx-4xxx-yxxx-xxxxxxxxxxxx')
0202b52a	4652
	4653
	4654	def make_dir(path, to_screen=None):
	4655	try:
	4656	dn = os.path.dirname(path)
b25d6cb9 AI	4657	if dn:
b25d6cb9 AI	4658	os.makedirs(dn, exist_ok=True)
0202b52a	4659	return True
86e5f3ed	4660	except OSError as err:
0202b52a	4661	if callable(to_screen) is not None:
69bec673	4662	to_screen(f'unable to create directory {err}')
0202b52a	4663	return False
f74980cb	4664
	4665
	4666	def get_executable_path():
69bec673	4667	from ..update import _get_variant_and_executable_path
c487cf00	4668
b5899f4f	4669	return os.path.dirname(os.path.abspath(_get_variant_and_executable_path()[1]))
f74980cb	4670
f74980cb	4671
def get_user_config_dirs(package_name):
    """Yield candidate per-user configuration directories for `package_name`.

    Order: the XDG config directory, then %APPDATA% (only if that variable is
    set), then a dot-directory in the user's home.
    """
    # .config (e.g. ~/.config/package_name)
    xdg_base = os.getenv('XDG_CONFIG_HOME') or compat_expanduser('~/.config')
    yield os.path.join(xdg_base, package_name)

    # appdata (%APPDATA%/package_name)
    appdata_base = os.getenv('appdata')
    if appdata_base:
        yield os.path.join(appdata_base, package_name)

    # home (~/.package_name)
    home = compat_expanduser('~')
    yield os.path.join(home, f'.{package_name}')
8e40b9d1 M	4684
	4685
	4686	def get_system_config_dirs(package_name):
8e40b9d1	4687	# /etc/package_name
773c272d	4688	yield os.path.join('/etc', package_name)
06167fbb	4689
06167fbb	4690
def time_seconds(**kwargs):
    """
    Returns the time in seconds since the epoch (1970-01-01T00:00:00Z),
    shifted by the offset described by `kwargs` (passed to datetime.timedelta)
    """
    now = time.time()
    offset = dt.timedelta(**kwargs)
    return now + offset.total_seconds()
3e9b66d7 LNO	4696
3e9b66d7 LNO	4697
# create a JSON Web Signature (jws) with HS256 algorithm
# the resulting format is in JWS Compact Serialization
# implemented following JWT https://www.rfc-editor.org/rfc/rfc7519.html
# implemented following JWS https://www.rfc-editor.org/rfc/rfc7515.html
def jwt_encode_hs256(payload_data, key, headers=None):
    """Sign `payload_data` with HMAC-SHA256 and return the token as bytes.

    @param payload_data  JSON-serializable payload
    @param key           Signing secret (str)
    @param headers       Optional mapping of extra header fields, merged over
                         the default `alg` and `typ` fields
    @returns             `header.payload.signature` as bytes

    NOTE: The segments are encoded with standard (padded) base64, as this
    function always has; callers may depend on the exact output.
    """
    header_data = {
        'alg': 'HS256',
        'typ': 'JWT',
    }
    # `headers` defaults to None rather than a shared mutable dict
    if headers:
        header_data.update(headers)
    header_b64 = base64.b64encode(json.dumps(header_data).encode())
    payload_b64 = base64.b64encode(json.dumps(payload_data).encode())
    signing_input = header_b64 + b'.' + payload_b64
    h = hmac.new(key.encode(), signing_input, hashlib.sha256)
    signature_b64 = base64.b64encode(h.digest())
    return signing_input + b'.' + signature_b64
819e0531	4714
819e0531	4715
# can be extended in future to verify the signature and parse header and return the algorithm used if it's not HS256
def jwt_decode_hs256(jwt):
    """Return the decoded JSON payload of a three-part JWT string.

    The header and signature are neither parsed nor verified.
    """
    _header, payload_b64, _signature = jwt.split('.')
    # add trailing ='s that may have been stripped, superfluous ='s are ignored
    padded_payload = f'{payload_b64}==='
    return json.loads(base64.urlsafe_b64decode(padded_payload))
16b0d7e6	4721
16b0d7e6	4722
# None when not running on Windows; on Windows it starts as False and is
# set to True by windows_enable_vt_mode() once VT processing is enabled
WINDOWS_VT_MODE = False if compat_os_name == 'nt' else None
	4724
	4725
@functools.cache
def supports_terminal_sequences(stream):
    """Whether terminal escape sequences may be written to `stream` (cached per stream).

    On Windows this requires WINDOWS_VT_MODE to be truthy; elsewhere the TERM
    environment variable must be set. The stream must also report being a TTY.
    """
    if compat_os_name == 'nt':
        unsupported = not WINDOWS_VT_MODE
    else:
        unsupported = not os.getenv('TERM')
    if unsupported:
        return False

    try:
        return stream.isatty()
    except BaseException:
        return False
	4737
	4738
def windows_enable_vt_mode():
    """Ref: https://bugs.python.org/issue30075 """
    global WINDOWS_VT_MODE

    # Nothing is attempted on Windows versions older than 10.0.10586
    if get_windows_version() < (10, 0, 10586):
        return

    import ctypes
    import ctypes.wintypes
    import msvcrt

    ENABLE_VIRTUAL_TERMINAL_PROCESSING = 0x0004

    kernel32 = ctypes.WinDLL('kernel32', use_last_error=False)
    console_fd = os.open('CONOUT$', os.O_RDWR)
    try:
        console_handle = ctypes.wintypes.HANDLE(msvcrt.get_osfhandle(console_fd))
        current_mode = ctypes.wintypes.DWORD()
        if not kernel32.GetConsoleMode(console_handle, ctypes.byref(current_mode)):
            raise Exception('GetConsoleMode failed')

        # Keep the existing mode bits and add VT processing
        wanted_mode = ctypes.wintypes.DWORD(current_mode.value | ENABLE_VIRTUAL_TERMINAL_PROCESSING)
        if not kernel32.SetConsoleMode(console_handle, wanted_mode):
            raise Exception('SetConsoleMode failed')
    finally:
        os.close(console_fd)

    WINDOWS_VT_MODE = True
    # Previously cached answers were computed with the old mode
    supports_terminal_sequences.cache_clear()
	4769
53973b4d	4770
_terminal_sequences_re = re.compile('\033\\[[^m]+m')


def remove_terminal_sequences(string):
    """Return `string` with every `ESC [ ... m` escape sequence removed."""
    return re.sub(_terminal_sequences_re, '', string)
	4776
	4777
	4778	def number_of_digits(number):
	4779	return len('%d' % number)
34921b43	4780
	4781
	4782	def join_nonempty(*values, delim='-', from_dict=None):
	4783	if from_dict is not None:
69bec673	4784	values = (traversal.traverse_obj(from_dict, variadic(v)) for v in values)
34921b43	4785	return delim.join(map(str, filter(None, values)))
06e57990	4786
06e57990	4787
def scale_thumbnails_to_max_format_width(formats, thumbnails, url_width_re):
    """
    Find the largest format dimensions in terms of video width and, for each thumbnail:
    * Modify the URL: Match the width with the provided regex and replace with the former width
    * Update dimensions

    `thumbnails` is returned unchanged when no format has a width.

    This function is useful with video services that scale the provided thumbnails on demand
    """
    _keys = ('width', 'height')
    # Tuples compare by width first, so the widest format wins
    sizes = (tuple(fmt.get(k) or 0 for k in _keys) for fmt in formats)
    max_dimensions = max(sizes, default=(0, 0))
    if not max_dimensions[0]:
        return thumbnails

    scaled = []
    for thumbnail in thumbnails:
        new_url = re.sub(url_width_re, str(max_dimensions[0]), thumbnail['url'])
        scaled.append(merge_dicts({'url': new_url}, dict(zip(_keys, max_dimensions)), thumbnail))
    return scaled
	4808
	4809
def parse_http_range(range):
    """ Parse value of "Range" or "Content-Range" HTTP header into tuple. """
    unknown = (None, None, None)
    if not range:
        return unknown
    crg = re.search(r'bytes[ =](\d+)-(\d+)?(?:/(\d+))?', range)
    if not crg:
        return unknown
    # The start is always present; the end and total length are optional
    start, end, total = crg.group(1), crg.group(2), crg.group(3)
    return int(start), int_or_none(end), int_or_none(total)
	4818
	4819
def read_stdin(what):
    """Return sys.stdin; if `what` is truthy, first print a hint on how to end the input."""
    if not what:
        return sys.stdin
    eof = 'Ctrl+Z' if compat_os_name == 'nt' else 'Ctrl+D'
    write_string(f'Reading {what} from STDIN - EOF ({eof}) to end:\n')
    return sys.stdin
	4825
	4826
def determine_file_encoding(data):
    """
    Detect the text encoding used
    @returns (encoding, bytes to skip)
    """

    # BOM marks are given priority over declarations
    bom_match = next(((enc, len(bom)) for bom, enc in BOMS if data.startswith(bom)), None)
    if bom_match is not None:
        return bom_match

    # Strip off all null bytes to match even when UTF-16 or UTF-32 is used.
    # We ignore the endianness to get a good enough match
    stripped = data.replace(b'\0', b'')
    mobj = re.match(rb'(?m)^#\s*coding\s*:\s*(\S+)\s*$', stripped)
    if mobj:
        return mobj.group(1).decode(), 0
    return None, 0
a904a7f8 L	4843
a904a7f8 L	4844
class Config:
    """A set of command-line arguments plus the config files they pull in.

    Each instance holds its own arguments (`own_args`) and a list of child
    `Config` objects (`configs`) created for every `config_locations` entry
    found while parsing those arguments. Children share the parent's
    `_loaded_paths` set, so a location is never loaded twice.
    """
    own_args = None
    parsed_args = None
    filename = None
    __initialized = False

    def __init__(self, parser, label=None):
        self.parser, self.label = parser, label
        self._loaded_paths, self.configs = set(), []

    def init(self, args=None, filename=None):
        """Set the arguments (and the file they came from) and load nested configs.

        May only be called once per instance. Returns what `load_configs` returns.
        """
        assert not self.__initialized
        self.own_args, self.filename = args, filename
        return self.load_configs()

    def load_configs(self):
        """Parse `own_args` and recursively load every referenced config location.

        Returns False if this config's file was already loaded, True otherwise.
        """
        directory = ''
        if self.filename:
            location = os.path.realpath(self.filename)
            # Nested locations are resolved relative to this config file
            directory = os.path.dirname(location)
            if location in self._loaded_paths:
                return False
            self._loaded_paths.add(location)

        self.__initialized = True
        opts, _ = self.parser.parse_known_args(self.own_args)
        self.parsed_args = self.own_args
        for location in opts.config_locations or []:
            if location == '-':
                if location in self._loaded_paths:
                    continue
                self._loaded_paths.add(location)
                self.append_config(shlex.split(read_stdin('options'), comments=True), label='stdin')
                continue
            location = os.path.join(directory, expand_path(location))
            if os.path.isdir(location):
                location = os.path.join(location, 'yt-dlp.conf')
            if not os.path.exists(location):
                self.parser.error(f'config location {location} does not exist')
            self.append_config(self.read_file(location), location)
        return True

    def __str__(self):
        label = join_nonempty(
            self.label, 'config', f'"{self.filename}"' if self.filename else '',
            delim=' ')
        return join_nonempty(
            self.own_args is not None and f'{label[0].upper()}{label[1:]}: {self.hide_login_info(self.own_args)}',
            # Nested configs are shown indented with a leading "| "
            *(f'\n{c}'.replace('\n', '\n| ')[1:] for c in self.configs),
            delim='\n')

    @staticmethod
    def read_file(filename, default=None):
        """Read `filename` and split its contents into a list of arguments.

        @param default  Returned when the file cannot be opened.
                        If omitted, a new empty list is returned each time
        @raises ValueError  if the contents cannot be decoded or split
        """
        try:
            optionf = open(filename, 'rb')
        except OSError:
            # silently skip if file is not present
            return [] if default is None else default
        with optionf:
            try:
                enc, skip = determine_file_encoding(optionf.read(512))
                optionf.seek(skip, io.SEEK_SET)
            except OSError:
                enc = None  # silently skip read errors
            try:
                # FIXME: https://github.com/ytdl-org/youtube-dl/commit/dfe5fa49aed02cf36ba9f743b11b0903554b5e56
                contents = optionf.read().decode(enc or preferredencoding())
                return shlex.split(contents, comments=True)
            except Exception as err:
                raise ValueError(f'Unable to parse "{filename}": {err}') from err

    @staticmethod
    def hide_login_info(opts):
        """Return a copy of `opts` with the values of credential options replaced by 'PRIVATE'."""
        PRIVATE_OPTS = {'-p', '--password', '-u', '--username', '--video-password', '--ap-password', '--ap-username'}
        eqre = re.compile('^(?P<key>' + ('|'.join(re.escape(po) for po in PRIVATE_OPTS)) + ')=.+$')

        def _scrub_eq(o):
            # Handles the `--option=value` form
            m = eqre.match(o)
            if m:
                return m.group('key') + '=PRIVATE'
            else:
                return o

        opts = list(map(_scrub_eq, opts))
        # Handles the `--option value` form
        for idx, opt in enumerate(opts):
            if opt in PRIVATE_OPTS and idx + 1 < len(opts):
                opts[idx + 1] = 'PRIVATE'
        return opts

    def append_config(self, *args, label=None):
        """Create a child config from `args` (as accepted by `init`) and keep it if it loaded."""
        config = type(self)(self.parser, label)
        config._loaded_paths = self._loaded_paths
        if config.init(*args):
            self.configs.append(config)

    @property
    def all_args(self):
        """All arguments: those of nested configs (last added first), then this config's own."""
        for config in reversed(self.configs):
            yield from config.all_args
        yield from self.parsed_args or []

    def parse_known_args(self, **kwargs):
        return self.parser.parse_known_args(self.all_args, **kwargs)

    def parse_args(self):
        return self.parser.parse_args(self.all_args)
da42679b LNO	4952
da42679b LNO	4953
def merge_headers(*dicts):
    """Merge dicts of http headers case insensitively, prioritizing the latter ones"""
    merged = {}
    for headers in dicts:
        for name, value in dict.items(headers):
            # Title-casing the name makes differently-cased duplicates collide
            merged[name.title()] = value
    return merged
28787f16	4957
28787f16	4958
def cached_method(f):
    """Decorator caching a method's results per instance and per argument values.

    Results are stored in the instance's `__dict__` under `_cached_method__cache`,
    keyed by method name and by the bound arguments (defaults applied).
    """
    signature = inspect.signature(f)

    @functools.wraps(f)
    def wrapper(self, *args, **kwargs):
        bound = signature.bind(self, *args, **kwargs)
        bound.apply_defaults()
        # Drop `self`; the store already lives on the instance
        key = tuple(bound.arguments.values())[1:]

        per_instance = vars(self).setdefault('_cached_method__cache', {})
        results = per_instance.setdefault(f.__name__, {})
        if key in results:
            return results[key]
        results[key] = f(self, *args, **kwargs)
        return results[key]
    return wrapper
	4974
	4975
class classproperty:
    """property access for class methods with optional caching"""

    def __new__(cls, func=None, *args, **kwargs):
        # Used with arguments only (e.g. `@classproperty(cache=True)`):
        # return a factory that waits for the function
        if func:
            return super().__new__(cls)
        return functools.partial(cls, *args, **kwargs)

    def __init__(self, func, *, cache=False):
        functools.update_wrapper(self, func)
        self.func = func
        # One cached value per accessing class, or None when caching is off
        self._cache = {} if cache else None

    def __get__(self, _, cls):
        if self._cache is None:
            return self.func(cls)
        if cls not in self._cache:
            self._cache[cls] = self.func(cls)
        return self._cache[cls]
19a03940	4994
19a03940	4995
class function_with_repr:
    """Callable wrapper around a function that provides a readable repr.

    The repr is the explicitly supplied string if there is one, otherwise
    `<module>.<qualname>` of the wrapped function.
    """

    def __init__(self, func, repr_=None):
        functools.update_wrapper(self, func)
        self.func = func
        self.__repr = repr_

    def __call__(self, *args, **kwargs):
        return self.func(*args, **kwargs)

    @classmethod
    def set_repr(cls, repr_):
        """Return a decorator that wraps a function using `repr_` as its repr."""
        return functools.partial(cls, repr_=repr_)

    def __repr__(self):
        if not self.__repr:
            return f'{self.func.__module__}.{self.func.__qualname__}'
        return self.__repr
	5012
	5013
class Namespace(types.SimpleNamespace):
    """SimpleNamespace whose iteration yields the attribute values"""

    def __iter__(self):
        return iter(vars(self).values())

    @property
    def items_(self):
        """View of the (attribute name, value) pairs"""
        return vars(self).items()
9b8ee23b	5023
9b8ee23b	5024
# File extensions grouped by media kind
MEDIA_EXTENSIONS = Namespace(
    common_video=('avi', 'flv', 'mkv', 'mov', 'mp4', 'webm'),
    video=('3g2', '3gp', 'f4v', 'mk3d', 'divx', 'mpg', 'ogv', 'm4v', 'wmv'),
    common_audio=('aiff', 'alac', 'flac', 'm4a', 'mka', 'mp3', 'ogg', 'opus', 'wav'),
    audio=('aac', 'ape', 'asf', 'f4a', 'f4b', 'm4b', 'm4p', 'm4r', 'oga', 'ogx', 'spx', 'vorbis', 'wma', 'weba'),
    thumbnails=('jpg', 'png', 'webp'),
    storyboards=('mhtml', ),
    subtitles=('srt', 'vtt', 'ass', 'lrc'),
    manifests=('f4f', 'f4m', 'm3u8', 'smil', 'mpd'),
)
# After these two lines, `video` and `audio` also contain the "common" extensions
MEDIA_EXTENSIONS.video += MEDIA_EXTENSIONS.common_video
MEDIA_EXTENSIONS.audio += MEDIA_EXTENSIONS.common_audio

# Flat tuple of all video, audio and manifest extensions
KNOWN_EXTENSIONS = (*MEDIA_EXTENSIONS.video, *MEDIA_EXTENSIONS.audio, *MEDIA_EXTENSIONS.manifests)
	5039
	5040
class _UnsafeExtensionError(Exception):
    """
    Mitigation exception for uncommon/malicious file extensions
    This should be caught in YoutubeDL.py alongside a warning

    Ref: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-79w7-vh3h-8g4j
    """
    # Extensions (lowercase) that sanitize_extension accepts
    ALLOWED_EXTENSIONS = frozenset([
        # internal
        'description',
        'json',
        'meta',
        'orig',
        'part',
        'temp',
        'uncut',
        'unknown_video',
        'ytdl',

        # video
        *MEDIA_EXTENSIONS.video,
        'avif',
        'ismv',
        'm2ts',
        'm4s',
        'mng',
        'mpeg',
        'qt',
        'swf',
        'ts',
        'vp9',
        'wvm',

        # audio
        *MEDIA_EXTENSIONS.audio,
        'isma',
        'mid',
        'mpga',
        'ra',

        # image
        *MEDIA_EXTENSIONS.thumbnails,
        'bmp',
        'gif',
        'heic',
        'ico',
        'jng',
        'jpeg',
        'jxl',
        'svg',
        'tif',
        'wbmp',

        # subtitle
        *MEDIA_EXTENSIONS.subtitles,
        'dfxp',
        'fs',
        'ismt',
        'sami',
        'scc',
        'ssa',
        'tt',
        'ttml',

        # others
        *MEDIA_EXTENSIONS.manifests,
        *MEDIA_EXTENSIONS.storyboards,
        'desktop',
        'ism',
        'm3u',
        'sbv',
        'url',
        'webloc',
        'xml',
    ])

    def __init__(self, extension, /):
        super().__init__(f'unsafe file extension: {extension!r}')
        self.extension = extension

    @classmethod
    def sanitize_extension(cls, extension, /, *, prepend=False):
        """Return `extension` if it is acceptable, otherwise raise this exception.

        Path separators are always rejected. Unless `prepend` is true, the part
        after the last dot must be in ALLOWED_EXTENSIONS (compared in lowercase);
        a final part of 'bin' is replaced by 'unknown_video'.
        """
        if any(separator in extension for separator in ('/', '\\')):
            raise cls(extension)

        if prepend:
            return extension

        last = extension.rpartition('.')[2]
        if last == 'bin':
            extension = last = 'unknown_video'
        if last.lower() not in cls.ALLOWED_EXTENSIONS:
            raise cls(extension)
        return extension
5134
5135
be5c1ae8	5136	class RetryManager:
	5137	"""Usage:
	5138	for retry in RetryManager(...):
	5139	try:
	5140	...
	5141	except SomeException as err:
	5142	retry.error = err
	5143	continue
	5144	"""
	5145	attempt, _error = 0, None
	5146
	5147	def __init__(self, _retries, _error_callback, **kwargs):
	5148	self.retries = _retries or 0
	5149	self.error_callback = functools.partial(_error_callback, **kwargs)
	5150
	5151	def _should_retry(self):
	5152	return self._error is not NO_DEFAULT and self.attempt <= self.retries
	5153
7a32c70d	5154	@property
be5c1ae8	5155	def error(self):
	5156	if self._error is NO_DEFAULT:
	5157	return None
	5158	return self._error
	5159
7a32c70d	5160	@error.setter
be5c1ae8	5161	def error(self, value):
	5162	self._error = value
	5163
	5164	def __iter__(self):
	5165	while self._should_retry():
	5166	self.error = NO_DEFAULT
	5167	self.attempt += 1
	5168	yield self
	5169	if self.error:
	5170	self.error_callback(self.error, self.attempt, self.retries)
	5171
7a32c70d	5172	@staticmethod
be5c1ae8	5173	def report_retry(e, count, retries, *, sleep_func, info, warn, error=None, suffix=None):
	5174	"""Utility function for reporting retries"""
	5175	if count > retries:
	5176	if error:
	5177	return error(f'{e}. Giving up after {count - 1} retries') if count > 1 else error(str(e))
	5178	raise e
	5179
	5180	if not count:
	5181	return warn(e)
	5182	elif isinstance(e, ExtractorError):
3ce29336	5183	e = remove_end(str_or_none(e.cause) or e.orig_msg, '.')
be5c1ae8	5184	warn(f'{e}. Retrying{format_field(suffix, None, " %s")} ({count}/{retries})...')
	5185
	5186	delay = float_or_none(sleep_func(n=count - 1)) if callable(sleep_func) else sleep_func
	5187	if delay:
	5188	info(f'Sleeping {delay:.2f} seconds ...')
	5189	time.sleep(delay)
	5190
	5191
def make_archive_id(ie, video_id):
    """Build the archive id `<lowercased extractor key> <video id>`.

    `ie` is either the extractor key string or an object with an `ie_key()` method.
    """
    if isinstance(ie, str):
        ie_key = ie
    else:
        ie_key = ie.ie_key()
    return f'{ie_key.lower()} {video_id}'
	5195
	5196
def truncate_string(s, left, right=0):
    """Shorten `s` to `left` leading characters (including a '...' marker) plus `right` trailing ones.

    `s` is returned unchanged when it is None or no longer than `left + right`.
    """
    assert left > 3 and right >= 0
    if s is None or len(s) <= left + right:
        return s
    head = s[:left - 3]
    tail = s[-right:] if right else ''
    return f'{head}...{tail}'
a1c5bd82	5202
a1c5bd82	5203
def orderedSet_from_options(options, alias_dict, *, use_regex=False, start=None):
    """Build an ordered, de-duplicated selection from a list of option strings.

    Each option is either an alias (a key of `alias_dict`, expanded
    recursively) or an entry of `alias_dict['all']` (or, with `use_regex`,
    a case-insensitive regex matched against those entries). A leading '-'
    removes the matching items instead of adding them.

    Raises ValueError with the offending option when it matches nothing.
    """
    assert 'all' in alias_dict, '"all" alias is required'
    requested = list(start or [])
    for option in options:
        discard = option.startswith('-')
        name = option[1:] if discard else option

        if name in alias_dict:
            expansion = alias_dict[name]
            if discard:
                # Negating an alias flips the sign of each of its members
                expansion = [
                    i[1:] if i.startswith('-') else f'-{i}' for i in expansion]
            # NB: Do not allow regex in aliases for performance
            requested = orderedSet_from_options(expansion, alias_dict, start=requested)
            continue

        if use_regex:
            current = list(filter(re.compile(name, re.I).fullmatch, alias_dict['all']))
        elif name in alias_dict['all']:
            current = [name]
        else:
            raise ValueError(name)

        if discard:
            requested = [item for item in requested if item not in current]
        else:
            requested.extend(current)

    return orderedSet(requested)
	5232
	5233
eedda525	5234	# TODO: Rewrite
d0d74b71	5235	class FormatSorter:
	5236	regex = r' ((?P<reverse>\+)?(?P<field>[a-zA-Z0-9_]+)((?P<separator>[~:])(?P<limit>.?))?)? *$'
	5237
	5238	default = ('hidden', 'aud_or_vid', 'hasvid', 'ie_pref', 'lang', 'quality',
	5239	'res', 'fps', 'hdr:12', 'vcodec:vp9.2', 'channels', 'acodec',
	5240	'size', 'br', 'asr', 'proto', 'ext', 'hasaud', 'source', 'id') # These must not be aliases
	5241	ytdl_default = ('hasaud', 'lang', 'quality', 'tbr', 'filesize', 'vbr',
	5242	'height', 'width', 'proto', 'vext', 'abr', 'aext',
	5243	'fps', 'fs_approx', 'source', 'id')
	5244
	5245	settings = {
	5246	'vcodec': {'type': 'ordered', 'regex': True,
	5247	'order': ['av0?1', 'vp0?9.2', 'vp0?9', '[hx]265\|he?vc?', '[hx]264\|avc', 'vp0?8', 'mp4v\|h263', 'theora', '', None, 'none']},
	5248	'acodec': {'type': 'ordered', 'regex': True,
71082216	5249	'order': ['[af]lac', 'wav\|aiff', 'opus', 'vorbis\|ogg', 'aac', 'mp?4a?', 'mp3', 'ac-?4', 'e-?a?c-?3', 'ac-?3', 'dts', '', None, 'none']},
d0d74b71	5250	'hdr': {'type': 'ordered', 'regex': True, 'field': 'dynamic_range',
	5251	'order': ['dv', '(hdr)?12', r'(hdr)?10\+', '(hdr)?10', 'hlg', '', 'sdr', None]},
	5252	'proto': {'type': 'ordered', 'regex': True, 'field': 'protocol',
	5253	'order': ['(ht\|f)tps', '(ht\|f)tp$', 'm3u8.', '.dash', 'websocket_frag', 'rtmpe?', '', 'mms\|rtsp', 'ws\|websocket', 'f4']},
	5254	'vext': {'type': 'ordered', 'field': 'video_ext',
29ca4082	5255	'order': ('mp4', 'mov', 'webm', 'flv', '', 'none'),
29ca4082	5256	'order_free': ('webm', 'mp4', 'mov', 'flv', '', 'none')},
fbb73833	5257	'aext': {'type': 'ordered', 'regex': True, 'field': 'audio_ext',
	5258	'order': ('m4a', 'aac', 'mp3', 'ogg', 'opus', 'web[am]', '', 'none'),
	5259	'order_free': ('ogg', 'opus', 'web[am]', 'mp3', 'm4a', 'aac', '', 'none')},
d0d74b71	5260	'hidden': {'visible': False, 'forced': True, 'type': 'extractor', 'max': -1000},
	5261	'aud_or_vid': {'visible': False, 'forced': True, 'type': 'multiple',
	5262	'field': ('vcodec', 'acodec'),
	5263	'function': lambda it: int(any(v != 'none' for v in it))},
	5264	'ie_pref': {'priority': True, 'type': 'extractor'},
	5265	'hasvid': {'priority': True, 'field': 'vcodec', 'type': 'boolean', 'not_in_list': ('none',)},
	5266	'hasaud': {'field': 'acodec', 'type': 'boolean', 'not_in_list': ('none',)},
	5267	'lang': {'convert': 'float', 'field': 'language_preference', 'default': -1},
	5268	'quality': {'convert': 'float', 'default': -1},
	5269	'filesize': {'convert': 'bytes'},
	5270	'fs_approx': {'convert': 'bytes', 'field': 'filesize_approx'},
	5271	'id': {'convert': 'string', 'field': 'format_id'},
	5272	'height': {'convert': 'float_none'},
	5273	'width': {'convert': 'float_none'},
	5274	'fps': {'convert': 'float_none'},
	5275	'channels': {'convert': 'float_none', 'field': 'audio_channels'},
	5276	'tbr': {'convert': 'float_none'},
	5277	'vbr': {'convert': 'float_none'},
	5278	'abr': {'convert': 'float_none'},
	5279	'asr': {'convert': 'float_none'},
	5280	'source': {'convert': 'float', 'field': 'source_preference', 'default': -1},
	5281
	5282	'codec': {'type': 'combined', 'field': ('vcodec', 'acodec')},
812cdfa0	5283	'br': {'type': 'multiple', 'field': ('tbr', 'vbr', 'abr'), 'convert': 'float_none',
eedda525	5284	'function': lambda it: next(filter(None, it), None)},
812cdfa0	5285	'size': {'type': 'multiple', 'field': ('filesize', 'fs_approx'), 'convert': 'bytes',
eedda525	5286	'function': lambda it: next(filter(None, it), None)},
d0d74b71	5287	'ext': {'type': 'combined', 'field': ('vext', 'aext')},
d0d74b71	5288	'res': {'type': 'multiple', 'field': ('height', 'width'),
add96eb9	5289	'function': lambda it: min(filter(None, it), default=0)},
d0d74b71	5290
	5291	# Actual field names
	5292	'format_id': {'type': 'alias', 'field': 'id'},
	5293	'preference': {'type': 'alias', 'field': 'ie_pref'},
	5294	'language_preference': {'type': 'alias', 'field': 'lang'},
	5295	'source_preference': {'type': 'alias', 'field': 'source'},
	5296	'protocol': {'type': 'alias', 'field': 'proto'},
	5297	'filesize_approx': {'type': 'alias', 'field': 'fs_approx'},
	5298	'audio_channels': {'type': 'alias', 'field': 'channels'},
	5299
	5300	# Deprecated
	5301	'dimension': {'type': 'alias', 'field': 'res', 'deprecated': True},
	5302	'resolution': {'type': 'alias', 'field': 'res', 'deprecated': True},
	5303	'extension': {'type': 'alias', 'field': 'ext', 'deprecated': True},
	5304	'bitrate': {'type': 'alias', 'field': 'br', 'deprecated': True},
	5305	'total_bitrate': {'type': 'alias', 'field': 'tbr', 'deprecated': True},
	5306	'video_bitrate': {'type': 'alias', 'field': 'vbr', 'deprecated': True},
	5307	'audio_bitrate': {'type': 'alias', 'field': 'abr', 'deprecated': True},
	5308	'framerate': {'type': 'alias', 'field': 'fps', 'deprecated': True},
	5309	'filesize_estimate': {'type': 'alias', 'field': 'size', 'deprecated': True},
	5310	'samplerate': {'type': 'alias', 'field': 'asr', 'deprecated': True},
	5311	'video_ext': {'type': 'alias', 'field': 'vext', 'deprecated': True},
	5312	'audio_ext': {'type': 'alias', 'field': 'aext', 'deprecated': True},
	5313	'video_codec': {'type': 'alias', 'field': 'vcodec', 'deprecated': True},
	5314	'audio_codec': {'type': 'alias', 'field': 'acodec', 'deprecated': True},
	5315	'video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
	5316	'has_video': {'type': 'alias', 'field': 'hasvid', 'deprecated': True},
	5317	'audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
	5318	'has_audio': {'type': 'alias', 'field': 'hasaud', 'deprecated': True},
	5319	'extractor': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
	5320	'extractor_preference': {'type': 'alias', 'field': 'ie_pref', 'deprecated': True},
	5321	}
	5322
	5323	def __init__(self, ydl, field_preference):
	5324	self.ydl = ydl
	5325	self._order = []
	5326	self.evaluate_params(self.ydl.params, field_preference)
	5327	if ydl.params.get('verbose'):
	5328	self.print_verbose_info(self.ydl.write_debug)
	5329
	5330	def _get_field_setting(self, field, key):
	5331	if field not in self.settings:
	5332	if key in ('forced', 'priority'):
	5333	return False
	5334	self.ydl.deprecated_feature(f'Using arbitrary fields ({field}) for format sorting is '
	5335	'deprecated and may be removed in a future version')
	5336	self.settings[field] = {}
add96eb9	5337	prop_obj = self.settings[field]
	5338	if key not in prop_obj:
	5339	type_ = prop_obj.get('type')
d0d74b71	5340	if key == 'field':
add96eb9	5341	default = 'preference' if type_ == 'extractor' else (field,) if type_ in ('combined', 'multiple') else field
d0d74b71	5342	elif key == 'convert':
add96eb9	5343	default = 'order' if type_ == 'ordered' else 'float_string' if field else 'ignore'
d0d74b71	5344	else:
add96eb9	5345	default = {'type': 'field', 'visible': True, 'order': [], 'not_in_list': (None,)}.get(key)
	5346	prop_obj[key] = default
	5347	return prop_obj[key]
d0d74b71	5348
def _resolve_field_value(self, field, value, convert_none=False):
    """
    Convert a textual value of `field` according to the field's 'convert' setting

    @param field: Name of the sort field
    @param value: String to convert (lower-cased first), or None
    @param convert_none: When false, a None value is returned as None without
                         consulting the field settings
    @returns None for 'ignore'; the lower-cased string for 'string'; the result
             of float_or_none/parse_bytes for 'float_none'/'bytes'; a rank for
             'order' (larger for entries earlier in the order list); otherwise
             a float for numeric strings or the string itself
    """
    if value is not None:
        value = value.lower()
    elif not convert_none:
        return None

    conversion = self._get_field_setting(field, 'convert')
    if conversion == 'ignore':
        return None
    if conversion == 'string':
        return value
    if conversion == 'float_none':
        return float_or_none(value)
    if conversion == 'bytes':
        return parse_bytes(value)

    if conversion != 'order':
        if value.isnumeric():
            return float(value)
        # A non-numeric value switches the field to string conversion from now on
        self.settings[field]['convert'] = 'string'
        return value

    ranking = (self._use_free_order and self._get_field_setting(field, 'order_free')) or self._get_field_setting(field, 'order')
    match_as_regex = self._get_field_setting(field, 'regex')
    total = len(ranking)
    # Position used for values that are not listed: the '' entry if present
    unlisted_pos = ranking.index('') if '' in ranking else total + 1

    if match_as_regex and value is not None:
        hit = next(
            (idx for idx, pattern in enumerate(ranking) if pattern and re.match(pattern, value)),
            unlisted_pos)
        return total - hit
    # not regex or value = None
    if value in ranking:
        return total - ranking.index(value)
    return total - unlisted_pos
	5382
	5383	def evaluate_params(self, params, sort_extractor):
	5384	self._use_free_order = params.get('prefer_free_formats', False)
	5385	self._sort_user = params.get('format_sort', [])
	5386	self._sort_extractor = sort_extractor
	5387
	5388	def add_item(field, reverse, closest, limit_text):
	5389	field = field.lower()
	5390	if field in self._order:
	5391	return
	5392	self._order.append(field)
	5393	limit = self._resolve_field_value(field, limit_text)
	5394	data = {
	5395	'reverse': reverse,
	5396	'closest': False if limit is None else closest,
	5397	'limit_text': limit_text,
	5398	'limit': limit}
	5399	if field in self.settings:
	5400	self.settings[field].update(data)
	5401	else:
	5402	self.settings[field] = data
	5403
	5404	sort_list = (
	5405	tuple(field for field in self.default if self._get_field_setting(field, 'forced'))
	5406	+ (tuple() if params.get('format_sort_force', False)
	5407	else tuple(field for field in self.default if self._get_field_setting(field, 'priority')))
	5408	+ tuple(self._sort_user) + tuple(sort_extractor) + self.default)
	5409
	5410	for item in sort_list:
	5411	match = re.match(self.regex, item)
	5412	if match is None:
add96eb9	5413	raise ExtractorError(f'Invalid format sort string "{item}" given by extractor')
d0d74b71	5414	field = match.group('field')
	5415	if field is None:
	5416	continue
	5417	if self._get_field_setting(field, 'type') == 'alias':
	5418	alias, field = field, self._get_field_setting(field, 'field')
	5419	if self._get_field_setting(alias, 'deprecated'):
	5420	self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
	5421	f'be removed in a future version. Please use {field} instead')
	5422	reverse = match.group('reverse') is not None
	5423	closest = match.group('separator') == '~'
	5424	limit_text = match.group('limit')
	5425
	5426	has_limit = limit_text is not None
	5427	has_multiple_fields = self._get_field_setting(field, 'type') == 'combined'
	5428	has_multiple_limits = has_limit and has_multiple_fields and not self._get_field_setting(field, 'same_limit')
	5429
	5430	fields = self._get_field_setting(field, 'field') if has_multiple_fields else (field,)
	5431	limits = limit_text.split(':') if has_multiple_limits else (limit_text,) if has_limit else tuple()
	5432	limit_count = len(limits)
	5433	for (i, f) in enumerate(fields):
	5434	add_item(f, reverse, closest,
	5435	limits[i] if i < limit_count
	5436	else limits[0] if has_limit and not has_multiple_limits
	5437	else None)
	5438
	5439	def print_verbose_info(self, write_debug):
	5440	if self._sort_user:
add96eb9	5441	write_debug('Sort order given by user: {}'.format(', '.join(self._sort_user)))
d0d74b71	5442	if self._sort_extractor:
add96eb9	5443	write_debug('Sort order given by extractor: {}'.format(', '.join(self._sort_extractor)))
add96eb9	5444	write_debug('Formats sorted by: {}'.format(', '.join(['{}{}{}'.format(
d0d74b71	5445	'+' if self._get_field_setting(field, 'reverse') else '', field,
add96eb9	5446	'{}{}({})'.format('~' if self._get_field_setting(field, 'closest') else ':',
	5447	self._get_field_setting(field, 'limit_text'),
	5448	self._get_field_setting(field, 'limit'))
d0d74b71	5449	if self._get_field_setting(field, 'limit_text') is not None else '')
add96eb9	5450	for field in self._order if self._get_field_setting(field, 'visible')])))
d0d74b71	5451
def _calculate_field_preference_from_value(self, format_, field, type_, value):
    """
    Turn a raw field value into a sortable tuple

    @param format_: The format dict (not used by this method)
    @param field: Name of the sort field whose settings apply
    @param type_: Field type; 'extractor', 'boolean' and 'ordered' get
                  special normalisation, anything else is used as-is
    @param value: Raw value taken from the format
    @returns A tuple whose first element ranks the category (missing value,
             string, number within/over the limit) and whose remaining
             elements order values inside that category
    """
    reverse = self._get_field_setting(field, 'reverse')
    closest = self._get_field_setting(field, 'closest')
    limit = self._get_field_setting(field, 'limit')

    if type_ == 'extractor':
        maximum = self._get_field_setting(field, 'max')
        if value is None or (maximum is not None and value >= maximum):
            value = -1
    elif type_ == 'boolean':
        in_list = self._get_field_setting(field, 'in_list')
        not_in_list = self._get_field_setting(field, 'not_in_list')
        if in_list is not None and value not in in_list:
            value = -1
        elif not_in_list is not None and value in not_in_list:
            value = -1
        else:
            value = 0
    elif type_ == 'ordered':
        value = self._resolve_field_value(field, value, True)

    # try to convert to number
    val_num = float_or_none(value, default=self._get_field_setting(field, 'default'))
    is_num = self._get_field_setting(field, 'convert') != 'string' and val_num is not None
    if is_num:
        value = val_num

    if value is None:
        return (-10, 0)
    if not is_num:
        # if a field has mixed strings and numbers, strings are sorted higher
        return (1, value, 0)
    if closest:
        # Smaller distance to the limit ranks higher; the last element breaks ties by side
        return (0, -abs(value - limit), value - limit if reverse else limit - value)
    if not reverse and (limit is None or value <= limit):
        return (0, value, 0)
    if limit is None or (reverse and value == limit) or value > limit:
        return (0, -value, 0)
    return (-1, value, 0)
	5480
def _calculate_field_preference(self, format_, field):
    """
    Compute the sortable tuple of one sort field for one format

    @param format_: The format dict to read values from
    @param field: Name of the sort field
    @returns The result of `_calculate_field_preference_from_value`
    """
    def lookup(name):
        # Read the format key that the sort field `name` maps to
        return format_.get(self._get_field_setting(name, 'field'))

    type_ = self._get_field_setting(field, 'type')  # extractor, boolean, ordered, field, multiple
    if type_ != 'multiple':
        return self._calculate_field_preference_from_value(format_, field, type_, lookup(field))

    # Only 'field' is allowed in multiple for now
    sources = self._get_field_setting(field, 'field')
    combine = self._get_field_setting(field, 'function')
    # The values are passed lazily so `combine` may stop at the first usable one
    value = combine(lookup(name) for name in sources)
    return self._calculate_field_preference_from_value(format_, field, 'field', value)
d0d74b71	5492
	5493	def calculate_preference(self, format):
	5494	# Determine missing protocol
	5495	if not format.get('protocol'):
	5496	format['protocol'] = determine_protocol(format)
	5497
	5498	# Determine missing ext
	5499	if not format.get('ext') and 'url' in format:
	5500	format['ext'] = determine_ext(format['url'])
	5501	if format.get('vcodec') == 'none':
	5502	format['audio_ext'] = format['ext'] if format.get('acodec') != 'none' else 'none'
	5503	format['video_ext'] = 'none'
	5504	else:
	5505	format['video_ext'] = format['ext']
	5506	format['audio_ext'] = 'none'
	5507	# if format.get('preference') is None and format.get('ext') in ('f4f', 'f4m'): # Not supported?
	5508	# format['preference'] = -1000
	5509
5424dbaf L	5510	if format.get('preference') is None and format.get('ext') == 'flv' and re.match('[hx]265\|he?vc?', format.get('vcodec') or ''):
	5511	# HEVC-over-FLV is out-of-spec by FLV's original spec
	5512	# ref. https://trac.ffmpeg.org/ticket/6389
	5513	# ref. https://github.com/yt-dlp/yt-dlp/pull/5821
	5514	format['preference'] = -100
	5515
d0d74b71	5516	# Determine missing bitrates
eedda525	5517	if format.get('vcodec') == 'none':
	5518	format['vbr'] = 0
	5519	if format.get('acodec') == 'none':
	5520	format['abr'] = 0
	5521	if not format.get('vbr') and format.get('vcodec') != 'none':
	5522	format['vbr'] = try_call(lambda: format['tbr'] - format['abr']) or None
	5523	if not format.get('abr') and format.get('acodec') != 'none':
	5524	format['abr'] = try_call(lambda: format['tbr'] - format['vbr']) or None
	5525	if not format.get('tbr'):
	5526	format['tbr'] = try_call(lambda: format['vbr'] + format['abr']) or None
d0d74b71	5527
d0d74b71	5528	return tuple(self._calculate_field_preference(format, field) for field in self._order)
1b392f90	5529
1b392f90	5530
def filesize_from_tbr(tbr, duration):
    """
    Estimate a file size from its total bitrate and duration

    @param tbr:      Total bitrate in kbps (1000 bits/sec)
    @param duration: Duration in seconds
    @returns         Filesize in bytes, or None if either input is None
    """
    if tbr is not None and duration is not None:
        # kbps -> bytes per second: multiply by 1000 bits and divide by 8 bits per byte
        return int(duration * tbr * (1000 / 8))
    return None
	5540
	5541
1b392f90	5542	# XXX: Temporary
	5543	class _YDLLogger:
	5544	def __init__(self, ydl=None):
	5545	self._ydl = ydl
	5546
	5547	def debug(self, message):
	5548	if self._ydl:
	5549	self._ydl.write_debug(message)
	5550
	5551	def info(self, message):
	5552	if self._ydl:
	5553	self._ydl.to_screen(message)
	5554
	5555	def warning(self, message, *, once=False):
	5556	if self._ydl:
3d2623a8	5557	self._ydl.report_warning(message, once)
1b392f90	5558
	5559	def error(self, message, *, is_error=True):
	5560	if self._ydl:
	5561	self._ydl.report_error(message, is_error=is_error)
	5562
	5563	def stdout(self, message):
	5564	if self._ydl:
	5565	self._ydl.to_stdout(message)
	5566
	5567	def stderr(self, message):
	5568	if self._ydl:
	5569	self._ydl.to_stderr(message)