jfr.im git - yt-dlp.git/blame_incremental

... / ...

Commit	Line	Data
	1	import errno
	2	import hashlib
	3	import json
	4	import os.path
	5	import re
	6	import ssl
	7	import sys
	8	import types
	9
	10	import yt_dlp.extractor
	11	from yt_dlp import YoutubeDL
	12	from yt_dlp.compat import compat_os_name
	13	from yt_dlp.utils import preferredencoding, try_call, write_string, find_available_port
	14
	15	if 'pytest' in sys.modules:
	16	import pytest
	17	is_download_test = pytest.mark.download
	18	else:
	19	def is_download_test(testClass):
	20	return testClass
	21
	22
	23	def get_params(override=None):
	24	PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
	25	'parameters.json')
	26	LOCAL_PARAMETERS_FILE = os.path.join(os.path.dirname(os.path.abspath(__file__)),
	27	'local_parameters.json')
	28	with open(PARAMETERS_FILE, encoding='utf-8') as pf:
	29	parameters = json.load(pf)
	30	if os.path.exists(LOCAL_PARAMETERS_FILE):
	31	with open(LOCAL_PARAMETERS_FILE, encoding='utf-8') as pf:
	32	parameters.update(json.load(pf))
	33	if override:
	34	parameters.update(override)
	35	return parameters
	36
	37
	38	def try_rm(filename):
	39	""" Remove a file if it exists """
	40	try:
	41	os.remove(filename)
	42	except OSError as ose:
	43	if ose.errno != errno.ENOENT:
	44	raise
	45
	46
	47	def report_warning(message, args, *kwargs):
	48	'''
	49	Print the message to stderr, it will be prefixed with 'WARNING:'
	50	If stderr is a tty file the 'WARNING:' will be colored
	51	'''
	52	if sys.stderr.isatty() and compat_os_name != 'nt':
	53	_msg_header = '\033[0;33mWARNING:\033[0m'
	54	else:
	55	_msg_header = 'WARNING:'
	56	output = f'{_msg_header} {message}\n'
	57	if 'b' in getattr(sys.stderr, 'mode', ''):
	58	output = output.encode(preferredencoding())
	59	sys.stderr.write(output)
	60
	61
	62	class FakeYDL(YoutubeDL):
	63	def __init__(self, override=None):
	64	# Different instances of the downloader can't share the same dictionary
	65	# some test set the "sublang" parameter, which would break the md5 checks.
	66	params = get_params(override=override)
	67	super().__init__(params, auto_init=False)
	68	self.result = []
	69
	70	def to_screen(self, s, args, *kwargs):
	71	print(s)
	72
	73	def trouble(self, s, args, *kwargs):
	74	raise Exception(s)
	75
	76	def download(self, x):
	77	self.result.append(x)
	78
	79	def expect_warning(self, regex):
	80	# Silence an expected warning matching a regex
	81	old_report_warning = self.report_warning
	82
	83	def report_warning(self, message, args, *kwargs):
	84	if re.match(regex, message):
	85	return
	86	old_report_warning(message, args, *kwargs)
	87	self.report_warning = types.MethodType(report_warning, self)
	88
	89
	90	def gettestcases(include_onlymatching=False):
	91	for ie in yt_dlp.extractor.gen_extractors():
	92	yield from ie.get_testcases(include_onlymatching)
	93
	94
	95	def getwebpagetestcases():
	96	for ie in yt_dlp.extractor.gen_extractors():
	97	for tc in ie.get_webpage_testcases():
	98	tc.setdefault('add_ie', []).append('Generic')
	99	yield tc
	100
	101
	102	md5 = lambda s: hashlib.md5(s.encode()).hexdigest()
	103
	104
	105	def expect_value(self, got, expected, field):
	106	if isinstance(expected, str) and expected.startswith('re:'):
	107	match_str = expected[len('re:'):]
	108	match_rex = re.compile(match_str)
	109
	110	self.assertTrue(
	111	isinstance(got, str),
	112	f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
	113	self.assertTrue(
	114	match_rex.match(got),
	115	f'field {field} (value: {got!r}) should match {match_str!r}')
	116	elif isinstance(expected, str) and expected.startswith('startswith:'):
	117	start_str = expected[len('startswith:'):]
	118	self.assertTrue(
	119	isinstance(got, str),
	120	f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
	121	self.assertTrue(
	122	got.startswith(start_str),
	123	f'field {field} (value: {got!r}) should start with {start_str!r}')
	124	elif isinstance(expected, str) and expected.startswith('contains:'):
	125	contains_str = expected[len('contains:'):]
	126	self.assertTrue(
	127	isinstance(got, str),
	128	f'Expected a {str.__name__} object, but got {type(got).__name__} for field {field}')
	129	self.assertTrue(
	130	contains_str in got,
	131	f'field {field} (value: {got!r}) should contain {contains_str!r}')
	132	elif isinstance(expected, type):
	133	self.assertTrue(
	134	isinstance(got, expected),
	135	f'Expected type {expected!r} for field {field}, but got value {got!r} of type {type(got)!r}')
	136	elif isinstance(expected, dict) and isinstance(got, dict):
	137	expect_dict(self, got, expected)
	138	elif isinstance(expected, list) and isinstance(got, list):
	139	self.assertEqual(
	140	len(expected), len(got),
	141	'Expect a list of length %d, but got a list of length %d for field %s' % (
	142	len(expected), len(got), field))
	143	for index, (item_got, item_expected) in enumerate(zip(got, expected)):
	144	type_got = type(item_got)
	145	type_expected = type(item_expected)
	146	self.assertEqual(
	147	type_expected, type_got,
	148	'Type mismatch for list item at index %d for field %s, expected %r, got %r' % (
	149	index, field, type_expected, type_got))
	150	expect_value(self, item_got, item_expected, field)
	151	else:
	152	if isinstance(expected, str) and expected.startswith('md5:'):
	153	self.assertTrue(
	154	isinstance(got, str),
	155	f'Expected field {field} to be a unicode object, but got value {got!r} of type {type(got)!r}')
	156	got = 'md5:' + md5(got)
	157	elif isinstance(expected, str) and re.match(r'^(?:min\|max)?count:\d+', expected):
	158	self.assertTrue(
	159	isinstance(got, (list, dict)),
	160	f'Expected field {field} to be a list or a dict, but it is of type {type(got).__name__}')
	161	op, _, expected_num = expected.partition(':')
	162	expected_num = int(expected_num)
	163	if op == 'mincount':
	164	assert_func = assertGreaterEqual
	165	msg_tmpl = 'Expected %d items in field %s, but only got %d'
	166	elif op == 'maxcount':
	167	assert_func = assertLessEqual
	168	msg_tmpl = 'Expected maximum %d items in field %s, but got %d'
	169	elif op == 'count':
	170	assert_func = assertEqual
	171	msg_tmpl = 'Expected exactly %d items in field %s, but got %d'
	172	else:
	173	assert False
	174	assert_func(
	175	self, len(got), expected_num,
	176	msg_tmpl % (expected_num, field, len(got)))
	177	return
	178	self.assertEqual(
	179	expected, got,
	180	f'Invalid value for field {field}, expected {expected!r}, got {got!r}')
	181
	182
	183	def expect_dict(self, got_dict, expected_dict):
	184	for info_field, expected in expected_dict.items():
	185	got = got_dict.get(info_field)
	186	expect_value(self, got, expected, info_field)
	187
	188
	189	def sanitize_got_info_dict(got_dict):
	190	IGNORED_FIELDS = (
	191	*YoutubeDL._format_fields,
	192
	193	# Lists
	194	'formats', 'thumbnails', 'subtitles', 'automatic_captions', 'comments', 'entries',
	195
	196	# Auto-generated
	197	'autonumber', 'playlist', 'format_index', 'video_ext', 'audio_ext', 'duration_string', 'epoch', 'n_entries',
	198	'fulltitle', 'extractor', 'extractor_key', 'filename', 'filepath', 'infojson_filename', 'original_url',
	199
	200	# Only live_status needs to be checked
	201	'is_live', 'was_live',
	202	)
	203
	204	IGNORED_PREFIXES = ('', 'playlist', 'requested', 'webpage')
	205
	206	def sanitize(key, value):
	207	if isinstance(value, str) and len(value) > 100 and key != 'thumbnail':
	208	return f'md5:{md5(value)}'
	209	elif isinstance(value, list) and len(value) > 10:
	210	return f'count:{len(value)}'
	211	elif key.endswith('_count') and isinstance(value, int):
	212	return int
	213	return value
	214
	215	test_info_dict = {
	216	key: sanitize(key, value) for key, value in got_dict.items()
	217	if value is not None and key not in IGNORED_FIELDS and (
	218	not any(key.startswith(f'{prefix}_') for prefix in IGNORED_PREFIXES)
	219	or key == '_old_archive_ids')
	220	}
	221
	222	# display_id may be generated from id
	223	if test_info_dict.get('display_id') == test_info_dict.get('id'):
	224	test_info_dict.pop('display_id')
	225
	226	# Remove deprecated fields
	227	for old in YoutubeDL._deprecated_multivalue_fields.keys():
	228	test_info_dict.pop(old, None)
	229
	230	# release_year may be generated from release_date
	231	if try_call(lambda: test_info_dict['release_year'] == int(test_info_dict['release_date'][:4])):
	232	test_info_dict.pop('release_year')
	233
	234	# Check url for flat entries
	235	if got_dict.get('_type', 'video') != 'video' and got_dict.get('url'):
	236	test_info_dict['url'] = got_dict['url']
	237
	238	return test_info_dict
	239
	240
	241	def expect_info_dict(self, got_dict, expected_dict):
	242	expect_dict(self, got_dict, expected_dict)
	243	# Check for the presence of mandatory fields
	244	if got_dict.get('_type') not in ('playlist', 'multi_video'):
	245	mandatory_fields = ['id', 'title']
	246	if expected_dict.get('ext'):
	247	mandatory_fields.extend(('url', 'ext'))
	248	for key in mandatory_fields:
	249	self.assertTrue(got_dict.get(key), 'Missing mandatory field %s' % key)
	250	# Check for mandatory fields that are automatically set by YoutubeDL
	251	if got_dict.get('_type', 'video') == 'video':
	252	for key in ['webpage_url', 'extractor', 'extractor_key']:
	253	self.assertTrue(got_dict.get(key), 'Missing field: %s' % key)
	254
	255	test_info_dict = sanitize_got_info_dict(got_dict)
	256
	257	missing_keys = set(test_info_dict.keys()) - set(expected_dict.keys())
	258	if missing_keys:
	259	def _repr(v):
	260	if isinstance(v, str):
	261	return "'%s'" % v.replace('\\', '\\\\').replace("'", "\\'").replace('\n', '\\n')
	262	elif isinstance(v, type):
	263	return v.__name__
	264	else:
	265	return repr(v)
	266	info_dict_str = ''.join(
	267	f' {_repr(k)}: {_repr(v)},\n'
	268	for k, v in test_info_dict.items() if k not in missing_keys)
	269	if info_dict_str:
	270	info_dict_str += '\n'
	271	info_dict_str += ''.join(
	272	f' {_repr(k)}: {_repr(test_info_dict[k])},\n'
	273	for k in missing_keys)
	274	info_dict_str = '\n\'info_dict\': {\n' + info_dict_str + '},\n'
	275	write_string(info_dict_str.replace('\n', '\n '), out=sys.stderr)
	276	self.assertFalse(
	277	missing_keys,
	278	'Missing keys in test definition: %s' % (
	279	', '.join(sorted(missing_keys))))
	280
	281
	282	def assertRegexpMatches(self, text, regexp, msg=None):
	283	if hasattr(self, 'assertRegexp'):
	284	return self.assertRegexp(text, regexp, msg)
	285	else:
	286	m = re.match(regexp, text)
	287	if not m:
	288	note = 'Regexp didn\'t match: %r not found' % (regexp)
	289	if len(text) < 1000:
	290	note += ' in %r' % text
	291	if msg is None:
	292	msg = note
	293	else:
	294	msg = note + ', ' + msg
	295	self.assertTrue(m, msg)
	296
	297
	298	def assertGreaterEqual(self, got, expected, msg=None):
	299	if not (got >= expected):
	300	if msg is None:
	301	msg = f'{got!r} not greater than or equal to {expected!r}'
	302	self.assertTrue(got >= expected, msg)
	303
	304
	305	def assertLessEqual(self, got, expected, msg=None):
	306	if not (got <= expected):
	307	if msg is None:
	308	msg = f'{got!r} not less than or equal to {expected!r}'
	309	self.assertTrue(got <= expected, msg)
	310
	311
	312	def assertEqual(self, got, expected, msg=None):
	313	if not (got == expected):
	314	if msg is None:
	315	msg = f'{got!r} not equal to {expected!r}'
	316	self.assertTrue(got == expected, msg)
	317
	318
	319	def expect_warnings(ydl, warnings_re):
	320	real_warning = ydl.report_warning
	321
	322	def _report_warning(w, args, *kwargs):
	323	if not any(re.search(w_re, w) for w_re in warnings_re):
	324	real_warning(w, args, *kwargs)
	325
	326	ydl.report_warning = _report_warning
	327
	328
	329	def http_server_port(httpd):
	330	if os.name == 'java' and isinstance(httpd.socket, ssl.SSLSocket):
	331	# In Jython SSLSocket is not a subclass of socket.socket
	332	sock = httpd.socket.sock
	333	else:
	334	sock = httpd.socket
	335	return sock.getsockname()[1]
	336
	337
	338	def verify_address_availability(address):
	339	if find_available_port(address) is None:
	340	pytest.skip(f'Unable to bind to source address {address} (address may not exist)')