[yt-dlp.git] / test / test_download.py

#!/usr/bin/env python

# Allow direct execution
import os
import sys
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

from test.helper import (
    get_params,
    get_testcases,
    try_rm,
    md5,
    report_warning
)


import hashlib
import io
import json
import re
import socket

import youtube_dl.YoutubeDL
from youtube_dl.utils import (
    compat_http_client,
    compat_str,
    compat_urllib_error,
    compat_HTTPError,
    DownloadError,
    ExtractorError,
    UnavailableVideoError,
)
from youtube_dl.extractor import get_info_extractor

# Number of download attempts made before a test that keeps failing with
# network errors gives up and is skipped.
RETRIES = 3

class YoutubeDL(youtube_dl.YoutubeDL):
    """YoutubeDL subclass used by the tests: warnings raise, processed info dicts are recorded."""
    def __init__(self, *args, **kwargs):
        # Rebind to_stderr to to_screen on the instance before the parent
        # constructor runs, so calls to to_stderr go through to_screen.
        self.to_stderr = self.to_screen
        # Every info_dict passed to process_info() is appended to this list.
        self.processed_info_dicts = []
        super(YoutubeDL, self).__init__(*args, **kwargs)
    def report_warning(self, message):
        # Don't accept warnings during tests
        raise ExtractorError(message)
    def process_info(self, info_dict):
        # Record the info_dict first, then defer to the parent implementation
        # and pass its return value through unchanged.
        self.processed_info_dicts.append(info_dict)
        return super(YoutubeDL, self).process_info(info_dict)

def _file_md5(fn):
    """Return the hexadecimal MD5 digest of the file at path ``fn``.

    The file is hashed in fixed-size chunks so that a large download does not
    have to be loaded into memory in one piece.  The digest is identical to
    hashing the complete file content at once.
    """
    digest = hashlib.md5()
    with open(fn, 'rb') as f:
        # iter() with a sentinel keeps calling f.read() until it returns the
        # empty bytes object, i.e. until end of file.
        for chunk in iter(lambda: f.read(64 * 1024), b''):
            digest.update(chunk)
    return digest.hexdigest()

# Test case definitions, fetched once at import time.  One test method is
# generated per entry further down in this module.
defs = get_testcases()


class TestDownload(unittest.TestCase):
    """Test case class that holds the download tests.

    The class body defines no test methods itself; it only provides the
    shared configuration and per-test setup.
    """

    # None removes the length limit on diffs shown for failed comparisons.
    maxDiff = None

    def setUp(self):
        """Make the module-level test case definitions available as ``self.defs``."""
        self.defs = defs

### Dynamically generate tests
def generator(test_case):
    """Build the unittest method that runs one download test case.

    ``test_case`` is one entry of ``get_testcases()``.  The keys read here are
    ``name`` and ``url`` plus the optional ``add_ie``, ``playlist``,
    ``info_dict``, ``file``, ``skip``, ``params`` and ``md5``.

    The returned function takes the ``TestCase`` instance as its only
    argument.  It prints a message and returns without failing when the test
    has to be skipped (extractor not working, output file name unknown,
    explicit ``skip`` entry) or when the download keeps failing with network
    errors.  Otherwise it downloads ``test_case['url']`` and checks the
    downloaded files and the written ``.info.json`` against the expectations.
    """

    def test_template(self):
        # NOTE: deliberately no docstring here.  unittest reports the first
        # docstring line instead of the test name in verbose output, which
        # would make all generated tests look the same.
        ie = get_info_extractor(test_case['name'])
        other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]

        def print_skipping(reason):
            print('Skipping %s: %s' % (test_case['name'], reason))

        if not ie.working():
            print_skipping('IE marked as not _WORKING')
            return
        if 'playlist' not in test_case:
            # Without an explicit "file" entry the output name is derived
            # from the info_dict, which needs at least "id" and "ext".
            info_dict = test_case.get('info_dict', {})
            if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
                print_skipping('The output file cannot be know, the "file" '
                    'key is missing or the info_dict is incomplete')
                return
        if 'skip' in test_case:
            print_skipping(test_case['skip'])
            return
        for other_ie in other_ies:
            if not other_ie.working():
                print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
                return

        params = get_params(test_case.get('params', {}))

        ydl = YoutubeDL(params)
        ydl.add_default_info_extractors()

        # File names for which the downloader reported the "finished" status.
        finished_hook_called = set()

        def _hook(status):
            if status['status'] == 'finished':
                finished_hook_called.add(status['filename'])
        ydl.add_progress_hook(_hook)

        def get_tc_filename(tc):
            return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))

        # A playlist test carries one sub test case per entry; a plain test
        # is handled as a list containing only itself.
        test_cases = test_case.get('playlist', [test_case])

        def try_rm_tcs_files():
            for tc in test_cases:
                tc_filename = get_tc_filename(tc)
                try_rm(tc_filename)
                try_rm(tc_filename + '.part')
                try_rm(os.path.splitext(tc_filename)[0] + '.info.json')

        # Remove leftovers of earlier runs before downloading.
        try_rm_tcs_files()
        try:
            # Exception types that are treated as network problems and retried.
            network_errors = (
                compat_urllib_error.URLError,
                socket.timeout,
                UnavailableVideoError,
                compat_http_client.BadStatusLine,
            )
            try_num = 1
            while True:
                try:
                    ydl.download([test_case['url']])
                except (DownloadError, ExtractorError) as err:
                    # Check if the exception is not a network related one
                    # NOTE(review): the membership test compares exact types,
                    # so an HTTPError is re-raised by the first clause already
                    # and the 503 clause never changes the outcome - confirm
                    # whether HTTP 503 was meant to be retried instead.
                    exc_type = err.exc_info[0]
                    if (exc_type not in network_errors
                            or (exc_type == compat_HTTPError and err.exc_info[1].code == 503)):
                        raise

                    if try_num == RETRIES:
                        report_warning(u'Failed due to network errors, skipping...')
                        return

                    print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))

                    try_num += 1
                else:
                    break

            # Read from the raw test case parameters (not from ``params``);
            # the value is the same for every entry of a playlist.
            skip_download = test_case.get('params', {}).get('skip_download', False)

            for tc in test_cases:
                tc_filename = get_tc_filename(tc)
                expected_info = tc.get('info_dict', {})
                if not skip_download:
                    self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
                    self.assertTrue(
                        tc_filename in finished_hook_called,
                        msg='No "finished" progress hook call for ' + tc_filename)
                info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
                self.assertTrue(
                    os.path.exists(info_json_fn),
                    msg='Missing info JSON file ' + info_json_fn)
                if 'md5' in tc:
                    md5_for_file = _file_md5(tc_filename)
                    self.assertEqual(md5_for_file, tc['md5'])
                with io.open(info_json_fn, encoding='utf-8') as infof:
                    info_dict = json.load(infof)
                for (info_field, expected) in expected_info.items():
                    if isinstance(expected, compat_str) and expected.startswith('re:'):
                        # "re:<pattern>": the field must be a string that the
                        # pattern matches from its start.
                        got = info_dict.get(info_field)
                        match_str = expected[len('re:'):]
                        match_rex = re.compile(match_str)

                        self.assertTrue(
                            isinstance(got, compat_str) and match_rex.match(got),
                            u'field %s (value: %r) should match %r' % (info_field, got, match_str))
                    else:
                        if isinstance(expected, compat_str) and expected.startswith('md5:'):
                            # "md5:<hash>": compare the hash of the field
                            # value instead of the value itself.
                            got = 'md5:' + md5(info_dict.get(info_field))
                        else:
                            got = info_dict.get(info_field)
                        self.assertEqual(expected, got,
                            u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))

                # If checkable fields are missing from the test case, print the info_dict
                # (string values of 250 characters or more are shown as their md5)
                checkable_fields = ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location')
                test_info_dict = dict(
                    (key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
                    for key, value in info_dict.items()
                    if value and key in checkable_fields)
                if not all(key in expected_info for key in test_info_dict):
                    sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')

                # Check for the presence of mandatory fields
                for key in ('id', 'url', 'title', 'ext'):
                    self.assertTrue(info_dict.get(key), u'Missing mandatory field: %s' % key)
                # Check for mandatory fields that are automatically set by YoutubeDL
                for key in ['webpage_url', 'extractor', 'extractor_key']:
                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
        finally:
            # Always clean up the downloaded files, whatever the outcome.
            try_rm_tcs_files()

    return test_template

### And add them to TestDownload
# Every test case becomes a ``test_<name>`` method.  When that name is already
# taken, a numeric suffix (``_1``, ``_2``, ...) is appended until the name is
# free, so no previously attached method is overwritten.
for n, test_case in enumerate(defs):
    test_method = generator(test_case)
    tname = 'test_' + str(test_case['name'])
    i = 1
    while hasattr(TestDownload, tname):
        tname = 'test_'  + str(test_case['name']) + '_' + str(i)
        i += 1
    test_method.__name__ = tname
    setattr(TestDownload, test_method.__name__, test_method)
    # Remove the module-level name again; presumably so that the function is
    # only reachable through TestDownload and not picked up a second time as
    # a module-level ``test_*`` object - TODO confirm.
    del test_method


# Run the generated tests when this file is executed directly.
if __name__ == '__main__':
    unittest.main()
Commit	Line	Data
fd5ff020 FV	1	#!/usr/bin/env python
fd5ff020 FV	2
44a5f171 PH	3	# Allow direct execution
	4	import os
	5	import sys
	6	import unittest
	7	sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
	8
dd508b7c FV	9	from test.helper import (
	10	get_params,
	11	get_testcases,
dd508b7c FV	12	try_rm,
	13	md5,
	14	report_warning
	15	)
44a5f171 PH	16
44a5f171 PH	17
efe8902f	18	import hashlib
fd5ff020	19	import io
7f60b5aa	20	import json
491ed3dd	21	import re
6b3aef80	22	import socket
cdab8aa3	23
8222d8de	24	import youtube_dl.YoutubeDL
44a5f171	25	from youtube_dl.utils import (
dcf3eec4	26	compat_http_client,
44a5f171 PH	27	compat_str,
44a5f171 PH	28	compat_urllib_error,
f6cc16f5	29	compat_HTTPError,
44a5f171 PH	30	DownloadError,
	31	ExtractorError,
	32	UnavailableVideoError,
	33	)
9ee2b5f6	34	from youtube_dl.extractor import get_info_extractor
fd5ff020	35
8cc83b8d FV	36	RETRIES = 3
8cc83b8d FV	37
8222d8de	38	class YoutubeDL(youtube_dl.YoutubeDL):
fd5ff020	39	def __init__(self, args, *kwargs):
fd5ff020	40	self.to_stderr = self.to_screen
0eaf520d	41	self.processed_info_dicts = []
8222d8de	42	super(YoutubeDL, self).__init__(args, *kwargs)
476203d0	43	def report_warning(self, message):
be95cac1 FV	44	# Don't accept warnings during tests
be95cac1 FV	45	raise ExtractorError(message)
0eaf520d FV	46	def process_info(self, info_dict):
0eaf520d FV	47	self.processed_info_dicts.append(info_dict)
8222d8de	48	return super(YoutubeDL, self).process_info(info_dict)
1535ac2a	49
fd5ff020 FV	50	def _file_md5(fn):
	51	with open(fn, 'rb') as f:
	52	return hashlib.md5(f.read()).hexdigest()
	53
fc2c063e	54	defs = get_testcases()
6b47c7f2	55
0eaf520d	56
1535ac2a	57	class TestDownload(unittest.TestCase):
744435f2	58	maxDiff = None
fd5ff020	59	def setUp(self):
fd5ff020 FV	60	self.defs = defs
fd5ff020 FV	61
911ee27e	62	### Dynamically generate tests
5d01a647 PH	63	def generator(test_case):
5d01a647 PH	64
1535ac2a	65	def test_template(self):
d1cade5a	66	ie = youtube_dl.extractor.get_info_extractor(test_case['name'])
9ee2b5f6	67	other_ies = [get_info_extractor(ie_key) for ie_key in test_case.get('add_ie', [])]
bc2884af JMF	68	def print_skipping(reason):
bc2884af JMF	69	print('Skipping %s: %s' % (test_case['name'], reason))
9ee2b5f6	70	if not ie.working():
bc2884af	71	print_skipping('IE marked as not _WORKING')
fd5ff020	72	return
702665c0 JMF	73	if 'playlist' not in test_case:
	74	info_dict = test_case.get('info_dict', {})
	75	if not test_case.get('file') and not (info_dict.get('id') and info_dict.get('ext')):
	76	print_skipping('The output file cannot be know, the "file" '
	77	'key is missing or the info_dict is incomplete')
	78	return
fd5ff020	79	if 'skip' in test_case:
bc2884af	80	print_skipping(test_case['skip'])
fd5ff020	81	return
9ee2b5f6 JMF	82	for other_ie in other_ies:
	83	if not other_ie.working():
	84	print_skipping(u'test depends on %sIE, marked as not WORKING' % other_ie.ie_key())
	85	return
0eaf520d	86
44a5f171	87	params = get_params(test_case.get('params', {}))
0eaf520d	88
8222d8de	89	ydl = YoutubeDL(params)
023fa8c4	90	ydl.add_default_info_extractors()
bffbd5f0 PH	91	finished_hook_called = set()
	92	def _hook(status):
	93	if status['status'] == 'finished':
	94	finished_hook_called.add(status['filename'])
933605d7	95	ydl.add_progress_hook(_hook)
5c892b0b	96
702665c0 JMF	97	def get_tc_filename(tc):
	98	return tc.get('file') or ydl.prepare_filename(tc.get('info_dict', {}))
	99
5c892b0b	100	test_cases = test_case.get('playlist', [test_case])
702665c0 JMF	101	def try_rm_tcs_files():
	102	for tc in test_cases:
	103	tc_filename = get_tc_filename(tc)
	104	try_rm(tc_filename)
	105	try_rm(tc_filename + '.part')
4eb92208	106	try_rm(os.path.splitext(tc_filename)[0] + '.info.json')
702665c0	107	try_rm_tcs_files()
5c892b0b	108	try:
dd508b7c FV	109	try_num = 1
dd508b7c FV	110	while True:
8cc83b8d	111	try:
8222d8de	112	ydl.download([test_case['url']])
8cc83b8d	113	except (DownloadError, ExtractorError) as err:
8cc83b8d	114	# Check if the exception is not a network related one
dcf3eec4	115	if not err.exc_info[0] in (compat_urllib_error.URLError, socket.timeout, UnavailableVideoError, compat_http_client.BadStatusLine) or (err.exc_info[0] == compat_HTTPError and err.exc_info[1].code == 503):
8cc83b8d FV	116	raise
8cc83b8d FV	117
dd508b7c FV	118	if try_num == RETRIES:
	119	report_warning(u'Failed due to network errors, skipping...')
	120	return
	121
	122	print('Retrying: {0} failed tries\n\n##########\n\n'.format(try_num))
	123
	124	try_num += 1
8cc83b8d FV	125	else:
8cc83b8d FV	126	break
5c892b0b PH	127
5c892b0b PH	128	for tc in test_cases:
702665c0	129	tc_filename = get_tc_filename(tc)
511eda8e	130	if not test_case.get('params', {}).get('skip_download', False):
702665c0 JMF	131	self.assertTrue(os.path.exists(tc_filename), msg='Missing file ' + tc_filename)
702665c0 JMF	132	self.assertTrue(tc_filename in finished_hook_called)
4eb92208 PH	133	info_json_fn = os.path.splitext(tc_filename)[0] + '.info.json'
4eb92208 PH	134	self.assertTrue(os.path.exists(info_json_fn))
5c892b0b	135	if 'md5' in tc:
702665c0	136	md5_for_file = _file_md5(tc_filename)
5c892b0b	137	self.assertEqual(md5_for_file, tc['md5'])
4eb92208	138	with io.open(info_json_fn, encoding='utf-8') as infof:
5c892b0b	139	info_dict = json.load(infof)
51ce3a75	140	for (info_field, expected) in tc.get('info_dict', {}).items():
491ed3dd	141	if isinstance(expected, compat_str) and expected.startswith('re:'):
51ce3a75	142	got = info_dict.get(info_field)
491ed3dd PH	143	match_str = expected[len('re:'):]
	144	match_rex = re.compile(match_str)
	145
	146	self.assertTrue(
	147	isinstance(got, compat_str) and match_rex.match(got),
	148	u'field %s (value: %r) should match %r' % (info_field, got, match_str))
	149	else:
	150	if isinstance(expected, compat_str) and expected.startswith('md5:'):
	151	got = 'md5:' + md5(info_dict.get(info_field))
	152	else:
	153	got = info_dict.get(info_field)
	154	self.assertEqual(expected, got,
	155	u'invalid value for field %s, expected %r, got %r' % (info_field, expected, got))
78d3442b FV	156
78d3442b FV	157	# If checkable fields are missing from the test case, print the info_dict
ee55fcbe	158	test_info_dict = dict((key, value if not isinstance(value, compat_str) or len(value) < 250 else 'md5:' + md5(value))
78d3442b FV	159	for key, value in info_dict.items()
	160	if value and key in ('title', 'description', 'uploader', 'upload_date', 'uploader_id', 'location'))
	161	if not all(key in tc.get('info_dict', {}).keys() for key in test_info_dict.keys()):
484aaeb2	162	sys.stderr.write(u'\n"info_dict": ' + json.dumps(test_info_dict, ensure_ascii=False, indent=4) + u'\n')
78d3442b FV	163
	164	# Check for the presence of mandatory fields
	165	for key in ('id', 'url', 'title', 'ext'):
	166	self.assertTrue(key in info_dict.keys() and info_dict[key])
9103bbc5	167	# Check for mandatory fields that are automatically set by YoutubeDL
be97abc2	168	for key in ['webpage_url', 'extractor', 'extractor_key']:
9103bbc5	169	self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
5c892b0b	170	finally:
702665c0	171	try_rm_tcs_files()
fd5ff020	172
1535ac2a	173	return test_template
fd5ff020	174
5d01a647	175	### And add them to TestDownload
f7ab6cbe	176	for n, test_case in enumerate(defs):
5d01a647	177	test_method = generator(test_case)
2eb88d95 PH	178	tname = 'test_' + str(test_case['name'])
	179	i = 1
	180	while hasattr(TestDownload, tname):
41beccba	181	tname = 'test_' + str(test_case['name']) + '_' + str(i)
2eb88d95 PH	182	i += 1
2eb88d95 PH	183	test_method.__name__ = tname
fd5ff020	184	setattr(TestDownload, test_method.__name__, test_method)
5d01a647	185	del test_method
cdab8aa3 PH	186
	187
	188	if __name__ == '__main__':
	189	unittest.main()