jfr.im git - yt-dlp.git/blobdiff - test/test_YoutubeDL.py
[test:download] Raise on network errors (#10283)
[yt-dlp.git] / test / test_YoutubeDL.py
index ee6c52713581071a691ff0ffbcbbf711abb0b206..841ce1af3e7572e778acea18721ad948924b37f7 100644 (file)
@@ -8,10 +8,11 @@
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 
+import contextlib
 import copy
 import json
 
-from test.helper import FakeYDL, assertRegexpMatches
+from test.helper import FakeYDL, assertRegexpMatches, try_rm
 from yt_dlp import YoutubeDL
 from yt_dlp.compat import compat_os_name
 from yt_dlp.extractor import YoutubeIE
@@ -24,6 +25,7 @@
     int_or_none,
     match_filter_func,
 )
+from yt_dlp.utils.traversal import traverse_obj
 
 TEST_URL = 'http://localhost/sample.mp4'
 
@@ -128,8 +130,8 @@ def test(inp, *expected, multi=False):
                 'allow_multiple_audio_streams': multi,
             })
             ydl.process_ie_result(info_dict.copy())
-            downloaded = map(lambda x: x['format_id'], ydl.downloaded_info_dicts)
-            self.assertEqual(list(downloaded), list(expected))
+            downloaded = [x['format_id'] for x in ydl.downloaded_info_dicts]
+            self.assertEqual(downloaded, list(expected))
 
         test('20/47', '47')
         test('20/71/worst', '35')
@@ -139,6 +141,8 @@ def test(inp, *expected, multi=False):
         test('example-with-dashes', 'example-with-dashes')
         test('all', '2', '47', '45', 'example-with-dashes', '35')
         test('mergeall', '2+47+45+example-with-dashes+35', multi=True)
+        # See: https://github.com/yt-dlp/yt-dlp/pulls/8797
+        test('7_a/worst', '35')
 
     def test_format_selection_audio(self):
         formats = [
@@ -180,7 +184,7 @@ def test_format_selection_audio_exts(self):
         ]
 
         info_dict = _make_result(formats)
-        ydl = YDL({'format': 'best'})
+        ydl = YDL({'format': 'best', 'format_sort': ['abr', 'ext']})
         ydl.sort_formats(info_dict)
         ydl.process_ie_result(copy.deepcopy(info_dict))
         downloaded = ydl.downloaded_info_dicts[0]
@@ -192,7 +196,7 @@ def test_format_selection_audio_exts(self):
         downloaded = ydl.downloaded_info_dicts[0]
         self.assertEqual(downloaded['format_id'], 'mp3-64')
 
-        ydl = YDL({'prefer_free_formats': True})
+        ydl = YDL({'prefer_free_formats': True, 'format_sort': ['abr', 'ext']})
         ydl.sort_formats(info_dict)
         ydl.process_ie_result(copy.deepcopy(info_dict))
         downloaded = ydl.downloaded_info_dicts[0]
@@ -512,10 +516,8 @@ def test_format_filtering(self):
         self.assertEqual(downloaded_ids, ['D', 'C', 'B'])
 
         ydl = YDL({'format': 'best[height<40]'})
-        try:
+        with contextlib.suppress(ExtractorError):
             ydl.process_ie_result(info_dict)
-        except ExtractorError:
-            pass
         self.assertEqual(ydl.downloaded_info_dicts, [])
 
     def test_default_format_spec(self):
@@ -649,8 +651,8 @@ def test_add_extra_info(self):
         'formats': [
             {'id': 'id 1', 'height': 1080, 'width': 1920},
             {'id': 'id 2', 'height': 720},
-            {'id': 'id 3'}
-        ]
+            {'id': 'id 3'},
+        ],
     }
 
     def test_prepare_outtmpl_and_filename(self):
@@ -668,7 +670,7 @@ def test(tmpl, expected, *, info=None, **params):
             for (name, got), expect in zip((('outtmpl', out), ('filename', fname)), expected):
                 if callable(expect):
                     self.assertTrue(expect(got), f'Wrong {name} from {tmpl}')
-                else:
+                elif expect is not None:
                     self.assertEqual(got, expect, f'Wrong {name} from {tmpl}')
 
         # Side-effects
@@ -683,7 +685,8 @@ def test(tmpl, expected, *, info=None, **params):
         test('%(id)s.%(ext)s', '1234.mp4')
         test('%(duration_string)s', ('27:46:40', '27-46-40'))
         test('%(resolution)s', '1080p')
-        test('%(playlist_index)s', '001')
+        test('%(playlist_index|)s', '001')
+        test('%(playlist_index&{}!)s', '1!')
         test('%(playlist_autonumber)s', '02')
         test('%(autonumber)s', '00001')
         test('%(autonumber+2)03d', '005', autonumber_start=3)
@@ -726,7 +729,7 @@ def expect_same_infodict(out):
                 self.assertEqual(got_dict.get(info_field), expected, info_field)
             return True
 
-        test('%()j', (expect_same_infodict, str))
+        test('%()j', (expect_same_infodict, None))
 
         # NA placeholder
         NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
@@ -754,20 +757,23 @@ def expect_same_infodict(out):
         test('%(ext)c', 'm')
         test('%(id)d %(id)r', "1234 '1234'")
         test('%(id)r %(height)r', "'1234' 1080")
+        test('%(title5)a %(height)a', (R"'\xe1\xe9\xed \U0001d400' 1080", None))
         test('%(ext)s-%(ext|def)d', 'mp4-def')
-        test('%(width|0)04d', '0000')
+        test('%(width|0)04d', '0')
         test('a%(width|b)d', 'ab', outtmpl_na_placeholder='none')
 
         FORMATS = self.outtmpl_info['formats']
-        sanitize = lambda x: x.replace(':', '：').replace('"', '＂').replace('\n', ' ')
 
         # Custom type casting
         test('%(formats.:.id)l', 'id 1, id 2, id 3')
         test('%(formats.:.id)#l', ('id 1\nid 2\nid 3', 'id 1 id 2 id 3'))
         test('%(ext)l', 'mp4')
         test('%(formats.:.id) 18l', '  id 1, id 2, id 3')
-        test('%(formats)j', (json.dumps(FORMATS), sanitize(json.dumps(FORMATS))))
-        test('%(formats)#j', (json.dumps(FORMATS, indent=4), sanitize(json.dumps(FORMATS, indent=4))))
+        test('%(formats)j', (json.dumps(FORMATS), None))
+        test('%(formats)#j', (
+            json.dumps(FORMATS, indent=4),
+            json.dumps(FORMATS, indent=4).replace(':', '：').replace('"', '＂').replace('\n', ' '),
+        ))
         test('%(title5).3B', 'á')
         test('%(title5)U', 'áéí 𝐀')
         test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀')
@@ -779,9 +785,9 @@ def expect_same_infodict(out):
         test('%(title4)#S', 'foo_bar_test')
         test('%(title4).10S', ('foo "bar" ', 'foo "bar"' + ('#' if compat_os_name == 'nt' else ' ')))
         if compat_os_name == 'nt':
-            test('%(title4)q', ('"foo \\"bar\\" test"', "＂foo ⧹＂bar⧹＂ test＂"))
-            test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', '＂id 1＂ ＂id 2＂ ＂id 3＂'))
-            test('%(formats.0.id)#q', ('"id 1"', '＂id 1＂'))
+            test('%(title4)q', ('"foo ""bar"" test"', None))
+            test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', None))
+            test('%(formats.0.id)#q', ('"id 1"', None))
         else:
             test('%(title4)q', ('\'foo "bar" test\'', '\'foo "bar" test\''))
             test('%(formats.:.id)#q', "'id 1' 'id 2' 'id 3'")
@@ -792,8 +798,9 @@ def expect_same_infodict(out):
         test('%(title|%)s %(title|%%)s', '% %%')
         test('%(id+1-height+3)05d', '00158')
         test('%(width+100)05d', 'NA')
-        test('%(formats.0) 15s', ('% 15s' % FORMATS[0], '% 15s' % sanitize(str(FORMATS[0]))))
-        test('%(formats.0)r', (repr(FORMATS[0]), sanitize(repr(FORMATS[0]))))
+        test('%(filesize*8)d', '8192')
+        test('%(formats.0) 15s', ('% 15s' % FORMATS[0], None))
+        test('%(formats.0)r', (repr(FORMATS[0]), None))
         test('%(height.0)03d', '001')
         test('%(-height.0)04d', '-001')
         test('%(formats.-1.id)s', FORMATS[-1]['id'])
@@ -805,7 +812,7 @@ def expect_same_infodict(out):
         out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]}
                           if 'height' in f else {'id': f['id']}
                           for f in FORMATS])
-        test('%(formats.:.{id,height.:2})j', (out, sanitize(out)))
+        test('%(formats.:.{id,height.:2})j', (out, None))
         test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS))
         test('%(.{id,title})j', ('{"id": "1234"}', '{"id": "1234"}'))
 
@@ -825,6 +832,7 @@ def expect_same_infodict(out):
         test('%(id&hi {:>10} {}|)s', 'hi       1234 1234')
         test(R'%(id&{0} {}|)s', 'NA')
         test(R'%(id&{0.1}|)s', 'NA')
+        test('%(height&{:,d})S', '1,080')
 
         # Laziness
         def gen():
@@ -834,8 +842,8 @@ def gen():
 
         # Empty filename
         test('%(foo|)s-%(bar|)s.%(ext)s', '-.mp4')
-        # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4'))  # fixme
-        # test('%(foo|)s', ('', '_'))  # fixme
+        # test('%(foo|)s.%(ext)s', ('.mp4', '_.mp4'))  # FIXME: ?
+        # test('%(foo|)s', ('', '_'))  # FIXME: ?
 
         # Environment variable expansion for prepare_filename
         os.environ['__yt_dlp_var'] = 'expanded'
@@ -852,7 +860,7 @@ def gen():
         test('Hello %(title1)s', 'Hello $PATH')
         test('Hello %(title2)s', 'Hello %PATH%')
         test('%(title3)s', ('foo/bar\\test', 'foo⧸bar⧹test'))
-        test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo⧸bar⧹test' % os.path.sep))
+        test('folder/%(title3)s', ('folder/foo/bar\\test', f'folder{os.path.sep}foo⧸bar⧹test'))
 
     def test_format_note(self):
         ydl = YoutubeDL()
@@ -874,22 +882,22 @@ def run(self, info):
                     f.write('EXAMPLE')
                 return [info['filepath']], info
 
-        def run_pp(params, PP):
+        def run_pp(params, pp):
             with open(filename, 'w') as f:
                 f.write('EXAMPLE')
             ydl = YoutubeDL(params)
-            ydl.add_post_processor(PP())
+            ydl.add_post_processor(pp())
             ydl.post_process(filename, {'filepath': filename})
 
         run_pp({'keepvideo': True}, SimplePP)
-        self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
-        self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+        self.assertTrue(os.path.exists(filename), f'{filename} doesn\'t exist')
+        self.assertTrue(os.path.exists(audiofile), f'{audiofile} doesn\'t exist')
         os.unlink(filename)
         os.unlink(audiofile)
 
         run_pp({'keepvideo': False}, SimplePP)
-        self.assertFalse(os.path.exists(filename), '%s exists' % filename)
-        self.assertTrue(os.path.exists(audiofile), '%s doesn\'t exist' % audiofile)
+        self.assertFalse(os.path.exists(filename), f'{filename} exists')
+        self.assertTrue(os.path.exists(audiofile), f'{audiofile} doesn\'t exist')
         os.unlink(audiofile)
 
         class ModifierPP(PostProcessor):
@@ -899,7 +907,7 @@ def run(self, info):
                 return [], info
 
         run_pp({'keepvideo': False}, ModifierPP)
-        self.assertTrue(os.path.exists(filename), '%s doesn\'t exist' % filename)
+        self.assertTrue(os.path.exists(filename), f'{filename} doesn\'t exist')
         os.unlink(filename)
 
     def test_match_filter(self):
@@ -911,7 +919,7 @@ def test_match_filter(self):
             'duration': 30,
             'filesize': 10 * 1024,
             'playlist_id': '42',
-            'uploader': "變態妍字幕版 太妍 тест",
+            'uploader': '變態妍字幕版 太妍 тест',
             'creator': "тест ' 123 ' тест--",
             'webpage_url': 'http://example.com/watch?v=shenanigans',
         }
@@ -924,7 +932,7 @@ def test_match_filter(self):
             'description': 'foo',
             'filesize': 5 * 1024,
             'playlist_id': '43',
-            'uploader': "тест 123",
+            'uploader': 'тест 123',
             'webpage_url': 'http://example.com/watch?v=SHENANIGANS',
         }
         videos = [first, second]
@@ -932,7 +940,7 @@ def test_match_filter(self):
         def get_videos(filter_=None):
             ydl = YDL({'match_filter': filter_, 'simulate': True})
             for v in videos:
-                ydl.process_ie_result(v, download=True)
+                ydl.process_ie_result(v.copy(), download=True)
             return [v['id'] for v in ydl.downloaded_info_dicts]
 
         res = get_videos()
@@ -1171,7 +1179,7 @@ def _real_extract(self, url):
                     })
                 return {
                     'id': video_id,
-                    'title': 'Video %s' % video_id,
+                    'title': f'Video {video_id}',
                     'formats': formats,
                 }
 
@@ -1185,8 +1193,8 @@ def _entries(self):
                         '_type': 'url_transparent',
                         'ie_key': VideoIE.ie_key(),
                         'id': video_id,
-                        'url': 'video:%s' % video_id,
-                        'title': 'Video Transparent %s' % video_id,
+                        'url': f'video:{video_id}',
+                        'title': f'Video Transparent {video_id}',
                     }
 
             def _real_extract(self, url):
@@ -1209,6 +1217,129 @@ def _real_extract(self, url):
         self.assertEqual(downloaded['extractor'], 'Video')
         self.assertEqual(downloaded['extractor_key'], 'Video')
 
+    def test_header_cookies(self):
+        from http.cookiejar import Cookie
+
+        ydl = FakeYDL()
+        ydl.report_warning = lambda *_, **__: None
+
+        def cookie(name, value, version=None, domain='', path='', secure=False, expires=None):
+            return Cookie(
+                version or 0, name, value, None, False,
+                domain, bool(domain), bool(domain), path, bool(path),
+                secure, expires, False, None, None, rest={})
+
+        _test_url = 'https://yt.dlp/test'
+
+        def test(encoded_cookies, cookies, *, headers=False, round_trip=None, error_re=None):
+            def _test():
+                ydl.cookiejar.clear()
+                ydl._load_cookies(encoded_cookies, autoscope=headers)
+                if headers:
+                    ydl._apply_header_cookies(_test_url)
+                data = {'url': _test_url}
+                ydl._calc_headers(data)
+                self.assertCountEqual(
+                    map(vars, ydl.cookiejar), map(vars, cookies),
+                    'Extracted cookiejar.Cookie is not the same')
+                if not headers:
+                    self.assertEqual(
+                        data.get('cookies'), round_trip or encoded_cookies,
+                        'Cookie is not the same as round trip')
+                ydl.__dict__['_YoutubeDL__header_cookies'] = []
+
+            with self.subTest(msg=encoded_cookies):
+                if not error_re:
+                    _test()
+                    return
+                with self.assertRaisesRegex(Exception, error_re):
+                    _test()
+
+        test('test=value; Domain=.yt.dlp', [cookie('test', 'value', domain='.yt.dlp')])
+        test('test=value', [cookie('test', 'value')], error_re=r'Unscoped cookies are not allowed')
+        test('cookie1=value1; Domain=.yt.dlp; Path=/test; cookie2=value2; Domain=.yt.dlp; Path=/', [
+            cookie('cookie1', 'value1', domain='.yt.dlp', path='/test'),
+            cookie('cookie2', 'value2', domain='.yt.dlp', path='/')])
+        test('test=value; Domain=.yt.dlp; Path=/test; Secure; Expires=9999999999', [
+            cookie('test', 'value', domain='.yt.dlp', path='/test', secure=True, expires=9999999999)])
+        test('test="value; "; path=/test; domain=.yt.dlp', [
+            cookie('test', 'value; ', domain='.yt.dlp', path='/test')],
+            round_trip='test="value\\073 "; Domain=.yt.dlp; Path=/test')
+        test('name=; Domain=.yt.dlp', [cookie('name', '', domain='.yt.dlp')],
+             round_trip='name=""; Domain=.yt.dlp')
+
+        test('test=value', [cookie('test', 'value', domain='.yt.dlp')], headers=True)
+        test('cookie1=value; Domain=.yt.dlp; cookie2=value', [], headers=True, error_re=r'Invalid syntax')
+        ydl.deprecated_feature = ydl.report_error
+        test('test=value', [], headers=True, error_re=r'Passing cookies as a header is a potential security risk')
+
+    def test_infojson_cookies(self):
+        TEST_FILE = 'test_infojson_cookies.info.json'
+        TEST_URL = 'https://example.com/example.mp4'
+        COOKIES = 'a=b; Domain=.example.com; c=d; Domain=.example.com'
+        COOKIE_HEADER = {'Cookie': 'a=b; c=d'}
+
+        ydl = FakeYDL()
+        ydl.process_info = lambda x: ydl._write_info_json('test', x, TEST_FILE)
+
+        def make_info(info_header_cookies=False, fmts_header_cookies=False, cookies_field=False):
+            fmt = {'url': TEST_URL}
+            if fmts_header_cookies:
+                fmt['http_headers'] = COOKIE_HEADER
+            if cookies_field:
+                fmt['cookies'] = COOKIES
+            return _make_result([fmt], http_headers=COOKIE_HEADER if info_header_cookies else None)
+
+        def test(initial_info, note):
+            result = {}
+            result['processed'] = ydl.process_ie_result(initial_info)
+            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+                            msg=f'No cookies set in cookiejar after initial process when {note}')
+            ydl.cookiejar.clear()
+            with open(TEST_FILE) as infojson:
+                result['loaded'] = ydl.sanitize_info(json.load(infojson), True)
+            result['final'] = ydl.process_ie_result(result['loaded'].copy(), download=False)
+            self.assertTrue(ydl.cookiejar.get_cookies_for_url(TEST_URL),
+                            msg=f'No cookies set in cookiejar after final process when {note}')
+            ydl.cookiejar.clear()
+            for key in ('processed', 'loaded', 'final'):
+                info = result[key]
+                self.assertIsNone(
+                    traverse_obj(info, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False),
+                    msg=f'Cookie header not removed in {key} result when {note}')
+                self.assertEqual(
+                    traverse_obj(info, ((None, ('formats', 0)), 'cookies'), get_all=False), COOKIES,
+                    msg=f'No cookies field found in {key} result when {note}')
+
+        test({'url': TEST_URL, 'http_headers': COOKIE_HEADER, 'id': '1', 'title': 'x'}, 'no formats field')
+        test(make_info(info_header_cookies=True), 'info_dict header cokies')
+        test(make_info(fmts_header_cookies=True), 'format header cookies')
+        test(make_info(info_header_cookies=True, fmts_header_cookies=True), 'info_dict and format header cookies')
+        test(make_info(info_header_cookies=True, fmts_header_cookies=True, cookies_field=True), 'all cookies fields')
+        test(make_info(cookies_field=True), 'cookies format field')
+        test({'url': TEST_URL, 'cookies': COOKIES, 'id': '1', 'title': 'x'}, 'info_dict cookies field only')
+
+        try_rm(TEST_FILE)
+
+    def test_add_headers_cookie(self):
+        def check_for_cookie_header(result):
+            return traverse_obj(result, ((None, ('formats', 0)), 'http_headers', 'Cookie'), casesense=False, get_all=False)
+
+        ydl = FakeYDL({'http_headers': {'Cookie': 'a=b'}})
+        ydl._apply_header_cookies(_make_result([])['webpage_url'])  # Scope to input webpage URL: .example.com
+
+        fmt = {'url': 'https://example.com/video.mp4'}
+        result = ydl.process_ie_result(_make_result([fmt]), download=False)
+        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies in result info_dict')
+        self.assertEqual(result.get('cookies'), 'a=b; Domain=.example.com', msg='No cookies were set in cookies field')
+        self.assertIn('a=b', ydl.cookiejar.get_cookie_header(fmt['url']), msg='No cookies were set in cookiejar')
+
+        fmt = {'url': 'https://wrong.com/video.mp4'}
+        result = ydl.process_ie_result(_make_result([fmt]), download=False)
+        self.assertIsNone(check_for_cookie_header(result), msg='http_headers cookies for wrong domain')
+        self.assertFalse(result.get('cookies'), msg='Cookies set in cookies field for wrong domain')
+        self.assertFalse(ydl.cookiejar.get_cookie_header(fmt['url']), msg='Cookies set in cookiejar for wrong domain')
+
 
 if __name__ == '__main__':
     unittest.main()