X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/cefecac12cd3c70f9c7a30992c60b05c2eb5d34e..54a63e80af82791d2f0985bd0176bb182963fd5f:/test/test_InfoExtractor.py diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py index bdd01e41a..31e8f8244 100644 --- a/test/test_InfoExtractor.py +++ b/test/test_InfoExtractor.py @@ -1,27 +1,32 @@ -#!/usr/bin/env python - -from __future__ import unicode_literals +#!/usr/bin/env python3 # Allow direct execution -import io import os import sys import unittest + sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__)))) -from test.helper import FakeYDL, expect_dict, expect_value, http_server_port -from youtube_dlc.compat import compat_etree_fromstring, compat_http_server -from youtube_dlc.extractor.common import InfoExtractor -from youtube_dlc.extractor import YoutubeIE, get_info_extractor -from youtube_dlc.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError + +import http.server import threading +from test.helper import FakeYDL, expect_dict, expect_value, http_server_port +from yt_dlp.compat import compat_etree_fromstring +from yt_dlp.extractor import YoutubeIE, get_info_extractor +from yt_dlp.extractor.common import InfoExtractor +from yt_dlp.utils import ( + ExtractorError, + RegexNotFoundError, + encode_data_uri, + strip_jsonp, +) TEAPOT_RESPONSE_STATUS = 418 TEAPOT_RESPONSE_BODY = "

418 I'm a teapot

" -class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler): +class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler): def log_message(self, format, *args): pass @@ -35,13 +40,15 @@ def do_GET(self): assert False -class TestIE(InfoExtractor): - pass +class DummyIE(InfoExtractor): + def _sort_formats(self, formats, field_preference=[]): + self._downloader.sort_formats( + {'formats': formats, '_format_sort_fields': field_preference}) class TestInfoExtractor(unittest.TestCase): def setUp(self): - self.ie = TestIE(FakeYDL()) + self.ie = DummyIE(FakeYDL()) def test_ie_key(self): self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE) @@ -62,6 +69,7 @@ def test_opengraph(self): + ''' self.assertEqual(ie._og_search_title(html), 'Foo') self.assertEqual(ie._og_search_description(html), 'Some video\'s description ') @@ -74,6 +82,7 @@ def test_opengraph(self): self.assertEqual(ie._og_search_property(('test0', 'test1'), html), 'foo > < bar') self.assertRaises(RegexNotFoundError, ie._og_search_property, 'test0', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._og_search_property, ('test0', 'test00'), html, None, fatal=True) + self.assertEqual(ie._og_search_property('test4', html), 'unquoted-value') def test_html_search_meta(self): ie = self.ie @@ -98,6 +107,209 @@ def test_html_search_meta(self): self.assertRaises(RegexNotFoundError, ie._html_search_meta, 'z', html, None, fatal=True) self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True) + def test_search_json_ld_realworld(self): + _TESTS = [ + # https://github.com/ytdl-org/youtube-dl/issues/23306 + ( + r'''''', + { + 'title': '1 On 1 With Kleio', + 'description': 'Kleio Valentien', + 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4', + 'timestamp': 1449347075, + 'duration': 743.0, + 'view_count': 1120958, + 'width': 1920, + 'height': 1080, + }, + {}, + ), + ( + r'''''', + { + 'timestamp': 1636523400, + 'title': 'md5:91fe569e952e4d146485740ae927662b', + }, + {'expected_type': 'NewsArticle'}, + ), + ( + r''' + ''', + { + 'chapters': [ + {'title': 'Explosie Turnhout', 'start_time': 70, 'end_time': 440}, + {'title': 'Jaarwisseling', 'start_time': 440, 'end_time': 1179}, + {'title': 'Natuurbranden Colorado', 'start_time': 1179, 'end_time': 1263}, + {'title': 'Klimaatverandering', 'start_time': 1263, 'end_time': 1367}, + {'title': 'Zacht weer', 'start_time': 1367, 'end_time': 1383}, + {'title': 'Financiële balans', 'start_time': 1383, 'end_time': 1484}, + {'title': 'Club Brugge', 'start_time': 1484, 'end_time': 1575}, + {'title': 'Mentale gezondheid bij topsporters', 'start_time': 1575, 'end_time': 1728}, + {'title': 'Olympische Winterspelen', 'start_time': 1728, 'end_time': 1873}, + {'title': 'Sober oudjaar in Nederland', 'start_time': 1873, 'end_time': 2079.23}, + ], + 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)', + }, {}, + ), + ( + # test multiple thumbnails in a list + r''' +''', + { + 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], + }, + {}, + ), + ( + # test single thumbnail + r''' +''', + { + 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}], + }, + {}, + ), + ] + for html, expected_dict, search_json_ld_kwargs in _TESTS: + expect_dict( + self, + self.ie._search_json_ld(html, None, **search_json_ld_kwargs), + expected_dict, + ) + def test_download_json(self): uri = encode_data_uri(b'{"foo": "blah"}', 'application/json') self.assertEqual(self.ie._download_json(uri, None), {'foo': 'blah'}) @@ -108,6 +320,18 @@ def test_download_json(self): self.assertEqual(self.ie._download_json(uri, None, fatal=False), None) def test_parse_html5_media_entries(self): + # inline video tag + expect_dict( + self, + self.ie._parse_html5_media_entries( + 'https://127.0.0.1/video.html', + r'