X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/177877c54493d0cb32f65e87ff9ed88a030cfbdb..ac668111128b5f124b4271b3aa4c35f6e71a4749:/test/test_InfoExtractor.py
diff --git a/test/test_InfoExtractor.py b/test/test_InfoExtractor.py
index c4b7f689e..f0571c41a 100644
--- a/test/test_InfoExtractor.py
+++ b/test/test_InfoExtractor.py
@@ -1,27 +1,31 @@
-#!/usr/bin/env python
-
-from __future__ import unicode_literals
-
+#!/usr/bin/env python3
# Allow direct execution
-import io
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
-from yt_dlp.compat import compat_etree_fromstring, compat_http_server
-from yt_dlp.extractor.common import InfoExtractor
-from yt_dlp.extractor import YoutubeIE, get_info_extractor
-from yt_dlp.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+
+import http.server
import threading
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from yt_dlp.compat import compat_etree_fromstring
+from yt_dlp.extractor import YoutubeIE, get_info_extractor
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
+ ExtractorError,
+ RegexNotFoundError,
+ encode_data_uri,
+ strip_jsonp,
+)
TEAPOT_RESPONSE_STATUS = 418
TEAPOT_RESPONSE_BODY = "
418 I'm a teapot
"
-class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
@@ -35,13 +39,13 @@ def do_GET(self):
assert False
-class TestIE(InfoExtractor):
+class DummyIE(InfoExtractor):
pass
class TestInfoExtractor(unittest.TestCase):
def setUp(self):
- self.ie = TestIE(FakeYDL())
+ self.ie = DummyIE(FakeYDL())
def test_ie_key(self):
self.assertEqual(get_info_extractor(YoutubeIE.ie_key()), YoutubeIE)
@@ -99,10 +103,10 @@ def test_html_search_meta(self):
self.assertRaises(RegexNotFoundError, ie._html_search_meta, ('z', 'x'), html, None, fatal=True)
def test_search_json_ld_realworld(self):
- # https://github.com/ytdl-org/youtube-dl/issues/23306
- expect_dict(
- self,
- self.ie._search_json_ld(r'''''', None),
- {
- 'title': '1 On 1 With Kleio',
- 'description': 'Kleio Valentien',
- 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
- 'timestamp': 1449347075,
- 'duration': 743.0,
- 'view_count': 1120958,
- 'width': 1920,
- 'height': 1080,
- })
+ ''',
+ {
+ 'title': '1 On 1 With Kleio',
+ 'description': 'Kleio Valentien',
+ 'url': 'https://gvideo.eporner.com/xN49A1cT3eB/xN49A1cT3eB.mp4',
+ 'timestamp': 1449347075,
+ 'duration': 743.0,
+ 'view_count': 1120958,
+ 'width': 1920,
+ 'height': 1080,
+ },
+ {},
+ ),
+ (
+ r'''''',
+ {
+ 'timestamp': 1636523400,
+ 'title': 'md5:91fe569e952e4d146485740ae927662b',
+ },
+ {'expected_type': 'NewsArticle'},
+ ),
+ (
+ r'''
+ ''',
+ {
+ 'chapters': [
+ {"title": "Explosie Turnhout", "start_time": 70, "end_time": 440},
+ {"title": "Jaarwisseling", "start_time": 440, "end_time": 1179},
+ {"title": "Natuurbranden Colorado", "start_time": 1179, "end_time": 1263},
+ {"title": "Klimaatverandering", "start_time": 1263, "end_time": 1367},
+ {"title": "Zacht weer", "start_time": 1367, "end_time": 1383},
+ {"title": "Financiële balans", "start_time": 1383, "end_time": 1484},
+ {"title": "Club Brugge", "start_time": 1484, "end_time": 1575},
+ {"title": "Mentale gezondheid bij topsporters", "start_time": 1575, "end_time": 1728},
+ {"title": "Olympische Winterspelen", "start_time": 1728, "end_time": 1873},
+ {"title": "Sober oudjaar in Nederland", "start_time": 1873, "end_time": 2079.23}
+ ],
+ 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)'
+ }, {}
+ ),
+ (
+ # test multiple thumbnails in a list
+ r'''
+''',
+ {
+ 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+ },
+ {},
+ ),
+ (
+ # test single thumbnail
+ r'''
+''',
+ {
+ 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+ },
+ {},
+ )
+ ]
+ for html, expected_dict, search_json_ld_kwargs in _TESTS:
+ expect_dict(
+ self,
+ self.ie._search_json_ld(html, None, **search_json_ld_kwargs),
+ expected_dict
+ )
def test_download_json(self):
uri = encode_data_uri(b'{"foo": "blah"}', 'application/json')
@@ -346,6 +504,24 @@ def test_parse_html5_media_entries(self):
}],
})
+ # from https://0000.studio/
+ # with type attribute but without extension in URL
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://0000.studio',
+ r'''
+
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+ 'ext': 'mp4',
+ }],
+ })
+
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
@@ -857,8 +1033,7 @@ def test_parse_m3u8_formats(self):
]
for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
- with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f:
formats, subs = self.ie._parse_m3u8_formats_and_subtitles(
f.read(), m3u8_url, ext='mp4')
self.ie._sort_formats(formats)
@@ -1203,10 +1378,9 @@ def test_parse_mpd_formats(self):
]
for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
- with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f:
formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
- compat_etree_fromstring(f.read().encode('utf-8')),
+ compat_etree_fromstring(f.read().encode()),
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
@@ -1395,10 +1569,9 @@ def test_parse_ism_formats(self):
]
for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES:
- with io.open('./test/testdata/ism/%s.Manifest' % ism_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f:
formats, subtitles = self.ie._parse_ism_formats_and_subtitles(
- compat_etree_fromstring(f.read().encode('utf-8')), ism_url=ism_url)
+ compat_etree_fromstring(f.read().encode()), ism_url=ism_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
expect_value(self, subtitles, expected_subtitles, None)
@@ -1422,10 +1595,9 @@ def test_parse_f4m_formats(self):
]
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
- with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f:
formats = self.ie._parse_f4m_formats(
- compat_etree_fromstring(f.read().encode('utf-8')),
+ compat_etree_fromstring(f.read().encode()),
f4m_url, None)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
@@ -1470,10 +1642,9 @@ def test_parse_xspf(self):
]
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
- with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f:
entries = self.ie._parse_xspf(
- compat_etree_fromstring(f.read().encode('utf-8')),
+ compat_etree_fromstring(f.read().encode()),
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
expect_value(self, entries, expected_entries, None)
for i in range(len(entries)):
@@ -1486,7 +1657,7 @@ def test_response_with_expected_status_returns_content(self):
# or the underlying `_download_webpage_handle` returning no content
# when a response matches `expected_status`.
- httpd = compat_http_server.HTTPServer(
+ httpd = http.server.HTTPServer(
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
port = http_server_port(httpd)
server_thread = threading.Thread(target=httpd.serve_forever)