#!/usr/bin/env python3
-from __future__ import unicode_literals
-
# Allow direct execution
-import io
import os
import sys
import unittest
+
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
-from yt_dlp.compat import compat_etree_fromstring, compat_http_server
-from yt_dlp.extractor.common import InfoExtractor
-from yt_dlp.extractor import YoutubeIE, get_info_extractor
-from yt_dlp.utils import encode_data_uri, strip_jsonp, ExtractorError, RegexNotFoundError
+
+import http.server
import threading
+from test.helper import FakeYDL, expect_dict, expect_value, http_server_port
+from yt_dlp.compat import compat_etree_fromstring
+from yt_dlp.extractor import YoutubeIE, get_info_extractor
+from yt_dlp.extractor.common import InfoExtractor
+from yt_dlp.utils import (
+ ExtractorError,
+ RegexNotFoundError,
+ encode_data_uri,
+ strip_jsonp,
+)
TEAPOT_RESPONSE_STATUS = 418
TEAPOT_RESPONSE_BODY = "<h1>418 I'm a teapot</h1>"
-class InfoExtractorTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
+class InfoExtractorTestRequestHandler(http.server.BaseHTTPRequestHandler):
def log_message(self, format, *args):
pass
},
{'expected_type': 'NewsArticle'},
),
+ (
+ r'''<script type="application/ld+json">
+ {"url":"/vrtnu/a-z/het-journaal/2021/het-journaal-het-journaal-19u-20211231/",
+ "name":"Het journaal 19u",
+ "description":"Het journaal 19u van vrijdag 31 december 2021.",
+ "potentialAction":{"url":"https://vrtnu.page.link/pfVy6ihgCAJKgHqe8","@type":"ShareAction"},
+ "mainEntityOfPage":{"@id":"1640092242445","@type":"WebPage"},
+ "publication":[{
+ "startDate":"2021-12-31T19:00:00.000+01:00",
+ "endDate":"2022-01-30T23:55:00.000+01:00",
+ "publishedBy":{"name":"een","@type":"Organization"},
+ "publishedOn":{"url":"https://www.vrt.be/vrtnu/","name":"VRT NU","@type":"BroadcastService"},
+ "@id":"pbs-pub-3a7ec233-da95-4c1e-9b2b-cf5fdfebcbe8",
+ "@type":"BroadcastEvent"
+ }],
+ "video":{
+ "name":"Het journaal - Aflevering 365 (Seizoen 2021)",
+ "description":"Het journaal 19u van vrijdag 31 december 2021. Bekijk aflevering 365 van seizoen 2021 met VRT NU via de site of app.",
+ "thumbnailUrl":"//images.vrt.be/width1280/2021/12/31/80d5ed00-6a64-11ec-b07d-02b7b76bf47f.jpg",
+ "expires":"2022-01-30T23:55:00.000+01:00",
+ "hasPart":[
+ {"name":"Explosie Turnhout","startOffset":70,"@type":"Clip"},
+ {"name":"Jaarwisseling","startOffset":440,"@type":"Clip"},
+ {"name":"Natuurbranden Colorado","startOffset":1179,"@type":"Clip"},
+ {"name":"Klimaatverandering","startOffset":1263,"@type":"Clip"},
+ {"name":"Zacht weer","startOffset":1367,"@type":"Clip"},
+ {"name":"Financiƫle balans","startOffset":1383,"@type":"Clip"},
+ {"name":"Club Brugge","startOffset":1484,"@type":"Clip"},
+ {"name":"Mentale gezondheid bij topsporters","startOffset":1575,"@type":"Clip"},
+ {"name":"Olympische Winterspelen","startOffset":1728,"@type":"Clip"},
+ {"name":"Sober oudjaar in Nederland","startOffset":1873,"@type":"Clip"}
+ ],
+ "duration":"PT34M39.23S",
+ "uploadDate":"2021-12-31T19:00:00.000+01:00",
+ "@id":"vid-9457d0c6-b8ac-4aba-b5e1-15aa3a3295b5",
+ "@type":"VideoObject"
+ },
+ "genre":["Nieuws en actua"],
+ "episodeNumber":365,
+ "partOfSeries":{"name":"Het journaal","@id":"222831405527","@type":"TVSeries"},
+ "partOfSeason":{"name":"Seizoen 2021","@id":"961809365527","@type":"TVSeason"},
+ "@context":"https://schema.org","@id":"961685295527","@type":"TVEpisode"}</script>
+ ''',
+ {
+ 'chapters': [
+ {"title": "Explosie Turnhout", "start_time": 70, "end_time": 440},
+ {"title": "Jaarwisseling", "start_time": 440, "end_time": 1179},
+ {"title": "Natuurbranden Colorado", "start_time": 1179, "end_time": 1263},
+ {"title": "Klimaatverandering", "start_time": 1263, "end_time": 1367},
+ {"title": "Zacht weer", "start_time": 1367, "end_time": 1383},
+ {"title": "Financiƫle balans", "start_time": 1383, "end_time": 1484},
+ {"title": "Club Brugge", "start_time": 1484, "end_time": 1575},
+ {"title": "Mentale gezondheid bij topsporters", "start_time": 1575, "end_time": 1728},
+ {"title": "Olympische Winterspelen", "start_time": 1728, "end_time": 1873},
+ {"title": "Sober oudjaar in Nederland", "start_time": 1873, "end_time": 2079.23}
+ ],
+ 'title': 'Het journaal - Aflevering 365 (Seizoen 2021)'
+ }, {}
+ ),
+ (
+ # test multiple thumbnails in a list
+ r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":["https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"]}
+</script>''',
+ {
+ 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+ },
+ {},
+ ),
+ (
+ # test single thumbnail
+ r'''
+<script type="application/ld+json">
+{"@context":"https://schema.org",
+"@type":"VideoObject",
+"thumbnailUrl":"https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg"}
+</script>''',
+ {
+ 'thumbnails': [{'url': 'https://www.rainews.it/cropgd/640x360/dl/img/2021/12/30/1640886376927_GettyImages.jpg'}],
+ },
+ {},
+ )
]
for html, expected_dict, search_json_ld_kwargs in _TESTS:
expect_dict(
}],
})
+ # from https://0000.studio/
+ # with type attribute but without extension in URL
+ expect_dict(
+ self,
+ self.ie._parse_html5_media_entries(
+ 'https://0000.studio',
+ r'''
+ <video src="https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92"
+ controls="controls" type="video/mp4" preload="metadata" autoplay="autoplay" playsinline class="object-contain">
+ </video>
+ ''', None)[0],
+ {
+ 'formats': [{
+ 'url': 'https://d1ggyt9m8pwf3g.cloudfront.net/protected/ap-northeast-1:1864af40-28d5-492b-b739-b32314b1a527/archive/clip/838db6a7-8973-4cd6-840d-8517e4093c92',
+ 'ext': 'mp4',
+ }],
+ })
+
def test_extract_jwplayer_data_realworld(self):
# from http://www.suffolk.edu/sjc/
expect_dict(
]
for m3u8_file, m3u8_url, expected_formats, expected_subs in _TEST_CASES:
- with io.open('./test/testdata/m3u8/%s.m3u8' % m3u8_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/m3u8/%s.m3u8' % m3u8_file, encoding='utf-8') as f:
formats, subs = self.ie._parse_m3u8_formats_and_subtitles(
f.read(), m3u8_url, ext='mp4')
self.ie._sort_formats(formats)
]
for mpd_file, mpd_url, mpd_base_url, expected_formats, expected_subtitles in _TEST_CASES:
- with io.open('./test/testdata/mpd/%s.mpd' % mpd_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/mpd/%s.mpd' % mpd_file, encoding='utf-8') as f:
formats, subtitles = self.ie._parse_mpd_formats_and_subtitles(
- compat_etree_fromstring(f.read().encode('utf-8')),
+ compat_etree_fromstring(f.read().encode()),
mpd_base_url=mpd_base_url, mpd_url=mpd_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
]
for ism_file, ism_url, expected_formats, expected_subtitles in _TEST_CASES:
- with io.open('./test/testdata/ism/%s.Manifest' % ism_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/ism/%s.Manifest' % ism_file, encoding='utf-8') as f:
formats, subtitles = self.ie._parse_ism_formats_and_subtitles(
- compat_etree_fromstring(f.read().encode('utf-8')), ism_url=ism_url)
+ compat_etree_fromstring(f.read().encode()), ism_url=ism_url)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
expect_value(self, subtitles, expected_subtitles, None)
]
for f4m_file, f4m_url, expected_formats in _TEST_CASES:
- with io.open('./test/testdata/f4m/%s.f4m' % f4m_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/f4m/%s.f4m' % f4m_file, encoding='utf-8') as f:
formats = self.ie._parse_f4m_formats(
- compat_etree_fromstring(f.read().encode('utf-8')),
+ compat_etree_fromstring(f.read().encode()),
f4m_url, None)
self.ie._sort_formats(formats)
expect_value(self, formats, expected_formats, None)
]
for xspf_file, xspf_url, expected_entries in _TEST_CASES:
- with io.open('./test/testdata/xspf/%s.xspf' % xspf_file,
- mode='r', encoding='utf-8') as f:
+ with open('./test/testdata/xspf/%s.xspf' % xspf_file, encoding='utf-8') as f:
entries = self.ie._parse_xspf(
- compat_etree_fromstring(f.read().encode('utf-8')),
+ compat_etree_fromstring(f.read().encode()),
xspf_file, xspf_url=xspf_url, xspf_base_url=xspf_url)
expect_value(self, entries, expected_entries, None)
for i in range(len(entries)):
# or the underlying `_download_webpage_handle` returning no content
# when a response matches `expected_status`.
- httpd = compat_http_server.HTTPServer(
+ httpd = http.server.HTTPServer(
('127.0.0.1', 0), InfoExtractorTestRequestHandler)
port = http_server_port(httpd)
server_thread = threading.Thread(target=httpd.serve_forever)