]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/maariv.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / maariv.py
CommitLineData
c5f01bf7
AP
1from .common import InfoExtractor
2from ..utils import (
3 int_or_none,
4 parse_resolution,
5 unified_timestamp,
6 url_or_none,
7)
8from ..utils.traversal import traverse_obj
9
10
class MaarivIE(InfoExtractor):
    """Extractor for the player.maariv.co.il embedded video player.

    Player URLs carry a numeric ``media`` query parameter that is used as the
    video ID. The same URL pattern is reused inside ``_EMBED_REGEX`` so that
    ``<iframe>`` embeds of the player can be matched on other pages.
    """

    IE_NAME = 'maariv.co.il'
    _VALID_URL = r'https?://player\.maariv\.co\.il/public/player\.html\?(?:[^#]+&)?media=(?P<id>\d+)'
    _EMBED_REGEX = [rf'<iframe[^>]+\bsrc=[\'"](?P<url>{_VALID_URL})']
    _TESTS = [{
        'url': 'https://player.maariv.co.il/public/player.html?player=maariv-desktop&media=3611585',
        'info_dict': {
            'id': '3611585',
            'duration': 75,
            'ext': 'mp4',
            'upload_date': '20231009',
            'title': 'מבצע חרבות ברזל',
            'timestamp': 1696851301,
        },
    }]
    _WEBPAGE_TESTS = [{
        'url': 'https://www.maariv.co.il/news/law/Article-1044008',
        'info_dict': {
            'id': '3611585',
            'duration': 75,
            'ext': 'mp4',
            'upload_date': '20231009',
            'title': 'מבצע חרבות ברזל',
            'timestamp': 1696851301,
        },
    }]

    def _real_extract(self, url):
        """Download the media JSON for a player URL and build its info dict.

        The returned dict holds the video ID, the metadata fields found in the
        JSON (title, duration, timestamp) and the collected formats.
        """
        video_id = self._match_id(url)
        api_url = f'https://dal.walla.co.il/media/{video_id}?origin=player.maariv.co.il'
        media = self._download_json(api_url, video_id)['data']

        formats = []

        # HLS formats come first; a failing manifest download is non-fatal
        manifest_url = traverse_obj(media, ('video', 'url', {url_or_none}))
        if manifest_url:
            formats += self._extract_m3u8_formats(manifest_url, video_id, m3u8_id='hls', fatal=False)

        # One format per direct stream URL, merged with whatever fields
        # parse_resolution() returns for that URL string
        stream_urls = traverse_obj(
            media, ('video', 'stream_urls', ..., 'stream_url', {url_or_none}))
        formats += [{
            'url': stream_url,
            'format_id': 'http',
            **parse_resolution(stream_url),
        } for stream_url in stream_urls]

        metadata = traverse_obj(media, {
            'title': 'title',
            'duration': ('video', 'duration', {int_or_none}),
            'timestamp': ('upload_date', {unified_timestamp}),
        })

        return {
            'id': video_id,
            **metadata,
            'formats': formats,
        }