4 from .common
import InfoExtractor
16 class MainStreamingIE(InfoExtractor
):
# Matches MainStreaming player URLs on *.msvdn.net, with or without the
# "webtools"/"webtools-" host prefix. Named groups:
#   host - the host name with any "webtools"/"webtools-" prefix stripped
#          (it is just ".msvdn.net" for the bare webtools.msvdn.net host)
#   id   - the content ID
# Both dots of "msvdn.net" are escaped so look-alike hosts such as
# "msvdnXnet" are not accepted.
_VALID_URL = r'https?://(?:webtools-?)?(?P<host>[A-Za-z0-9-]*\.msvdn\.net)/(?:embed|amp_embed|content)/(?P<id>\w+)'
# Human-readable description of this extractor.
IE_DESC = 'MainStreaming Player'
# NOTE(review): the lines below look like fragments of the extractor's
# test-case table: one entry per sample URL, with the expected title, live
# status, thumbnail pattern, expected warnings and skip reason. The enclosing
# assignment and the dict/list delimiters are not visible here, so the
# entries are kept verbatim rather than reconstructed -- restore them from
# the upstream file before relying on this table.
22 # Live stream offline, has alternative content id
23 'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/53EN6GxbWaJC',
26 'title': 'Diretta homepage 2021-12-31 12:00',
28 'live_status': 'was_live',
30 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
32 'expected_warnings': [
33 'Ignoring alternative content ID: WDAF1KOWUpH3',
34 'MainStreaming said: Live event is OFFLINE'
36 'skip': 'live stream offline'
39 'url': 'https://webtools-e18da6642b684f8aa9ae449862783a56.msvdn.net/embed/WDAF1KOWUpH3',
42 'title': 'Playlist homepage',
44 'playlist_mincount': 2
47 'url': 'https://webtools-859c1818ed614cc5b0047439470927b0.msvdn.net/embed/tDoFkZD3T1Lw',
50 'title': r
're:Class CNBC Live \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
51 'live_status': 'is_live',
53 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
57 'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/EUlZfGWkGpOd?autoPlay=false',
60 'title': 'La Settimana ',
61 'description': '03 Ottobre ore 02:00',
63 'live_status': 'not_live',
64 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
68 # video without webtools- prefix
69 'url': 'https://f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/MfuWmzL2lGkA?autoplay=false&T=1635860445',
72 'title': 'TG Mattina',
73 'description': '06 Ottobre ore 08:00',
75 'live_status': 'not_live',
76 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
80 # always-on livestream with DVR
81 'url': 'https://webtools-f5842579ff984c1c98d63b8d789673eb.msvdn.net/embed/HVvPMzy',
84 'title': r
're:^Diretta LaC News24 \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
85 'description': 'canale all news',
86 'live_status': 'is_live',
88 'thumbnail': r
're:https?://[A-Za-z0-9-]*\.msvdn.net/image/\w+/poster',
91 'skip_download': True,
95 'url': 'https://webtools.msvdn.net/embed/MfuWmzL2lGkA',
98 'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/amp_embed/tDoFkZD3T1Lw',
101 'url': 'https://859c1818ed614cc5b0047439470927b0.msvdn.net/content/tDoFkZD3T1Lw#',
102 'only_matching': True
@staticmethod
def _extract_urls(webpage):
    """Return the MainStreaming player URLs embedded as iframes in *webpage*.

    The iframe ``src`` attribute (optionally quoted) must match
    ``MainStreamingIE._VALID_URL``.

    :param webpage: HTML source to scan.
    :return: list of matched URL strings; empty when nothing matches.
    """
    # _VALID_URL contributes its own groups, so findall() yields one tuple
    # per match; index 0 is the outer "url" group wrapping the whole URL.
    mobj = re.findall(
        r'<iframe[^>]+?src=["\']?(?P<url>%s)["\']?' % MainStreamingIE._VALID_URL, webpage)
    return [group[0] for group in mobj]
def _playlist_entries(self, host, playlist_content):
    """Yield one URL-type result per playlist item.

    :param host: host name used to build each item's embed URL.
    :param playlist_content: iterable of playlist item dicts; ``None`` is
        treated as an empty playlist.
    :return: generator of result dicts that defer extraction to
        ``MainStreamingIE`` through the item's embed URL.
    """
    for entry in playlist_content or []:
        content_id = entry.get('contentID')
        if not content_id:
            # Without an ID the embed URL would end in "/None"; skip it.
            continue
        yield {
            # NOTE(review): '_type' and 'id' are inferred from the
            # 'ie_key' + 'url' shape of this result -- confirm upstream.
            '_type': 'url',
            'ie_key': MainStreamingIE.ie_key(),
            'id': content_id,
            'duration': int_or_none(traverse_obj(entry, ('duration', 'totalSeconds'))),
            'title': entry.get('title'),
            'url': f'https://{host}/embed/{content_id}'
        }
@staticmethod
def _get_webtools_host(host):
    """Return *host* with the ``webtools`` prefix that the API host carries.

    A host already starting with ``webtools`` is returned unchanged. A host
    starting with ``.`` (the bare ``.msvdn.net`` case) gets ``webtools``
    prepended directly; any other host gets ``webtools-``.

    :param host: host name as captured from the URL.
    :return: the webtools host name.
    """
    if not host.startswith('webtools'):
        host = 'webtools' + ('-' if not host.startswith('.') else '') + host
    return host
def _get_webtools_base_url(self, host):
    """Build the base URL of the webtools host for *host*.

    Joins ``self.http_scheme()`` and the webtools form of *host* with ``//``.
    """
    scheme = self.http_scheme()
    webtools_host = self._get_webtools_host(host)
    return f'{scheme}//{webtools_host}'
def _call_api(self, host: str, path: str, item_id: str, query=None, note='Downloading API JSON', fatal=False):
    """Call the JSON API under ``/api/v2/`` on the webtools host.

    The API does not appear to be documented. All arguments are forwarded
    to ``_call_webtools_api`` with *path* prefixed by ``/api/v2/``.
    """
    api_path = '/api/v2/' + path
    return self._call_webtools_api(
        host, api_path, item_id, query=query, note=note, fatal=fatal)
def _call_webtools_api(self, host: str, path: str, item_id: str, query=None, note='Downloading webtools API JSON', fatal=False):
    """Download JSON from *path* on the webtools host for *host*.

    webtools docs: https://webtools.msvdn.net/

    *query*, *note* and *fatal* are passed through to ``_download_json``.
    """
    api_url = urljoin(self._get_webtools_base_url(host), path)
    return self._download_json(
        api_url, item_id, query=query, note=note, fatal=fatal)
def _real_extract(self, url):
    """Extract a MainStreaming video, live stream or playlist from *url*.

    Content info is taken from the content API; if that yields nothing, the
    player config embedded in the web page is used instead. The numeric
    ``contentType`` then selects the branch: 20 is handled as a live stream,
    31 as a playlist and 10 as a regular video.

    :param url: a URL matching ``_VALID_URL``.
    :return: an info dict for a single video/live stream, or a playlist
        result for playlist content.
    """
    host, video_id = self._match_valid_url(url).groups()
    content_info = try_get(
        self._call_api(
            host, f'content/{video_id}', video_id, note='Downloading content info API JSON'),
        lambda x: x['playerContentInfo'])
    # Fallback: read the player config embedded in the page itself.
    if not content_info:
        webpage = self._download_webpage(url, video_id)
        player_config = self._parse_json(
            self._search_regex(
                r'config\s*=\s*({.+?})\s*;', webpage, 'mainstreaming player config',
                default='{}', flags=re.DOTALL),
            video_id, transform_source=js_to_json, fatal=False) or {}
        # NOTE(review): raises KeyError when the page carries no usable config.
        content_info = player_config['contentInfo']

    # Prefer the values reported by the service over those parsed from the URL.
    host = content_info.get('host') or host
    video_id = content_info.get('contentID') or video_id
    title = content_info.get('title')
    description = traverse_obj(content_info, 'longDescription', 'shortDescription', expected_type=str)
    live_status = 'not_live'
    if content_info.get('drmEnabled'):
        self.report_drm(video_id)

    alternative_content_id = content_info.get('alternativeContentID')
    if alternative_content_id:
        self.report_warning(f'Ignoring alternative content ID: {alternative_content_id}')

    content_type = int_or_none(content_info.get('contentType'))
    format_base_url = None
    formats = []
    subtitles = {}
    if content_type == 20:
        # Live content
        dvr_enabled = traverse_obj(content_info, ('playerSettings', 'dvrEnabled'), expected_type=bool)
        format_base_url = f"https://{host}/live/{content_info['liveSourceID']}/{video_id}/%s{'?DVR' if dvr_enabled else ''}"
        live_status = 'is_live'
        heartbeat = self._call_api(host, f'heartbeat/{video_id}', video_id, note='Checking stream status') or {}
        if heartbeat.get('heartBeatUp') is False:
            self.raise_no_formats(f'MainStreaming said: {heartbeat.get("responseMessage")}', expected=True)
            live_status = 'was_live'
    elif content_type == 31:
        # Playlist; a missing item list is treated as an empty playlist.
        return self.playlist_result(
            self._playlist_entries(host, content_info.get('playlistContents') or []),
            video_id, title, description)
    elif content_type == 10:
        # Normal video content?
        format_base_url = f'https://{host}/vod/{video_id}/%s'
        # Note: in https://webtools.msvdn.net/loader/playerV2.js there is mention of original.mp3 format,
        # however it seems to be the same as original.mp4?
        formats.append({'url': format_base_url % 'original.mp4', 'format_note': 'original', 'quality': 1})
    else:
        self.raise_no_formats(f'Unknown content type {content_type}')

    # Only probe the manifests when a branch above produced a base URL.
    if format_base_url:
        m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
            format_base_url % 'playlist.m3u8', video_id=video_id, fatal=False)
        mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(
            format_base_url % 'manifest.mpd', video_id=video_id, fatal=False)

        subtitles = self._merge_subtitles(m3u8_subs, mpd_subs)
        formats.extend(m3u8_formats + mpd_formats)

    self._sort_formats(formats)

    # NOTE(review): 'id', 'title' and 'formats' are inferred from the values
    # computed above -- confirm against the upstream file.
    return {
        'id': video_id,
        'title': title,
        'description': description,
        'formats': formats,
        'live_status': live_status,
        'duration': parse_duration(content_info.get('duration')),
        'tags': content_info.get('tags'),
        'subtitles': subtitles,
        'thumbnail': urljoin(self._get_webtools_base_url(host), f'image/{video_id}/poster')
    }