]>
Commit | Line | Data |
---|---|---|
1 | import json | |
2 | import re | |
3 | import time | |
4 | import urllib.parse | |
5 | import uuid | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
9 | ExtractorError, | |
10 | float_or_none, | |
11 | int_or_none, | |
12 | strip_or_none, | |
13 | traverse_obj, | |
14 | try_call, | |
15 | unified_timestamp, | |
16 | ) | |
17 | ||
18 | ||
class RedBeeBaseIE(InfoExtractor):
    """Base class for extractors whose media is served through the Red Bee "exposure" API.

    Subclasses must define ``_REDBEE_CUSTOMER`` and ``_REDBEE_BUSINESS_UNIT``;
    both are interpolated into the API base URL returned by ``_API_URL``.
    """

    # Random device id generated once, when the class body is executed;
    # it is sent with every authentication request.
    _DEVICE_ID = str(uuid.uuid4())

    @property
    def _API_URL(self):
        """
        Ref: https://apidocs.emp.ebsd.ericsson.net
        Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT
        """
        return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}'

    def _get_bearer_token(self, asset_id, jwt=None):
        """Request a session token from the API and return it.

        @param asset_id  Id passed to _download_json as the video id
        @param jwt       Optional JWT; when given, it is added to the request body
                         and the "gigyaLogin" endpoint is used instead of "anonymous"
        @returns         The 'sessionToken' value of the JSON response
        """
        request = {
            'deviceId': self._DEVICE_ID,
            'device': {
                'deviceId': self._DEVICE_ID,
                'name': 'Mozilla Firefox 102',
                'type': 'WEB',
            },
        }
        if jwt:
            request['jwt'] = jwt

        return self._download_json(
            f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}',
            asset_id, data=json.dumps(request).encode('utf-8'), headers={
                'Content-Type': 'application/json;charset=utf-8',
            })['sessionToken']

    def _get_formats_and_subtitles(self, asset_id, **kwargs):
        """Fetch the play entitlement for an asset and extract its formats and subtitles.

        @param asset_id  Asset id used in the entitlement URL
        @param kwargs    Forwarded unchanged to _get_bearer_token (e.g. jwt=...)
        @returns         Tuple (formats, subtitles) merged over every entry of the
                         response's 'formats' list that has a 'mediaLocator'
        """
        bearer_token = self._get_bearer_token(asset_id, **kwargs)
        api_response = self._download_json(
            f'{self._API_URL}/entitlement/{asset_id}/play',
            asset_id, headers={
                'Authorization': f'Bearer {bearer_token}',
                'Accept': 'application/json, text/plain, */*',
            })

        formats, subtitles = [], {}
        # NOTE: the loop variable is deliberately not called `format`, which would shadow the builtin
        for format_data in api_response['formats']:
            media_locator = format_data.get('mediaLocator')
            if not media_locator:
                continue

            # Entries with an unrecognised 'format' value contribute nothing
            fmts, subs = [], {}
            manifest_type = format_data.get('format')
            if manifest_type == 'DASH':
                fmts, subs = self._extract_mpd_formats_and_subtitles(
                    media_locator, asset_id, fatal=False)
            elif manifest_type == 'SMOOTHSTREAMING':
                fmts, subs = self._extract_ism_formats_and_subtitles(
                    media_locator, asset_id, fatal=False)
            elif manifest_type == 'HLS':
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    media_locator, asset_id, fatal=False)

            # Flag every format of a DRM-marked entry
            if format_data.get('drm'):
                for f in fmts:
                    f['has_drm'] = True

            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return formats, subtitles
81 | ||
82 | ||
class ParliamentLiveUKIE(RedBeeBaseIE):
    """Extractor for parliamentlive.tv event pages, identified by the event's UUID."""

    IE_NAME = 'parliamentlive.tv'
    IE_DESC = 'UK parliament videos'
    _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    # Values interpolated into the API base URL by RedBeeBaseIE._API_URL
    _REDBEE_CUSTOMER = 'UKParliament'
    _REDBEE_BUSINESS_UNIT = 'ParliamentLive'

    _TESTS = [{
        'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
        'info_dict': {
            'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
            'ext': 'mp4',
            'title': 'Home Affairs Committee',
            'timestamp': 1395153872,
            'upload_date': '20140318',
            'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail',
        },
    }, {
        'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
        'only_matching': True,
    }, {
        'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377',
        'info_dict': {
            'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377',
            'ext': 'mp4',
            'title': 'House of Commons',
            'timestamp': 1658392447,
            'upload_date': '20220721',
            'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for an event page.

        Formats and subtitles are fetched first; the metadata request that
        follows is non-fatal, so the metadata fields may end up as None.
        """
        event_id = self._match_id(url)
        fmts, subs = self._get_formats_and_subtitles(event_id)

        # Non-fatal: traverse_obj is relied upon to cope with a failed download
        share_url = f'https://www.parliamentlive.tv/Event/GetShareVideo/{event_id}'
        share_info = self._download_json(share_url, event_id, fatal=False)

        title = traverse_obj(share_info, ('event', 'title'))
        thumbnail = traverse_obj(share_info, 'thumbnailUrl')
        timestamp = traverse_obj(
            share_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp)

        return {
            'id': event_id,
            'formats': fmts,
            'subtitles': subs,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            '_format_sort_fields': ('res', 'proto'),
        }
134 | ||
135 | ||
class RTBFIE(RedBeeBaseIE):
    """Extractor for rtbf.be video, ouftivi and auvio pages (currently flagged as not working).

    Media data is first read from the auvio embed page; the Red Bee API
    (which requires a logged-in session) is only used when that yields no formats.
    """

    _WORKING = False
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?rtbf\.be/
        (?:
            video/[^?]+\?.*\bid=|
            ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
            auvio/[^/]+\?.*\b(?P<live>l)?id=
        )(?P<id>\d+)'''
    _NETRC_MACHINE = 'rtbf'

    # Values interpolated into the API base URL by RedBeeBaseIE._API_URL
    _REDBEE_CUSTOMER = 'RTBF'
    _REDBEE_BUSINESS_UNIT = 'Auvio'

    _TESTS = [{
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '8c876a1cceeb6cf31b476461ade72384',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'description': '(du 25/04/2014)',
            'duration': 3099.54,
            'upload_date': '20140425',
            'timestamp': 1398456300,
        },
        'skip': 'No longer available',
    }, {
        # geo restricted
        'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
        'only_matching': True,
    }, {
        # Live
        'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
        'only_matching': True,
    }, {
        # Audio
        'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
        'only_matching': True,
    }, {
        # With Subtitle
        'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
        'only_matching': True,
    }, {
        'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926',
        'md5': 'd5d11bb62169fef38d7ce7ac531e034f',
        'info_dict': {
            'id': '2921926',
            'ext': 'mp4',
            'title': 'Le handicap un confinement perpétuel - Maladie de Lyme',
            'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52',
            'duration': 5258.8,
            'upload_date': '20220727',
            'timestamp': 1658934000,
            'series': '#Investigation',
            'thumbnail': r're:^https?://[^?&]+\.jpg$',
        },
    }, {
        'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492',
        'md5': '054f9f143bc79c89647c35e5a7d35fa8',
        'info_dict': {
            'id': '2920492',
            'ext': 'mp4',
            'title': '04 - Le crime de la rue Royale',
            'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6',
            'duration': 1574.6,
            'upload_date': '20220723',
            'timestamp': 1658596887,
            'series': 'La Belgique criminelle - TV',
            'thumbnail': r're:^https?://[^?&]+\.jpg$',
        },
    }]

    _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
    # Maps the embed page's 'provider' value to the extractor that handles its URL
    _PROVIDERS = {
        'YOUTUBE': 'Youtube',
        'DAILYMOTION': 'Dailymotion',
        'VIMEO': 'Vimeo',
    }
    # (key in the embed data's 'sources' dict, format_id assigned to it)
    _QUALITIES = [
        ('mobile', 'SD'),
        ('web', 'MD'),
        ('high', 'HD'),
    ]
    _LOGIN_URL = 'https://login.rtbf.be/accounts.login'
    _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO'
    _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}'

    def _perform_login(self, username, password):
        """Log in with the given credentials and store the login token as a cookie.

        Does nothing if the login cookie is already present.

        @raises ExtractorError  If the server reports a status other than 200,
                                or the response carries no login token
        """
        if self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID):
            return

        self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600)

        login_response = self._download_json(
            self._LOGIN_URL, None, data=urllib.parse.urlencode({
                'loginID': username,
                'password': password,
                'APIKey': self._GIGYA_API_KEY,
                'targetEnv': 'jssdk',
                'sessionExpiration': '-2',
            }).encode('utf-8'), headers={
                'Content-Type': 'application/x-www-form-urlencoded',
            })

        # Use .get() so that a malformed reply is reported as a login failure
        # instead of surfacing as a bare KeyError
        if login_response.get('statusCode') != 200:
            error_message = login_response.get('errorMessage')
            raise ExtractorError(f'Login failed. Server message: {error_message}', expected=True)

        login_token = traverse_obj(login_response, ('sessionInfo', 'login_token'))
        if not login_token:
            raise ExtractorError('Login failed. Server response did not contain a login token')

        self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_token,
                         secure=True, expire_time=time.time() + 3600)

    def _get_formats_and_subtitles(self, url, media_id):
        """Fetch formats and subtitles from the Red Bee API using the logged-in session.

        The JWT is taken from the 'rtbf_jwt' cookie when available, otherwise it is
        requested from accounts.getJWT with the login token.

        @param url       Page URL; used to look up cookies and sent as 'pageURL'
        @param media_id  Asset id forwarded to the base-class implementation
        @returns         Tuple (formats, subtitles)
        """
        login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID)
        if not login_token:
            self.raise_login_required()

        session_jwt = try_call(lambda: self._get_cookies(url)['rtbf_jwt'].value) or self._download_json(
            'https://login.rtbf.be/accounts.getJWT', media_id, query={
                'login_token': login_token.value,
                'APIKey': self._GIGYA_API_KEY,
                'sdk': 'js_latest',
                'authMode': 'cookie',
                'pageURL': url,
                'sdkBuild': '13273',
                'format': 'json',
            })['id_token']

        return super()._get_formats_and_subtitles(media_id, jwt=session_jwt)

    def _real_extract(self, url):
        """Extract a media item via the auvio embed page, falling back to the Red Bee API."""
        live, media_id = self._match_valid_url(url).groups()
        embed_page = self._download_webpage(
            'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
            media_id, query={'id': media_id})

        media_data = self._html_search_regex(r'data-media="([^"]+)"', embed_page, 'media data', fatal=False)
        if not media_data:
            # The page marks its state with divs whose class lacks "hidden"
            if re.search(r'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page):
                raise ExtractorError('Livestream has ended.', expected=True)
            if re.search(r'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page):
                self.raise_login_required()

            raise ExtractorError('Could not find media data')

        data = self._parse_json(media_data, media_id)

        error = data.get('error')
        if error:
            raise ExtractorError(f'{self.IE_NAME} said: {error}', expected=True)

        # Media hosted by a known third-party provider is delegated to that extractor
        provider = data.get('provider')
        if provider in self._PROVIDERS:
            return self.url_result(data['url'], self._PROVIDERS[provider])

        title = traverse_obj(data, 'subtitle', 'title')
        is_live = data.get('isLive')
        height_re = r'-(\d+)p\.'
        formats, subtitles = [], {}

        # The old api still returns m3u8 and mpd manifest for livestreams, but these are 'fake'
        # since all they contain is a 20s video that is completely unrelated.
        # https://github.com/yt-dlp/yt-dlp/issues/4656#issuecomment-1214461092
        m3u8_url = None if is_live else traverse_obj(data, 'urlHlsAes128', 'urlHls')
        if m3u8_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        fix_url = lambda x: x.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x else x
        http_url = data.get('url')
        if formats and http_url and re.search(height_re, http_url):
            # Derive one direct HTTP format per HLS format with a known height
            # by substituting that height into the HTTP URL
            http_url = fix_url(http_url)
            for m3u8_f in formats[:]:  # iterate over a copy: the list is appended to below
                height = m3u8_f.get('height')
                if not height:
                    continue
                f = m3u8_f.copy()
                # Drop the copied protocol without failing if the key is absent
                f.pop('protocol', None)
                f.update({
                    'format_id': m3u8_f['format_id'].replace('hls-', 'http-'),
                    'url': re.sub(height_re, '-%dp.' % height, http_url),
                })
                formats.append(f)
        else:
            sources = data.get('sources') or {}
            for key, format_id in self._QUALITIES:
                format_url = sources.get(key)
                if not format_url:
                    continue
                height = int_or_none(self._search_regex(
                    height_re, format_url, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': fix_url(format_url),
                    'height': height,
                })

        mpd_url = None if is_live else data.get('urlDash')
        if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                mpd_url, media_id, mpd_id='dash', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        audio_url = data.get('urlAudio')
        if audio_url:
            formats.append({
                'format_id': 'audio',
                'url': audio_url,
                'vcodec': 'none',
            })

        # Tracks without a 'lang' are filed under 'fr'
        for track in (data.get('tracks') or {}).values():
            sub_url = track.get('url')
            if not sub_url:
                continue
            subtitles.setdefault(track.get('lang') or 'fr', []).append({
                'url': sub_url,
            })

        # Nothing usable in the embed data: fall back to the Red Bee API
        if not formats:
            fmts, subs = self._get_formats_and_subtitles(url, f'live_{media_id}' if is_live else media_id)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return {
            'id': media_id,
            'formats': formats,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'thumbnail': data.get('thumbnail'),
            'duration': float_or_none(data.get('realDuration')),
            'timestamp': int_or_none(data.get('liveFrom')),
            'series': data.get('programLabel'),
            'subtitles': subtitles,
            'is_live': is_live,
            '_format_sort_fields': ('res', 'proto'),
        }