]>
Commit | Line | Data |
---|---|---|
1 | import json | |
2 | import re | |
3 | import time | |
4 | import urllib.parse | |
5 | import uuid | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..utils import ( | |
9 | ExtractorError, | |
10 | float_or_none, | |
11 | int_or_none, | |
12 | strip_or_none, | |
13 | traverse_obj, | |
14 | unified_timestamp, | |
15 | ) | |
16 | ||
17 | ||
class RedBeeBaseIE(InfoExtractor):
    """Common helpers for extractors backed by the exposure.api.redbee.live API.

    Subclasses must define ``_REDBEE_CUSTOMER`` and ``_REDBEE_BUSINESS_UNIT``;
    both are interpolated into the API base URL (see ``_API_URL``).
    """

    # Random device identifier, generated once when the class is created and
    # sent with every authentication request.
    _DEVICE_ID = str(uuid.uuid4())

    @property
    def _API_URL(self):
        """
        Ref: https://apidocs.emp.ebsd.ericsson.net
        Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT
        """
        return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}'

    def _get_bearer_token(self, asset_id, jwt=None):
        """Request a session token from the auth endpoint.

        @param asset_id  identifier passed to ``_download_json`` as the video id
        @param jwt       optional JWT; when given, the ``gigyaLogin`` endpoint is
                         used instead of ``anonymous`` and the JWT is sent along
        @returns         the ``sessionToken`` field of the JSON response
        """
        request = {
            'deviceId': self._DEVICE_ID,
            'device': {
                'deviceId': self._DEVICE_ID,
                'name': 'Mozilla Firefox 102',
                'type': 'WEB',
            },
        }
        if jwt:
            request['jwt'] = jwt

        return self._download_json(
            f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}',
            asset_id, data=json.dumps(request).encode('utf-8'), headers={
                'Content-Type': 'application/json;charset=utf-8'
            })['sessionToken']

    def _get_formats_and_subtitles(self, asset_id, **kwargs):
        """Fetch the play entitlement for an asset and extract its streams.

        @param asset_id  asset identifier used in the entitlement URL
        @param kwargs    forwarded unchanged to ``_get_bearer_token``
        @returns         ``(formats, subtitles)`` merged over every entry of the
                         response's ``formats`` list
        """
        bearer_token = self._get_bearer_token(asset_id, **kwargs)
        api_response = self._download_json(
            f'{self._API_URL}/entitlement/{asset_id}/play',
            asset_id, headers={
                'Authorization': f'Bearer {bearer_token}',
                'Accept': 'application/json, text/plain, */*'
            })

        # Stream type reported by the API -> manifest extractor to use
        manifest_extractors = {
            'DASH': self._extract_mpd_formats_and_subtitles,
            'SMOOTHSTREAMING': self._extract_ism_formats_and_subtitles,
            'HLS': self._extract_m3u8_formats_and_subtitles,
        }

        formats, subtitles = [], {}
        for entry in api_response['formats']:
            media_locator = entry.get('mediaLocator')
            extract = manifest_extractors.get(entry.get('format'))
            # Entries without a locator or with an unrecognised stream type
            # contribute nothing.
            if not media_locator or not extract:
                continue

            # fatal=False: one broken manifest must not abort the others
            fmts, subs = extract(media_locator, asset_id, fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return formats, subtitles
76 | ||
77 | ||
class ParliamentLiveUKIE(RedBeeBaseIE):
    """Extractor for parliamentlive.tv event pages; streams come from the Red Bee API."""

    IE_NAME = 'parliamentlive.tv'
    IE_DESC = 'UK parliament videos'
    _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    _REDBEE_CUSTOMER = 'UKParliament'
    _REDBEE_BUSINESS_UNIT = 'ParliamentLive'

    _TESTS = [{
        'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
        'info_dict': {
            'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
            'ext': 'mp4',
            'title': 'Home Affairs Committee',
            'timestamp': 1395153872,
            'upload_date': '20140318',
            'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail',
        },
    }, {
        'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
        'only_matching': True,
    }, {
        'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377',
        'info_dict': {
            'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377',
            'ext': 'mp4',
            'title': 'House of Commons',
            'timestamp': 1658392447,
            'upload_date': '20220721',
            'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for an event.

        Formats and subtitles come from the inherited Red Bee helper, using the
        event UUID as the asset id. Title, thumbnail and timestamp come from the
        site's ``GetShareVideo`` endpoint, which is fetched non-fatally.
        """
        video_id = self._match_id(url)

        formats, subtitles = self._get_formats_and_subtitles(video_id)

        # Metadata is optional: with fatal=False a failed request does not
        # abort extraction and the traverse_obj lookups below yield None.
        video_info = self._download_json(
            f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False)

        # Single sort with the explicit field preference; an earlier default
        # sort was redundant because this call decides the final order.
        self._sort_formats(formats, ['res', 'proto'])

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': traverse_obj(video_info, ('event', 'title')),
            'thumbnail': traverse_obj(video_info, 'thumbnailUrl'),
            'timestamp': traverse_obj(
                video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp),
        }
131 | ||
132 | ||
class RTBFIE(RedBeeBaseIE):
    """Extractor for rtbf.be video, ouftivi and auvio pages.

    Media is first looked up on the auvio embed page; only when that yields no
    formats does extraction fall back to the Red Bee API, which needs a login.
    """

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?rtbf\.be/
        (?:
            video/[^?]+\?.*\bid=|
            ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
            auvio/[^/]+\?.*\b(?P<live>l)?id=
        )(?P<id>\d+)'''
    _NETRC_MACHINE = 'rtbf'

    _REDBEE_CUSTOMER = 'RTBF'
    _REDBEE_BUSINESS_UNIT = 'Auvio'

    _TESTS = [{
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '8c876a1cceeb6cf31b476461ade72384',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'description': '(du 25/04/2014)',
            'duration': 3099.54,
            'upload_date': '20140425',
            'timestamp': 1398456300,
        },
        'skip': 'No longer available',
    }, {
        # geo restricted
        'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
        'only_matching': True,
    }, {
        # Live
        'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
        'only_matching': True,
    }, {
        # Audio
        'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
        'only_matching': True,
    }, {
        # With Subtitle
        'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
        'only_matching': True,
    }, {
        'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926',
        'md5': 'd5d11bb62169fef38d7ce7ac531e034f',
        'info_dict': {
            'id': '2921926',
            'ext': 'mp4',
            'title': 'Le handicap un confinement perpétuel - Maladie de Lyme',
            'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52',
            'duration': 5258.8,
            'upload_date': '20220727',
            'timestamp': 1658934000,
            'series': '#Investigation',
            'thumbnail': r're:^https?://[^?&]+\.jpg$',
        },
    }, {
        'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492',
        'md5': '054f9f143bc79c89647c35e5a7d35fa8',
        'info_dict': {
            'id': '2920492',
            'ext': 'mp4',
            'title': '04 - Le crime de la rue Royale',
            'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6',
            'duration': 1574.6,
            'upload_date': '20220723',
            'timestamp': 1658596887,
            'series': 'La Belgique criminelle - TV',
            'thumbnail': r're:^https?://[^?&]+\.jpg$',
        },
    }]

    _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
    # Embed-page ``provider`` value -> extractor key handed to url_result
    _PROVIDERS = {
        'YOUTUBE': 'Youtube',
        'DAILYMOTION': 'Dailymotion',
        'VIMEO': 'Vimeo',
    }
    # (key in the embed data's ``sources`` mapping, format_id to report)
    _QUALITIES = [
        ('mobile', 'SD'),
        ('web', 'MD'),
        ('high', 'HD'),
    ]
    _LOGIN_URL = 'https://login.rtbf.be/accounts.login'
    _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO'
    _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}'

    # Matches the "-<height>p." fragment of progressive media URLs
    _HEIGHT_RE = r'-(\d+)p\.'

    def _perform_login(self, username, password):
        """Log in with the given credentials and store the login token as a cookie.

        Does nothing when the login cookie is already set for the login URL.
        Raises an expected ExtractorError when the response's ``statusCode``
        is not 200.
        """
        if self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID):
            return

        # NOTE(review): presumably the login endpoint expects a ``gmid`` cookie
        # to be present - confirm against the service.
        self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600)

        login_response = self._download_json(
            self._LOGIN_URL, None, data=urllib.parse.urlencode({
                'loginID': username,
                'password': password,
                'APIKey': self._GIGYA_API_KEY,
                'targetEnv': 'jssdk',
                'sessionExpiration': '-2',
            }).encode('utf-8'), headers={
                'Content-Type': 'application/x-www-form-urlencoded',
            })

        if login_response['statusCode'] != 200:
            raise ExtractorError(
                f'Login failed. Server message: {login_response["errorMessage"]}', expected=True)

        self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, login_response['sessionInfo']['login_token'],
                         secure=True, expire_time=time.time() + 3600)

    def _get_formats_and_subtitles(self, url, media_id):
        """Fetch formats and subtitles from the Red Bee API for a logged-in user.

        Reads the login token from the cookies for ``url``, exchanges it for a
        JWT, and passes that JWT to the base-class implementation. Calls
        ``raise_login_required`` when no login cookie is present.
        """
        login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID)
        if not login_token:
            self.raise_login_required()

        session_jwt = self._download_json(
            'https://login.rtbf.be/accounts.getJWT', media_id, query={
                'login_token': login_token.value,
                'APIKey': self._GIGYA_API_KEY,
                'sdk': 'js_latest',
                'authMode': 'cookie',
                'pageURL': url,
                'sdkBuild': '13273',
                'format': 'json',
            })['id_token']

        return super()._get_formats_and_subtitles(media_id, jwt=session_jwt)

    @staticmethod
    def _fix_url(media_url):
        """Swap the ``rtbf-vod.`` host prefix for ``rtbf.`` on URLs containing ``/geo/drm/``."""
        if '/geo/drm/' in media_url:
            return media_url.replace('//rtbf-vod.', '//rtbf.')
        return media_url

    def _extract_http_formats(self, data, hls_formats):
        """Return progressive HTTP formats described by the embed data.

        When HLS formats exist and ``data['url']`` carries a height fragment, one
        HTTP format is derived per HLS format that has a height. Otherwise the
        fixed qualities listed in ``data['sources']`` are used.
        """
        http_url = data.get('url')
        if hls_formats and http_url and re.search(self._HEIGHT_RE, http_url):
            http_url = self._fix_url(http_url)
            http_formats = []
            for hls_format in hls_formats:
                height = hls_format.get('height')
                if not height:
                    continue
                derived = hls_format.copy()
                # The copy is a direct download, so the HLS protocol marker
                # must not carry over; pop() tolerates a missing key.
                derived.pop('protocol', None)
                derived.update({
                    'format_id': hls_format['format_id'].replace('hls-', 'http-'),
                    'url': re.sub(self._HEIGHT_RE, '-%dp.' % height, http_url),
                })
                http_formats.append(derived)
            return http_formats

        sources = data.get('sources') or {}
        http_formats = []
        for key, format_id in self._QUALITIES:
            format_url = sources.get(key)
            if not format_url:
                continue
            http_formats.append({
                'format_id': format_id,
                'url': self._fix_url(format_url),
                'height': int_or_none(self._search_regex(
                    self._HEIGHT_RE, format_url, 'height', default=None)),
            })
        return http_formats

    def _extract_embed_formats(self, data, media_id):
        """Collect HLS, HTTP, DASH and audio-only formats from the embed data."""
        formats = []

        m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))

        # Only the HLS formats gathered so far are consulted, so the derived
        # HTTP formats end up after them in the list.
        formats.extend(self._extract_http_formats(data, list(formats)))

        mpd_url = data.get('urlDash')
        # DASH is skipped for media flagged ``drm`` unless the user opted in
        if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
            formats.extend(self._extract_mpd_formats(
                mpd_url, media_id, mpd_id='dash', fatal=False))

        audio_url = data.get('urlAudio')
        if audio_url:
            formats.append({
                'format_id': 'audio',
                'url': audio_url,
                'vcodec': 'none',
            })

        return formats

    @staticmethod
    def _extract_embed_subtitles(data):
        """Build the subtitles mapping from ``data['tracks']``; language defaults to ``fr``."""
        subtitles = {}
        for track in (data.get('tracks') or {}).values():
            sub_url = track.get('url')
            if not sub_url:
                continue
            subtitles.setdefault(track.get('lang') or 'fr', []).append({
                'url': sub_url,
            })
        return subtitles

    def _real_extract(self, url):
        """Extract a media item from its embed page, falling back to the Red Bee API.

        Raises an expected ExtractorError when the embed data carries an
        ``error`` entry. Media hosted by a provider listed in ``_PROVIDERS`` is
        delegated to that extractor.
        """
        live, media_id = self._match_valid_url(url).groups()
        embed_page = self._download_webpage(
            'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
            media_id, query={'id': media_id})
        data = self._parse_json(self._html_search_regex(
            r'data-media="([^"]+)"', embed_page, 'media data'), media_id)

        error = data.get('error')
        if error:
            raise ExtractorError(f'{self.IE_NAME} said: {error}', expected=True)

        provider = data.get('provider')
        if provider in self._PROVIDERS:
            return self.url_result(data['url'], self._PROVIDERS[provider])

        # Read before any manifest is fetched so that a missing title fails early
        title = data['subtitle']
        is_live = data.get('isLive')

        formats = self._extract_embed_formats(data, media_id)
        subtitles = self._extract_embed_subtitles(data)

        if not formats:
            # Nothing on the embed page: ask the Red Bee API (requires login)
            fmts, subs = self._get_formats_and_subtitles(url, media_id)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        self._sort_formats(formats, ['res', 'proto'])
        return {
            'id': media_id,
            'formats': formats,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'thumbnail': data.get('thumbnail'),
            'duration': float_or_none(data.get('realDuration')),
            'timestamp': int_or_none(data.get('liveFrom')),
            'series': data.get('programLabel'),
            'subtitles': subtitles,
            'is_live': is_live,
        }