]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/redbee.py
[extractor] Deprecate `_sort_formats`
[yt-dlp.git] / yt_dlp / extractor / redbee.py
CommitLineData
2a5e5477
E
1import json
2import re
3import time
4import urllib.parse
5import uuid
6
7from .common import InfoExtractor
8from ..utils import (
9 ExtractorError,
10 float_or_none,
11 int_or_none,
12 strip_or_none,
13 traverse_obj,
b85703d1 14 try_call,
2a5e5477
E
15 unified_timestamp,
16)
17
18
class RedBeeBaseIE(InfoExtractor):
    # Random device identifier, created once when the class body is executed
    # and sent with every authentication request.
    _DEVICE_ID = str(uuid.uuid4())

    @property
    def _API_URL(self):
        """
        Base URL of the exposure API for the configured customer / business unit.

        Ref: https://apidocs.emp.ebsd.ericsson.net
        Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT
        """
        return f'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}'

    def _get_bearer_token(self, asset_id, jwt=None):
        """
        Request a session token from the auth endpoint and return it.

        When `jwt` is truthy it is added to the payload and the `gigyaLogin`
        endpoint is used; otherwise the `anonymous` endpoint is used.
        `asset_id` is only passed on to `_download_json` as the video id.
        """
        device_id = self._DEVICE_ID
        payload = {
            'deviceId': device_id,
            'device': {
                'deviceId': device_id,
                'name': 'Mozilla Firefox 102',
                'type': 'WEB',
            },
        }
        if jwt:
            payload['jwt'] = jwt

        auth_response = self._download_json(
            f'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}',
            asset_id, data=json.dumps(payload).encode('utf-8'), headers={
                'Content-Type': 'application/json;charset=utf-8'
            })
        return auth_response['sessionToken']

    def _get_formats_and_subtitles(self, asset_id, **kwargs):
        """
        Fetch the play entitlement for `asset_id` and extract all manifests.

        Extra keyword arguments are forwarded to `_get_bearer_token`.
        Returns a `(formats, subtitles)` tuple: a list of format dicts and a
        dict of subtitles merged across every processed manifest.
        """
        bearer_token = self._get_bearer_token(asset_id, **kwargs)
        api_response = self._download_json(
            f'{self._API_URL}/entitlement/{asset_id}/play',
            asset_id, headers={
                'Authorization': f'Bearer {bearer_token}',
                'Accept': 'application/json, text/plain, */*'
            })

        formats, subtitles = [], {}
        for format_data in api_response['formats']:
            media_locator = format_data.get('mediaLocator')
            if not media_locator:
                # Nothing to download for this entry
                continue

            # Pick the manifest parser matching the advertised format;
            # entries with any other format contribute nothing.
            manifest_type = format_data.get('format')
            if manifest_type == 'DASH':
                extract_manifest = self._extract_mpd_formats_and_subtitles
            elif manifest_type == 'SMOOTHSTREAMING':
                extract_manifest = self._extract_ism_formats_and_subtitles
            elif manifest_type == 'HLS':
                extract_manifest = self._extract_m3u8_formats_and_subtitles
            else:
                extract_manifest = None

            fmts, subs = [], {}
            if extract_manifest is not None:
                fmts, subs = extract_manifest(media_locator, asset_id, fatal=False)

            # Flag every format of a DRM-marked entry
            if format_data.get('drm'):
                for f in fmts:
                    f['has_drm'] = True

            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return formats, subtitles
81
82
class ParliamentLiveUKIE(RedBeeBaseIE):
    IE_NAME = 'parliamentlive.tv'
    IE_DESC = 'UK parliament videos'
    # The event id is a UUID; the whole pattern is matched case-insensitively.
    _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    # Identifiers interpolated into the base class' `_API_URL`
    _REDBEE_CUSTOMER = 'UKParliament'
    _REDBEE_BUSINESS_UNIT = 'ParliamentLive'

    _TESTS = [{
        'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
        'info_dict': {
            'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
            'ext': 'mp4',
            'title': 'Home Affairs Committee',
            'timestamp': 1395153872,
            'upload_date': '20140318',
            'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail',
        },
    }, {
        'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
        'only_matching': True,
    }, {
        'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377',
        'info_dict': {
            'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377',
            'ext': 'mp4',
            'title': 'House of Commons',
            'timestamp': 1658392447,
            'upload_date': '20220721',
            'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail',
        },
    }]

    def _real_extract(self, url):
        """Return the info dict for the event whose id is embedded in `url`."""
        video_id = self._match_id(url)

        # Streams are resolved first, through the inherited entitlement lookup
        formats, subtitles = self._get_formats_and_subtitles(video_id)

        # Metadata request is non-fatal; `traverse_obj` is relied upon to
        # yield None for each field when it did not return usable JSON.
        video_info = self._download_json(
            f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False)

        title = traverse_obj(video_info, ('event', 'title'))
        thumbnail = traverse_obj(video_info, 'thumbnailUrl')
        timestamp = traverse_obj(
            video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp)

        info = {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': title,
            'thumbnail': thumbnail,
            'timestamp': timestamp,
            '_format_sort_fields': ('res', 'proto'),
        }
        return info
134
135
class RTBFIE(RedBeeBaseIE):
    # Captures an optional `live` marker (the "l" of "lid=") and the numeric id
    _VALID_URL = r'''(?x)
        https?://(?:www\.)?rtbf\.be/
        (?:
            video/[^?]+\?.*\bid=|
            ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
            auvio/[^/]+\?.*\b(?P<live>l)?id=
        )(?P<id>\d+)'''
    _NETRC_MACHINE = 'rtbf'

    # Identifiers interpolated into the base class' `_API_URL`
    _REDBEE_CUSTOMER = 'RTBF'
    _REDBEE_BUSINESS_UNIT = 'Auvio'

    _TESTS = [{
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '8c876a1cceeb6cf31b476461ade72384',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'description': '(du 25/04/2014)',
            'duration': 3099.54,
            'upload_date': '20140425',
            'timestamp': 1398456300,
        },
        'skip': 'No longer available',
    }, {
        # geo restricted
        'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
        'only_matching': True,
    }, {
        # Live
        'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
        'only_matching': True,
    }, {
        # Audio
        'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
        'only_matching': True,
    }, {
        # With Subtitle
        'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
        'only_matching': True,
    }, {
        'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926',
        'md5': 'd5d11bb62169fef38d7ce7ac531e034f',
        'info_dict': {
            'id': '2921926',
            'ext': 'mp4',
            'title': 'Le handicap un confinement perpétuel - Maladie de Lyme',
            'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52',
            'duration': 5258.8,
            'upload_date': '20220727',
            'timestamp': 1658934000,
            'series': '#Investigation',
            'thumbnail': r're:^https?://[^?&]+\.jpg$',
        },
    }, {
        'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492',
        'md5': '054f9f143bc79c89647c35e5a7d35fa8',
        'info_dict': {
            'id': '2920492',
            'ext': 'mp4',
            'title': '04 - Le crime de la rue Royale',
            'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6',
            'duration': 1574.6,
            'upload_date': '20220723',
            'timestamp': 1658596887,
            'series': 'La Belgique criminelle - TV',
            'thumbnail': r're:^https?://[^?&]+\.jpg$',
        },
    }]

    _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
    # Maps the `provider` value of the embed data to the extractor that the
    # result is delegated to
    _PROVIDERS = {
        'YOUTUBE': 'Youtube',
        'DAILYMOTION': 'Dailymotion',
        'VIMEO': 'Vimeo',
    }
    # (key in the embed data's `sources` mapping, resulting format_id)
    _QUALITIES = [
        ('mobile', 'SD'),
        ('web', 'MD'),
        ('high', 'HD'),
    ]
    _LOGIN_URL = 'https://login.rtbf.be/accounts.login'
    _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO'
    # Name of the cookie that stores the login token
    _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}'

    def _perform_login(self, username, password):
        """
        Log in with the given credentials and store the login token cookie.

        Does nothing when the login cookie is already present. Raises
        ExtractorError (expected) when the server reports a status other
        than 200.
        """
        already_logged_in = self._get_cookies(self._LOGIN_URL).get(self._LOGIN_COOKIE_ID)
        if already_logged_in:
            return

        self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600)

        login_form = urllib.parse.urlencode({
            'loginID': username,
            'password': password,
            'APIKey': self._GIGYA_API_KEY,
            'targetEnv': 'jssdk',
            'sessionExpiration': '-2',
        }).encode('utf-8')
        login_response = self._download_json(
            self._LOGIN_URL, None, data=login_form, headers={
                'Content-Type': 'application/x-www-form-urlencoded',
            })

        if login_response['statusCode'] != 200:
            raise ExtractorError('Login failed. Server message: %s' % login_response['errorMessage'], expected=True)

        # Persist the token for one hour so later requests can reuse it
        login_token = login_response['sessionInfo']['login_token']
        self._set_cookie(
            '.rtbf.be', self._LOGIN_COOKIE_ID, login_token,
            secure=True, expire_time=time.time() + 3600)

    def _get_formats_and_subtitles(self, url, media_id):
        """
        Resolve formats and subtitles for `media_id` using the logged-in session.

        Requires the login cookie for `url` (otherwise login is requested).
        The JWT is read from the `rtbf_jwt` cookie when available and fetched
        from the accounts.getJWT endpoint otherwise, then handed to the base
        class implementation.
        """
        login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID)
        if not login_token:
            self.raise_login_required()

        session_jwt = try_call(lambda: self._get_cookies(url)['rtbf_jwt'].value)
        if not session_jwt:
            jwt_response = self._download_json(
                'https://login.rtbf.be/accounts.getJWT', media_id, query={
                    'login_token': login_token.value,
                    'APIKey': self._GIGYA_API_KEY,
                    'sdk': 'js_latest',
                    'authMode': 'cookie',
                    'pageURL': url,
                    'sdkBuild': '13273',
                    'format': 'json',
                })
            session_jwt = jwt_response['id_token']

        return super()._get_formats_and_subtitles(media_id, jwt=session_jwt)

    def _real_extract(self, url):
        """
        Extract a media or livestream from its embed page.

        Formats are collected from the embed data (HLS, derived HTTP variants
        or per-quality sources, DASH, audio); only when none are found is the
        entitlement lookup in `_get_formats_and_subtitles` used.
        """
        live, media_id = self._match_valid_url(url).groups()
        embed_page = self._download_webpage(
            'https://www.rtbf.be/auvio/embed/' + ('direct' if live else 'media'),
            media_id, query={'id': media_id})

        media_data = self._html_search_regex(r'data-media="([^"]+)"', embed_page, 'media data', fatal=False)
        if not media_data:
            # Work out why there is no media data from the error blocks that
            # are not hidden on the page
            if re.search(r'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page):
                raise ExtractorError('Livestream has ended.', expected=True)
            if re.search(r'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page):
                self.raise_login_required()

            raise ExtractorError('Could not find media data')

        data = self._parse_json(media_data, media_id)

        error = data.get('error')
        if error:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

        # Media hosted by a known third party is delegated to its extractor
        provider = data.get('provider')
        if provider in self._PROVIDERS:
            return self.url_result(data['url'], self._PROVIDERS[provider])

        title = traverse_obj(data, 'subtitle', 'title')
        is_live = data.get('isLive')
        height_re = r'-(\d+)p\.'
        formats, subtitles = [], {}

        def fix_url(candidate):
            # Only URLs containing '/geo/drm/' get their host prefix rewritten
            if '/geo/drm/' in candidate:
                return candidate.replace('//rtbf-vod.', '//rtbf.')
            return candidate

        # The old api still returns m3u8 and mpd manifest for livestreams, but these are 'fake'
        # since all they contain is a 20s video that is completely unrelated.
        # https://github.com/yt-dlp/yt-dlp/issues/4656#issuecomment-1214461092
        m3u8_url = None if is_live else traverse_obj(data, 'urlHlsAes128', 'urlHls')
        if m3u8_url:
            fmts, subs = self._extract_m3u8_formats_and_subtitles(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        http_url = data.get('url')
        if formats and http_url and re.search(height_re, http_url):
            # Derive one direct HTTP variant per HLS format with a known
            # height by substituting that height into the HTTP URL
            http_url = fix_url(http_url)
            for m3u8_f in list(formats):
                height = m3u8_f.get('height')
                if not height:
                    continue
                http_f = dict(m3u8_f)
                del http_f['protocol']
                http_f['format_id'] = m3u8_f['format_id'].replace('hls-', 'http-')
                http_f['url'] = re.sub(height_re, '-%dp.' % height, http_url)
                formats.append(http_f)
        else:
            # Otherwise use the per-quality source URLs, if any
            sources = data.get('sources') or {}
            for key, format_id in self._QUALITIES:
                format_url = sources.get(key)
                if not format_url:
                    continue
                height = int_or_none(self._search_regex(
                    height_re, format_url, 'height', default=None))
                formats.append({
                    'format_id': format_id,
                    'url': fix_url(format_url),
                    'height': height,
                })

        # DASH is skipped for DRM media unless unplayable formats are allowed
        mpd_url = None if is_live else data.get('urlDash')
        if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
            fmts, subs = self._extract_mpd_formats_and_subtitles(
                mpd_url, media_id, mpd_id='dash', fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        audio_url = data.get('urlAudio')
        if audio_url:
            formats.append({
                'format_id': 'audio',
                'url': audio_url,
                'vcodec': 'none',
            })

        # Subtitle tracks without an explicit language are filed under 'fr'
        for track in (data.get('tracks') or {}).values():
            sub_url = track.get('url')
            if not sub_url:
                continue
            subtitles.setdefault(track.get('lang') or 'fr', []).append({
                'url': sub_url,
            })

        if not formats:
            # Nothing usable in the embed data: fall back to the entitlement
            # lookup, which needs a logged-in session
            fmts, subs = self._get_formats_and_subtitles(url, f'live_{media_id}' if is_live else media_id)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        info = {
            'id': media_id,
            'formats': formats,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'thumbnail': data.get('thumbnail'),
            'duration': float_or_none(data.get('realDuration')),
            'timestamp': int_or_none(data.get('liveFrom')),
            'series': data.get('programLabel'),
            'subtitles': subtitles,
            'is_live': is_live,
            '_format_sort_fields': ('res', 'proto'),
        }
        return info