jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/redbee.py
[youtube] Fix error reporting of "Incomplete data"
[yt-dlp.git] / yt_dlp / extractor / redbee.py
1 import json
2 import re
3 import time
4 import urllib.parse
5 import uuid
6
7 from .common import InfoExtractor
8 from ..utils import (
9 ExtractorError,
10 float_or_none,
11 int_or_none,
12 strip_or_none,
13 traverse_obj,
14 unified_timestamp,
15 )
16
17
class RedBeeBaseIE(InfoExtractor):
    """Shared helpers for extractors that talk to the Red Bee exposure API.

    Subclasses must set _REDBEE_CUSTOMER and _REDBEE_BUSINESS_UNIT; both are
    interpolated into the API base URL.
    """

    # Created once when the class is defined; sent as the device id on auth requests
    _DEVICE_ID = str(uuid.uuid4())

    @property
    def _API_URL(self):
        """
        Ref: https://apidocs.emp.ebsd.ericsson.net
        Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT
        """
        customer, business_unit = self._REDBEE_CUSTOMER, self._REDBEE_BUSINESS_UNIT
        return f'https://exposure.api.redbee.live/v2/customer/{customer}/businessunit/{business_unit}'

    def _get_bearer_token(self, asset_id, jwt=None):
        """Return the 'sessionToken' from the auth endpoint.

        The 'gigyaLogin' endpoint is used (and the token included in the
        request body) when `jwt` is truthy; otherwise 'anonymous' is used.
        """
        device_id = self._DEVICE_ID
        payload = {
            'deviceId': device_id,
            'device': {
                'deviceId': device_id,
                'name': 'Mozilla Firefox 102',
                'type': 'WEB',
            },
        }
        if jwt:
            payload['jwt'] = jwt

        endpoint = 'gigyaLogin' if jwt else 'anonymous'
        auth_response = self._download_json(
            f'{self._API_URL}/auth/{endpoint}', asset_id,
            data=json.dumps(payload).encode('utf-8'),
            headers={'Content-Type': 'application/json;charset=utf-8'})
        return auth_response['sessionToken']

    def _get_formats_and_subtitles(self, asset_id, **kwargs):
        """Return (formats, subtitles) collected from the asset's play entitlement.

        Extra keyword arguments are forwarded to _get_bearer_token().
        """
        bearer_token = self._get_bearer_token(asset_id, **kwargs)
        request_headers = {
            'Authorization': f'Bearer {bearer_token}',
            'Accept': 'application/json, text/plain, */*'
        }
        api_response = self._download_json(
            f'{self._API_URL}/entitlement/{asset_id}/play', asset_id, headers=request_headers)

        formats, subtitles = [], {}
        for format_data in api_response['formats']:
            media_locator = format_data.get('mediaLocator')
            if not media_locator:
                continue

            # Choose the manifest extractor matching the advertised format type
            format_type = format_data.get('format')
            if format_type == 'DASH':
                extract = self._extract_mpd_formats_and_subtitles
            elif format_type == 'SMOOTHSTREAMING':
                extract = self._extract_ism_formats_and_subtitles
            elif format_type == 'HLS':
                extract = self._extract_m3u8_formats_and_subtitles
            else:
                # Unsupported format types contribute nothing
                continue

            fmts, subs = extract(media_locator, asset_id, fatal=False)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        return formats, subtitles
76
77
class ParliamentLiveUKIE(RedBeeBaseIE):
    """Extractor for parliamentlive.tv event pages (served through Red Bee)."""

    IE_NAME = 'parliamentlive.tv'
    IE_DESC = 'UK parliament videos'
    _VALID_URL = r'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'

    _REDBEE_CUSTOMER = 'UKParliament'
    _REDBEE_BUSINESS_UNIT = 'ParliamentLive'

    _TESTS = [{
        'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
        'info_dict': {
            'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
            'ext': 'mp4',
            'title': 'Home Affairs Committee',
            'timestamp': 1395153872,
            'upload_date': '20140318',
            'thumbnail': r're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail',
        },
    }, {
        'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
        'only_matching': True,
    }, {
        'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377',
        'info_dict': {
            'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377',
            'ext': 'mp4',
            'title': 'House of Commons',
            'timestamp': 1658392447,
            'upload_date': '20220721',
            'thumbnail': r're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the event whose id is captured from `url`."""
        video_id = self._match_id(url)

        formats, subtitles = self._get_formats_and_subtitles(video_id)

        # Metadata request is non-fatal; the lookups below go through traverse_obj
        video_info = self._download_json(
            f'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id, fatal=False)

        # Sort exactly once with the intended preference; a preceding default
        # sort would only be overridden by this call
        self._sort_formats(formats, ['res', 'proto'])

        return {
            'id': video_id,
            'formats': formats,
            'subtitles': subtitles,
            'title': traverse_obj(video_info, ('event', 'title')),
            'thumbnail': traverse_obj(video_info, 'thumbnailUrl'),
            'timestamp': traverse_obj(
                video_info, ('event', 'publishedStartTime'), expected_type=unified_timestamp),
        }
131
132
class RTBFIE(RedBeeBaseIE):
    """Extractor for rtbf.be video, ouftivi and auvio pages."""

    _VALID_URL = r'''(?x)
        https?://(?:www\.)?rtbf\.be/
        (?:
            video/[^?]+\?.*\bid=|
            ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
            auvio/[^/]+\?.*\b(?P<live>l)?id=
        )(?P<id>\d+)'''
    _NETRC_MACHINE = 'rtbf'

    _REDBEE_CUSTOMER = 'RTBF'
    _REDBEE_BUSINESS_UNIT = 'Auvio'

    _TESTS = [{
        'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
        'md5': '8c876a1cceeb6cf31b476461ade72384',
        'info_dict': {
            'id': '1921274',
            'ext': 'mp4',
            'title': 'Les Diables au coeur (épisode 2)',
            'description': '(du 25/04/2014)',
            'duration': 3099.54,
            'upload_date': '20140425',
            'timestamp': 1398456300,
        },
        'skip': 'No longer available',
    }, {
        # geo restricted
        'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
        'only_matching': True,
    }, {
        'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
        'only_matching': True,
    }, {
        # Live
        'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
        'only_matching': True,
    }, {
        # Audio
        'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
        'only_matching': True,
    }, {
        # With Subtitle
        'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
        'only_matching': True,
    }, {
        'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926',
        'md5': 'd5d11bb62169fef38d7ce7ac531e034f',
        'info_dict': {
            'id': '2921926',
            'ext': 'mp4',
            'title': 'Le handicap un confinement perpétuel - Maladie de Lyme',
            'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52',
            'duration': 5258.8,
            'upload_date': '20220727',
            'timestamp': 1658934000,
            'series': '#Investigation',
            'thumbnail': r're:^https?://[^?&]+\.jpg$',
        },
    }, {
        'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492',
        'md5': '054f9f143bc79c89647c35e5a7d35fa8',
        'info_dict': {
            'id': '2920492',
            'ext': 'mp4',
            'title': '04 - Le crime de la rue Royale',
            'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6',
            'duration': 1574.6,
            'upload_date': '20220723',
            'timestamp': 1658596887,
            'series': 'La Belgique criminelle - TV',
            'thumbnail': r're:^https?://[^?&]+\.jpg$',
        },
    }]

    _IMAGE_HOST = 'http://ds1.ds.static.rtbf.be'
    # Provider key from the embed data -> extractor key handed to url_result()
    _PROVIDERS = {
        'YOUTUBE': 'Youtube',
        'DAILYMOTION': 'Dailymotion',
        'VIMEO': 'Vimeo',
    }
    # (key in the embed data's 'sources' mapping, format_id to report)
    _QUALITIES = [
        ('mobile', 'SD'),
        ('web', 'MD'),
        ('high', 'HD'),
    ]
    _LOGIN_URL = 'https://login.rtbf.be/accounts.login'
    _GIGYA_API_KEY = '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO'
    _LOGIN_COOKIE_ID = f'glt_{_GIGYA_API_KEY}'

    def _perform_login(self, username, password):
        """Log in with the given credentials and store the login token as a cookie.

        Does nothing when the login cookie is already present. Raises
        ExtractorError when the response's statusCode is not 200.
        """
        existing_cookies = self._get_cookies(self._LOGIN_URL)
        if existing_cookies.get(self._LOGIN_COOKIE_ID):
            return

        self._set_cookie('.rtbf.be', 'gmid', 'gmid.ver4', secure=True, expire_time=time.time() + 3600)

        form_data = urllib.parse.urlencode({
            'loginID': username,
            'password': password,
            'APIKey': self._GIGYA_API_KEY,
            'targetEnv': 'jssdk',
            'sessionExpiration': '-2',
        }).encode('utf-8')
        login_response = self._download_json(
            self._LOGIN_URL, None, data=form_data,
            headers={'Content-Type': 'application/x-www-form-urlencoded'})

        if login_response['statusCode'] != 200:
            raise ExtractorError('Login failed. Server message: %s' % login_response['errorMessage'], expected=True)

        token = login_response['sessionInfo']['login_token']
        self._set_cookie('.rtbf.be', self._LOGIN_COOKIE_ID, token,
                         secure=True, expire_time=time.time() + 3600)

    def _get_formats_and_subtitles(self, url, media_id):
        """Fetch formats and subtitles through the Red Bee API using the logged-in session.

        Requires the login cookie for `url`; calls raise_login_required() when
        it is absent.
        """
        login_token = self._get_cookies(url).get(self._LOGIN_COOKIE_ID)
        if not login_token:
            self.raise_login_required()

        jwt_query = {
            'login_token': login_token.value,
            'APIKey': self._GIGYA_API_KEY,
            'sdk': 'js_latest',
            'authMode': 'cookie',
            'pageURL': url,
            'sdkBuild': '13273',
            'format': 'json',
        }
        jwt_response = self._download_json(
            'https://login.rtbf.be/accounts.getJWT', media_id, query=jwt_query)
        session_jwt = jwt_response['id_token']

        return super()._get_formats_and_subtitles(media_id, jwt=session_jwt)

    def _real_extract(self, url):
        """Extract media information from the auvio embed page for `url`."""
        live, media_id = self._match_valid_url(url).groups()
        embed_kind = 'direct' if live else 'media'
        embed_page = self._download_webpage(
            'https://www.rtbf.be/auvio/embed/' + embed_kind,
            media_id, query={'id': media_id})
        media_json = self._html_search_regex(
            r'data-media="([^"]+)"', embed_page, 'media data')
        data = self._parse_json(media_json, media_id)

        error = data.get('error')
        if error:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, error), expected=True)

        # Media hosted by a known third-party provider is delegated to its extractor
        provider = data.get('provider')
        if provider in self._PROVIDERS:
            return self.url_result(data['url'], self._PROVIDERS[provider])

        title = data['subtitle']
        height_re = r'-(\d+)p\.'

        def fix_url(candidate):
            # Only URLs under /geo/drm/ get their host rewritten
            if '/geo/drm/' in candidate:
                return candidate.replace('//rtbf-vod.', '//rtbf.')
            return candidate

        formats = []

        m3u8_url = data.get('urlHlsAes128') or data.get('urlHls')
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, media_id, 'mp4', m3u8_id='hls', fatal=False))

        http_url = data.get('url')
        if formats and http_url and re.search(height_re, http_url):
            # Derive an HTTP variant from every HLS format that has a height by
            # substituting that height into the direct URL
            http_url = fix_url(http_url)
            http_formats = []
            for hls_format in formats:
                height = hls_format.get('height')
                if not height:
                    continue
                http_format = hls_format.copy()
                del http_format['protocol']
                http_format['format_id'] = hls_format['format_id'].replace('hls-', 'http-')
                http_format['url'] = re.sub(height_re, '-%dp.' % height, http_url)
                http_formats.append(http_format)
            formats.extend(http_formats)
        else:
            sources = data.get('sources') or {}
            for source_key, format_id in self._QUALITIES:
                format_url = sources.get(source_key)
                if not format_url:
                    continue
                matched_height = self._search_regex(
                    height_re, format_url, 'height', default=None)
                formats.append({
                    'format_id': format_id,
                    'url': fix_url(format_url),
                    'height': int_or_none(matched_height),
                })

        mpd_url = data.get('urlDash')
        if mpd_url and (self.get_param('allow_unplayable_formats') or not data.get('drm')):
            formats.extend(self._extract_mpd_formats(
                mpd_url, media_id, mpd_id='dash', fatal=False))

        audio_url = data.get('urlAudio')
        if audio_url:
            formats.append({
                'format_id': 'audio',
                'url': audio_url,
                'vcodec': 'none',
            })

        subtitles = {}
        tracks = data.get('tracks') or {}
        for track in tracks.values():
            sub_url = track.get('url')
            if not sub_url:
                continue
            # Tracks without a language are filed under 'fr'
            language = track.get('lang') or 'fr'
            subtitles.setdefault(language, []).append({'url': sub_url})

        # Nothing usable in the embed data: fall back to the Red Bee API
        if not formats:
            fmts, subs = self._get_formats_and_subtitles(url, media_id)
            formats.extend(fmts)
            self._merge_subtitles(subs, target=subtitles)

        self._sort_formats(formats, ['res', 'proto'])
        return {
            'id': media_id,
            'formats': formats,
            'title': title,
            'description': strip_or_none(data.get('description')),
            'thumbnail': data.get('thumbnail'),
            'duration': float_or_none(data.get('realDuration')),
            'timestamp': int_or_none(data.get('liveFrom')),
            'series': data.get('programLabel'),
            'subtitles': subtitles,
            'is_live': data.get('isLive'),
        }