]>
jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/redbee.py
7 from .common
import InfoExtractor
19 class RedBeeBaseIE(InfoExtractor
):
20 _DEVICE_ID
= str(uuid
.uuid4())
25 Ref: https://apidocs.emp.ebsd.ericsson.net
26 Subclasses must set _REDBEE_CUSTOMER, _REDBEE_BUSINESS_UNIT
28 return f
'https://exposure.api.redbee.live/v2/customer/{self._REDBEE_CUSTOMER}/businessunit/{self._REDBEE_BUSINESS_UNIT}'
30 def _get_bearer_token(self
, asset_id
, jwt
=None):
32 'deviceId': self
._DEVICE
_ID
,
34 'deviceId': self
._DEVICE
_ID
,
35 'name': 'Mozilla Firefox 102',
42 return self
._download
_json
(
43 f
'{self._API_URL}/auth/{"gigyaLogin" if jwt else "anonymous"}',
44 asset_id
, data
=json
.dumps(request
).encode('utf-8'), headers
={
45 'Content-Type': 'application/json;charset=utf-8'
48 def _get_formats_and_subtitles(self
, asset_id
, **kwargs
):
49 bearer_token
= self
._get
_bearer
_token
(asset_id
, **kwargs
)
50 api_response
= self
._download
_json
(
51 f
'{self._API_URL}/entitlement/{asset_id}/play',
53 'Authorization': f
'Bearer {bearer_token}',
54 'Accept': 'application/json, text/plain, */*'
57 formats
, subtitles
= [], {}
58 for format
in api_response
['formats']:
59 if not format
.get('mediaLocator'):
63 if format
.get('format') == 'DASH':
64 fmts
, subs
= self
._extract
_mpd
_formats
_and
_subtitles
(
65 format
['mediaLocator'], asset_id
, fatal
=False)
66 elif format
.get('format') == 'SMOOTHSTREAMING':
67 fmts
, subs
= self
._extract
_ism
_formats
_and
_subtitles
(
68 format
['mediaLocator'], asset_id
, fatal
=False)
69 elif format
.get('format') == 'HLS':
70 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
71 format
['mediaLocator'], asset_id
, fatal
=False)
78 self
._merge
_subtitles
(subs
, target
=subtitles
)
80 return formats
, subtitles
83 class ParliamentLiveUKIE(RedBeeBaseIE
):
84 IE_NAME
= 'parliamentlive.tv'
85 IE_DESC
= 'UK parliament videos'
86 _VALID_URL
= r
'(?i)https?://(?:www\.)?parliamentlive\.tv/Event/Index/(?P<id>[\da-f]{8}-[\da-f]{4}-[\da-f]{4}-[\da-f]{4}-[\da-f]{12})'
88 _REDBEE_CUSTOMER
= 'UKParliament'
89 _REDBEE_BUSINESS_UNIT
= 'ParliamentLive'
92 'url': 'http://parliamentlive.tv/Event/Index/c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
94 'id': 'c1e9d44d-fd6c-4263-b50f-97ed26cc998b',
96 'title': 'Home Affairs Committee',
97 'timestamp': 1395153872,
98 'upload_date': '20140318',
99 'thumbnail': r
're:https?://[^?#]+c1e9d44d-fd6c-4263-b50f-97ed26cc998b[^/]*/thumbnail',
102 'url': 'http://parliamentlive.tv/event/index/3f24936f-130f-40bf-9a5d-b3d6479da6a4',
103 'only_matching': True,
105 'url': 'https://parliamentlive.tv/Event/Index/27cf25e4-e77b-42a3-93c5-c815cd6d7377',
107 'id': '27cf25e4-e77b-42a3-93c5-c815cd6d7377',
109 'title': 'House of Commons',
110 'timestamp': 1658392447,
111 'upload_date': '20220721',
112 'thumbnail': r
're:https?://[^?#]+27cf25e4-e77b-42a3-93c5-c815cd6d7377[^/]*/thumbnail',
116 def _real_extract(self
, url
):
117 video_id
= self
._match
_id
(url
)
119 formats
, subtitles
= self
._get
_formats
_and
_subtitles
(video_id
)
120 self
._sort
_formats
(formats
)
122 video_info
= self
._download
_json
(
123 f
'https://www.parliamentlive.tv/Event/GetShareVideo/{video_id}', video_id
, fatal
=False)
125 self
._sort
_formats
(formats
, ['res', 'proto'])
130 'subtitles': subtitles
,
131 'title': traverse_obj(video_info
, ('event', 'title')),
132 'thumbnail': traverse_obj(video_info
, 'thumbnailUrl'),
133 'timestamp': traverse_obj(
134 video_info
, ('event', 'publishedStartTime'), expected_type
=unified_timestamp
),
138 class RTBFIE(RedBeeBaseIE
):
139 _VALID_URL
= r
'''(?x)
140 https?://(?:www\.)?rtbf\.be/
142 video/[^?]+\?.*\bid=|
143 ouftivi/(?:[^/]+/)*[^?]+\?.*\bvideoId=|
144 auvio/[^/]+\?.*\b(?P<live>l)?id=
146 _NETRC_MACHINE
= 'rtbf'
148 _REDBEE_CUSTOMER
= 'RTBF'
149 _REDBEE_BUSINESS_UNIT
= 'Auvio'
152 'url': 'https://www.rtbf.be/video/detail_les-diables-au-coeur-episode-2?id=1921274',
153 'md5': '8c876a1cceeb6cf31b476461ade72384',
157 'title': 'Les Diables au coeur (épisode 2)',
158 'description': '(du 25/04/2014)',
160 'upload_date': '20140425',
161 'timestamp': 1398456300,
163 'skip': 'No longer available',
166 'url': 'http://www.rtbf.be/ouftivi/heros/detail_scooby-doo-mysteres-associes?id=1097&videoId=2057442',
167 'only_matching': True,
169 'url': 'http://www.rtbf.be/ouftivi/niouzz?videoId=2055858',
170 'only_matching': True,
172 'url': 'http://www.rtbf.be/auvio/detail_jeudi-en-prime-siegfried-bracke?id=2102996',
173 'only_matching': True,
176 'url': 'https://www.rtbf.be/auvio/direct_pure-fm?lid=134775',
177 'only_matching': True,
180 'url': 'https://www.rtbf.be/auvio/detail_cinq-heures-cinema?id=2360811',
181 'only_matching': True,
184 'url': 'https://www.rtbf.be/auvio/detail_les-carnets-du-bourlingueur?id=2361588',
185 'only_matching': True,
187 'url': 'https://www.rtbf.be/auvio/detail_investigation?id=2921926',
188 'md5': 'd5d11bb62169fef38d7ce7ac531e034f',
192 'title': 'Le handicap un confinement perpétuel - Maladie de Lyme',
193 'description': 'md5:dcbd5dcf6015488c9069b057c15ccc52',
195 'upload_date': '20220727',
196 'timestamp': 1658934000,
197 'series': '#Investigation',
198 'thumbnail': r
're:^https?://[^?&]+\.jpg$',
201 'url': 'https://www.rtbf.be/auvio/detail_la-belgique-criminelle?id=2920492',
202 'md5': '054f9f143bc79c89647c35e5a7d35fa8',
206 'title': '04 - Le crime de la rue Royale',
207 'description': 'md5:0c3da1efab286df83f2ab3f8f96bd7a6',
209 'upload_date': '20220723',
210 'timestamp': 1658596887,
211 'series': 'La Belgique criminelle - TV',
212 'thumbnail': r
're:^https?://[^?&]+\.jpg$',
216 _IMAGE_HOST
= 'http://ds1.ds.static.rtbf.be'
218 'YOUTUBE': 'Youtube',
219 'DAILYMOTION': 'Dailymotion',
227 _LOGIN_URL
= 'https://login.rtbf.be/accounts.login'
228 _GIGYA_API_KEY
= '3_kWKuPgcdAybqnqxq_MvHVk0-6PN8Zk8pIIkJM_yXOu-qLPDDsGOtIDFfpGivtbeO'
229 _LOGIN_COOKIE_ID
= f
'glt_{_GIGYA_API_KEY}'
231 def _perform_login(self
, username
, password
):
232 if self
._get
_cookies
(self
._LOGIN
_URL
).get(self
._LOGIN
_COOKIE
_ID
):
235 self
._set
_cookie
('.rtbf.be', 'gmid', 'gmid.ver4', secure
=True, expire_time
=time
.time() + 3600)
237 login_response
= self
._download
_json
(
238 self
._LOGIN
_URL
, None, data
=urllib
.parse
.urlencode({
240 'password': password
,
241 'APIKey': self
._GIGYA
_API
_KEY
,
242 'targetEnv': 'jssdk',
243 'sessionExpiration': '-2',
244 }).encode('utf-8'), headers
={
245 'Content-Type': 'application/x-www-form-urlencoded',
248 if login_response
['statusCode'] != 200:
249 raise ExtractorError('Login failed. Server message: %s' % login_response
['errorMessage'], expected
=True)
251 self
._set
_cookie
('.rtbf.be', self
._LOGIN
_COOKIE
_ID
, login_response
['sessionInfo']['login_token'],
252 secure
=True, expire_time
=time
.time() + 3600)
254 def _get_formats_and_subtitles(self
, url
, media_id
):
255 login_token
= self
._get
_cookies
(url
).get(self
._LOGIN
_COOKIE
_ID
)
257 self
.raise_login_required()
259 session_jwt
= try_call(lambda: self
._get
_cookies
(url
)['rtbf_jwt'].value
) or self
._download
_json
(
260 'https://login.rtbf.be/accounts.getJWT', media_id
, query
={
261 'login_token': login_token
.value
,
262 'APIKey': self
._GIGYA
_API
_KEY
,
264 'authMode': 'cookie',
270 return super()._get
_formats
_and
_subtitles
(media_id
, jwt
=session_jwt
)
272 def _real_extract(self
, url
):
273 live
, media_id
= self
._match
_valid
_url
(url
).groups()
274 embed_page
= self
._download
_webpage
(
275 'https://www.rtbf.be/auvio/embed/' + ('direct' if live
else 'media'),
276 media_id
, query
={'id': media_id}
)
278 media_data
= self
._html
_search
_regex
(r
'data-media="([^"]+)"', embed_page
, 'media data', fatal
=False)
280 if re
.search(r
'<div[^>]+id="js-error-expired"[^>]+class="(?![^"]*hidden)', embed_page
):
281 raise ExtractorError('Livestream has ended.', expected
=True)
282 if re
.search(r
'<div[^>]+id="js-sso-connect"[^>]+class="(?![^"]*hidden)', embed_page
):
283 self
.raise_login_required()
285 raise ExtractorError('Could not find media data')
287 data
= self
._parse
_json
(media_data
, media_id
)
289 error
= data
.get('error')
291 raise ExtractorError('%s said: %s' % (self
.IE_NAME
, error
), expected
=True)
293 provider
= data
.get('provider')
294 if provider
in self
._PROVIDERS
:
295 return self
.url_result(data
['url'], self
._PROVIDERS
[provider
])
297 title
= traverse_obj(data
, 'subtitle', 'title')
298 is_live
= data
.get('isLive')
299 height_re
= r
'-(\d+)p\.'
300 formats
, subtitles
= [], {}
302 # The old api still returns m3u8 and mpd manifest for livestreams, but these are 'fake'
303 # since all they contain is a 20s video that is completely unrelated.
304 # https://github.com/yt-dlp/yt-dlp/issues/4656#issuecomment-1214461092
305 m3u8_url
= None if data
.get('isLive') else traverse_obj(data
, 'urlHlsAes128', 'urlHls')
307 fmts
, subs
= self
._extract
_m
3u8_formats
_and
_subtitles
(
308 m3u8_url
, media_id
, 'mp4', m3u8_id
='hls', fatal
=False)
310 self
._merge
_subtitles
(subs
, target
=subtitles
)
312 fix_url
= lambda x
: x
.replace('//rtbf-vod.', '//rtbf.') if '/geo/drm/' in x
else x
313 http_url
= data
.get('url')
314 if formats
and http_url
and re
.search(height_re
, http_url
):
315 http_url
= fix_url(http_url
)
316 for m3u8_f
in formats
[:]:
317 height
= m3u8_f
.get('height')
323 'format_id': m3u8_f
['format_id'].replace('hls-', 'http-'),
324 'url': re
.sub(height_re
, '-%dp.' % height
, http_url
),
328 sources
= data
.get('sources') or {}
329 for key
, format_id
in self
._QUALITIES
:
330 format_url
= sources
.get(key
)
333 height
= int_or_none(self
._search
_regex
(
334 height_re
, format_url
, 'height', default
=None))
336 'format_id': format_id
,
337 'url': fix_url(format_url
),
341 mpd_url
= None if data
.get('isLive') else data
.get('urlDash')
342 if mpd_url
and (self
.get_param('allow_unplayable_formats') or not data
.get('drm')):
343 fmts
, subs
= self
._extract
_mpd
_formats
_and
_subtitles
(
344 mpd_url
, media_id
, mpd_id
='dash', fatal
=False)
346 self
._merge
_subtitles
(subs
, target
=subtitles
)
348 audio_url
= data
.get('urlAudio')
351 'format_id': 'audio',
356 for track
in (data
.get('tracks') or {}).values():
357 sub_url
= track
.get('url')
360 subtitles
.setdefault(track
.get('lang') or 'fr', []).append({
365 fmts
, subs
= self
._get
_formats
_and
_subtitles
(url
, f
'live_{media_id}' if is_live
else media_id
)
367 self
._merge
_subtitles
(subs
, target
=subtitles
)
369 self
._sort
_formats
(formats
, ['res', 'proto'])
374 'description': strip_or_none(data
.get('description')),
375 'thumbnail': data
.get('thumbnail'),
376 'duration': float_or_none(data
.get('realDuration')),
377 'timestamp': int_or_none(data
.get('liveFrom')),
378 'series': data
.get('programLabel'),
379 'subtitles': subtitles
,