]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nebula.py
[ie/ERRJupiter] Add extractor (#8549)
[yt-dlp.git] / yt_dlp / extractor / nebula.py
CommitLineData
359df0fc 1import itertools
bdc196a4 2import json
bdc196a4 3
359df0fc 4from .common import InfoExtractor
3d2623a8 5from ..networking.exceptions import HTTPError
45d82be6 6from ..utils import (
7 ExtractorError,
8 int_or_none,
9 make_archive_id,
10 parse_iso8601,
11 smuggle_url,
12 try_call,
13 unsmuggle_url,
14 update_url_query,
15 url_or_none,
16 urljoin,
17)
18from ..utils.traversal import traverse_obj
359df0fc 19
# Scheme-and-host prefix shared by every _VALID_URL in this module: http(s),
# an optional "www." or "beta." subdomain, then one of the hosts
# watchnebula.com, nebula.app or nebula.tv.
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
4cca2eb1 21
359df0fc
HH
22
class NebulaBaseIE(InfoExtractor):
    """Login, authorized API access and metadata mapping shared by the Nebula extractors."""

    _NETRC_MACHINE = 'watchnebula'
    # _api_token: key returned by the login endpoint or read from the site cookie.
    # _token: value returned by the authorization endpoint; sent as a Bearer
    # header on API calls and as the `token` query parameter for manifests.
    _token = _api_token = None

    def _perform_login(self, username, password):
        """Log in with e-mail/password and remember the API key from the response.

        Raises an expected ExtractorError when the server answers HTTP 400,
        and an ExtractorError when the response has no string ``key``.
        """
        credentials = json.dumps({'email': username, 'password': password}).encode()
        try:
            response = self._download_json(
                'https://nebula.tv/auth/login/', None,
                'Logging in to Nebula', 'Login failed',
                data=credentials,
                headers={'content-type': 'application/json'})
        except ExtractorError as e:
            rejected = isinstance(e.cause, HTTPError) and e.cause.status == 400
            if not rejected:
                raise
            raise ExtractorError('Login failed: Invalid username or password', expected=True)

        api_token = traverse_obj(response, ('key', {str}))
        self._api_token = api_token
        if not api_token:
            raise ExtractorError('Login failed: No token')

    def _call_api(self, *args, **kwargs):
        """Forward to ``_download_json`` with the Bearer token attached.

        On an HTTP 401 or 403 the extractor re-runs ``_real_initialize`` and
        repeats the request exactly once; any other failure propagates.
        """
        def attach_bearer():
            if self._token:
                kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'

        attach_bearer()
        try:
            return self._download_json(*args, **kwargs)
        except ExtractorError as e:
            auth_failed = isinstance(e.cause, HTTPError) and e.cause.status in (401, 403)
            if not auth_failed:
                raise
            self.to_screen(
                f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
            self._real_initialize()
            # The token may have changed, so refresh the header before retrying
            attach_bearer()
            return self._download_json(*args, **kwargs)

    def _real_initialize(self):
        """Obtain ``_token`` from the authorization endpoint.

        If no API key was stored by login, one is looked up in the
        ``nebula_auth.apiToken`` cookie for nebula.tv; when none is found the
        request is sent without an Authorization header.
        """
        if not self._api_token:
            self._api_token = try_call(
                lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)

        auth_headers = {'Authorization': f'Token {self._api_token}'} if self._api_token else None
        authorization = self._download_json(
            'https://users.api.nebula.app/api/v1/authorization/', None,
            headers=auth_headers, note='Authorizing to Nebula', data=b'')
        self._token = authorization['token']

    def _extract_formats(self, content_id, slug):
        """Return ``{'formats': ..., 'subtitles': ...}`` from the HLS manifest of *content_id*.

        The part of *content_id* before the first ``:`` (pluralised with an
        ``s``) selects the API path. HTTP 401 raises the login-required error;
        HTTP 403 triggers one re-authorization and a second attempt.
        """
        content_type = content_id.split(':')[0]
        manifest_url = f'https://content.api.nebula.app/{content_type}s/{content_id}/manifest.m3u8'

        for is_retry in (False, True):
            try:
                formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                    manifest_url, slug, 'mp4', query={
                        # Read on every attempt: re-authorization replaces the token
                        'token': self._token,
                        'app_version': '23.10.0',
                        'platform': 'ios',
                    })
            except ExtractorError as e:
                status = e.cause.status if isinstance(e.cause, HTTPError) else None
                if status == 401:
                    self.raise_login_required()
                if status == 403 and not is_retry:
                    self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
                    self._real_initialize()
                    continue
                raise
            else:
                return {'formats': formats, 'subtitles': subtitles}

    def _extract_video_metadata(self, episode):
        """Map an API episode/lesson dict onto info-dict fields.

        ``id`` is whatever follows the first ``:`` of ``episode['id']``; the
        channel/uploader URL is built from the first of ``channel_slug`` /
        ``class_slug`` that is present.
        """
        def to_channel_url(channel_slug):
            return urljoin('https://nebula.tv/', channel_slug)

        def legacy_archive_ids(zype_id):
            # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
            if not zype_id:
                return None
            return [
                make_archive_id(NebulaIE, zype_id),
                make_archive_id(NebulaSubscriptionsIE, zype_id),
            ]

        channel_url = traverse_obj(
            episode, (('channel_slug', 'class_slug'), {to_channel_url}), get_all=False)
        video_id = episode['id'].partition(':')[2]
        fields = traverse_obj(episode, {
            'display_id': 'slug',
            'title': 'title',
            'description': 'description',
            'timestamp': ('published_at', {parse_iso8601}),
            'duration': ('duration', {int_or_none}),
            'channel_id': 'channel_slug',
            'uploader_id': 'channel_slug',
            'channel': 'channel_title',
            'uploader': 'channel_title',
            'series': 'channel_title',
            'creator': 'channel_title',
            'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
            'episode_number': ('order', {int_or_none}),
            '_old_archive_ids': ('zype_id', {legacy_archive_ids}),
        })

        return {
            'id': video_id,
            **fields,
            'channel_url': channel_url,
            'uploader_url': channel_url,
        }
112
359df0fc
HH
113
class NebulaIE(NebulaBaseIE):
    """Extractor for single Nebula videos addressed as ``/videos/<slug>``."""

    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
        'info_dict': {
            'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
            'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'series': 'Lindsay Ellis',
            'display_id': 'that-time-disney-remade-beauty-and-the-beast',
            'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'creator': 'Lindsay Ellis',
            'duration': 2212,
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': 'd05739cf6c38c09322422f696b569c23',
        'info_dict': {
            'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering — The Logistics of D-Day',
            'channel_id': 'd-day',
            'uploader': 'Real Engineering — The Logistics of D-Day',
            'uploader_id': 'd-day',
            'series': 'Real Engineering — The Logistics of D-Day',
            'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'creator': 'Real Engineering — The Logistics of D-Day',
            'duration': 841,
            'channel_url': 'https://nebula.tv/d-day',
            'uploader_url': 'https://nebula.tv/d-day',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
        'md5': 'ebe28a7ad822b9ee172387d860487868',
        'info_dict': {
            'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
            'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
            'duration': 825,
            'channel_url': 'https://nebula.tv/tom-scott-presents-money',
            'series': 'Tom Scott Presents: Money',
            'display_id': 'money-episode-1-the-draw',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'Tom Scott Presents: Money',
            '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }, {
        'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
        'info_dict': {
            'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
            'ext': 'mp4',
            'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
            'title': 'Did the US Really Blow Up the NordStream Pipelines?',
            'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
            'upload_date': '20230223',
            'timestamp': 1677144070,
            'channel': 'TLDR News EU',
            'channel_id': 'tldrnewseu',
            'uploader': 'TLDR News EU',
            'uploader_id': 'tldrnewseu',
            'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'duration': 524,
            'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'series': 'TLDR News EU',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'TLDR News EU',
            '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the info dict for one video URL.

        When the URL carries a smuggled ``id`` (as produced by the playlist
        extractors in this module), only the formats are fetched and the
        title is left empty; otherwise the metadata endpoint is queried first.
        """
        # The slug is matched on the incoming URL, before unsmuggling
        slug = self._match_id(url)
        _, smuggled_data = unsmuggle_url(url, {})

        content_id = smuggled_data.get('id')
        if content_id:
            return {
                'id': content_id,
                'display_id': slug,
                'title': '',
                **self._extract_formats(content_id, slug),
            }

        video = self._call_api(
            f'https://content.api.nebula.app/content/videos/{slug}',
            slug, note='Fetching video metadata')
        info = self._extract_video_metadata(video)
        info.update(self._extract_formats(video['id'], slug))
        return info
236
237
class NebulaClassIE(NebulaBaseIE):
    """Extractor for a numbered lesson of a Nebula class: ``/<class-slug>/<number>``."""

    IE_NAME = 'nebula:class'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
    _TESTS = [{
        'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
        'info_dict': {
            'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
            'ext': 'mp4',
            'display_id': '14',
            'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'episode_number': 14,
            'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
            'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'duration': 646,
            'episode': 'Episode 14',
            'title': 'Photos, Sculpture, and Video',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Return the info dict for one lesson URL.

        A smuggled ``id`` short-circuits to format extraction only (empty
        title, class slug as ``display_id``); otherwise the lesson metadata
        is fetched from the content API first.
        """
        mobj = self._match_valid_url(url)
        slug, episode = mobj.group('id'), mobj.group('ep')
        _, smuggled_data = unsmuggle_url(url, {})

        content_id = smuggled_data.get('id')
        if content_id:
            return {
                'id': content_id,
                'display_id': slug,
                'title': '',
                **self._extract_formats(content_id, slug),
            }

        lesson = self._call_api(
            f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
            slug, note='Fetching video metadata')
        info = self._extract_video_metadata(lesson)
        info.update(self._extract_formats(lesson['id'], slug))
        return info
bdc196a4 276
bdc196a4 277
f3b3fe16
HH
class NebulaSubscriptionsIE(NebulaBaseIE):
    """Playlist extractor for the logged-in user's followed channels feed."""

    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)'
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
    }]

    def _generate_playlist_entries(self):
        """Yield one url_transparent NebulaIE result per episode, following ``next`` links."""
        page_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
            'following': 'true',
            'include': 'engagement',
            'ordering': '-published_at',
        })
        page_num = 0
        while page_url:
            page_num += 1
            page = self._call_api(
                page_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
            for episode in page['results']:
                metadata = self._extract_video_metadata(episode)
                # Smuggle the content id so NebulaIE can skip the metadata request
                video_url = smuggle_url(
                    f'https://nebula.tv/videos/{metadata["display_id"]}',
                    {'id': episode['id']})
                yield self.url_result(video_url, NebulaIE, url_transparent=True, **metadata)
            # A missing or empty `next` ends the pagination
            page_url = page.get('next')

    def _real_extract(self, url):
        """Return the subscriptions feed as a playlist with the fixed id ``myshows``."""
        entries = self._generate_playlist_entries()
        return self.playlist_result(entries, 'myshows')
309
310
class NebulaChannelIE(NebulaBaseIE):
    """Playlist extractor for a Nebula channel or class page: ``/<slug>``."""

    IE_NAME = 'nebula:channel'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://nebula.tv/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://nebula.tv/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nebula.tv/johnnyharris',
        'info_dict': {
            'id': 'johnnyharris',
            'title': 'Johnny Harris',
            'description': 'I make videos about maps and many other things.',
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://nebula.tv/copyright-for-fun-and-profit',
        'info_dict': {
            'id': 'copyright-for-fun-and-profit',
            'title': 'Copyright for Fun and Profit',
            'description': 'md5:6690248223eed044a9f11cd5a24f9742',
        },
        'playlist_count': 23,
    }]

    def _generate_playlist_entries(self, collection_id, collection_slug):
        """Yield url_transparent NebulaIE results for every episode of a channel.

        Pages are requested newest-first and followed through the ``next``
        link of each response until it is missing or empty.
        """
        next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
        for page_num in itertools.count(1):
            episodes = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
            for episode in episodes['results']:
                metadata = self._extract_video_metadata(episode)
                # Smuggle the content id so NebulaIE can skip the metadata request
                yield self.url_result(smuggle_url(
                    episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}',
                    {'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
            next_url = episodes.get('next')
            if not next_url:
                break

    def _generate_class_entries(self, channel):
        """Yield url_transparent NebulaClassIE results for every lesson of a class."""
        for lesson in channel['lessons']:
            metadata = self._extract_video_metadata(lesson)
            # The fallback URL must be built from the raw lesson: the metadata
            # dict exposes the slug as `display_id` and has no `class_slug` or
            # `slug` key, so indexing it here raised KeyError whenever
            # `share_url` was absent.
            yield self.url_result(smuggle_url(
                lesson.get('share_url') or f'https://nebula.tv/{lesson["class_slug"]}/{lesson["slug"]}',
                {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)

    def _real_extract(self, url):
        """Return a playlist of the collection's lessons (type ``class``) or episodes (anything else)."""
        collection_slug = self._match_id(url)
        channel = self._call_api(
            f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
            collection_slug, note='Retrieving channel')

        if channel.get('type') == 'class':
            entries = self._generate_class_entries(channel)
        else:
            entries = self._generate_playlist_entries(channel['id'], collection_slug)

        return self.playlist_result(
            entries=entries,
            playlist_id=collection_slug,
            playlist_title=channel.get('title'),
            playlist_description=channel.get('description'))