4 from .common
import InfoExtractor
5 from ..networking
.exceptions
import HTTPError
18 from ..utils
.traversal
import traverse_obj
# Scheme + host prefix shared by every Nebula extractor's _VALID_URL:
# optional "www." or "beta." subdomain on watchnebula.com, nebula.app or nebula.tv
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
class NebulaBaseIE(InfoExtractor):
    """Login, authorization and metadata helpers shared by the Nebula extractors."""

    _NETRC_MACHINE = 'watchnebula'
    # _api_token: key returned by the login endpoint or read from the
    #             'nebula_auth.apiToken' cookie
    # _token:     'token' value returned by the authorization endpoint; sent as
    #             the Bearer credential on API calls
    _token = _api_token = None

    def _perform_login(self, username, password):
        """Log in with e-mail/password and store the returned key in ``_api_token``.

        Raises:
            ExtractorError: when the server answers HTTP 400 (reported as invalid
                credentials), when the request fails for any other reason, or
                when the response carries no string under 'key'.
        """
        try:
            response = self._download_json(
                'https://nebula.tv/auth/login/', None,
                'Logging in to Nebula', 'Login failed',
                data=json.dumps({'email': username, 'password': password}).encode(),
                headers={'content-type': 'application/json'})
        except ExtractorError as e:
            if isinstance(e.cause, HTTPError) and e.cause.status == 400:
                raise ExtractorError('Login failed: Invalid username or password', expected=True)
            # Every other failure must propagate; falling through would leave
            # `response` unbound below
            raise
        self._api_token = traverse_obj(response, ('key', {str}))
        if not self._api_token:
            raise ExtractorError('Login failed: No token')

    def _call_api(self, *args, **kwargs):
        """Call ``_download_json`` with the bearer token, re-authorizing once on HTTP 401/403.

        All arguments are forwarded unchanged, except that an 'Authorization'
        header is set in ``kwargs['headers']`` whenever a token is available.
        Errors other than HTTP 401/403 are re-raised as-is.
        """
        def apply_bearer_token():
            # Skip the header entirely rather than sending "Bearer None"
            if self._token:
                kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'

        apply_bearer_token()
        try:
            return self._download_json(*args, **kwargs)
        except ExtractorError as e:
            if not isinstance(e.cause, HTTPError) or e.cause.status not in (401, 403):
                raise
            self.to_screen(
                f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
            self._real_initialize()
            apply_bearer_token()
            return self._download_json(*args, **kwargs)

    def _real_initialize(self):
        """Obtain ``_token`` from the authorization endpoint.

        If no API key is known yet, it is looked up in the
        'nebula_auth.apiToken' cookie for https://nebula.tv first. The
        authorization request is sent with a 'Token ...' header only when a
        key is available.
        """
        if not self._api_token:
            self._api_token = try_call(
                lambda: self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value)
        self._token = self._download_json(
            'https://users.api.nebula.app/api/v1/authorization/', None,
            headers={'Authorization': f'Token {self._api_token}'} if self._api_token else None,
            note='Authorizing to Nebula', data=b'')['token']

    def _extract_formats(self, content_id, slug):
        """Return ``{'formats': ..., 'subtitles': ...}`` from the content's m3u8 manifest.

        The manifest path uses the part of ``content_id`` before the first ':'
        (with an 's' appended) followed by the full ``content_id``.

        HTTP 401 is turned into a login-required error. On the first HTTP 403
        the token is refreshed and the request is retried once; any other
        error, or a second 403, is re-raised.
        """
        for retry in (False, True):
            try:
                # NOTE(review): the damaged source text only shows the manifest
                # URL and the 'app_version' entry of this call; the video id,
                # 'mp4' and the 'token'/'platform' query entries are
                # reconstructed - confirm against the canonical file.
                fmts, subs = self._extract_m3u8_formats_and_subtitles(
                    f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/manifest.m3u8',
                    slug, 'mp4', query={
                        'token': self._token,
                        'app_version': '23.10.0',
                        'platform': 'ios',
                    })
                return {'formats': fmts, 'subtitles': subs}
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 401:
                    self.raise_login_required()
                if not retry and isinstance(e.cause, HTTPError) and e.cause.status == 403:
                    self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
                    self._real_initialize()
                    continue
                raise

    def _extract_video_metadata(self, episode):
        """Map one API episode/lesson object to an info-dict fragment.

        ``episode['id']`` must exist; the part after its first ':' becomes the
        result's 'id'. All other fields are optional lookups. 'channel_url'
        and 'uploader_url' are built from the first of 'channel_slug' /
        'class_slug' that is present.
        """
        channel_url = traverse_obj(
            episode, (('channel_slug', 'class_slug'), {lambda x: urljoin('https://nebula.tv/', x)}), get_all=False)
        return {
            'id': episode['id'].partition(':')[2],
            **traverse_obj(episode, {
                # NOTE(review): 'display_id' and 'title' are missing from the
                # damaged source text but are read by callers
                # (metadata["display_id"]) - reconstructed, confirm.
                'display_id': 'slug',
                'title': 'title',
                'description': 'description',
                'timestamp': ('published_at', {parse_iso8601}),
                'duration': ('duration', {int_or_none}),
                'channel_id': 'channel_slug',
                'uploader_id': 'channel_slug',
                'channel': 'channel_title',
                'uploader': 'channel_title',
                'series': 'channel_title',
                'creator': 'channel_title',
                'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
                'episode_number': ('order', {int_or_none}),
                # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
                '_old_archive_ids': ('zype_id', {lambda x: [
                    make_archive_id(NebulaIE, x), make_archive_id(NebulaSubscriptionsIE, x)] if x else None}),
            }),
            'channel_url': channel_url,
            'uploader_url': channel_url,
        }
class NebulaIE(NebulaBaseIE):
    """Extractor for single Nebula videos (``/videos/<slug>``)."""

    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
    # NOTE(review): the damaged source text dropped some fixture lines (gaps in
    # its embedded numbering inside each info_dict); only the fields that are
    # visible are reproduced here - compare with the canonical file.
    _TESTS = [{
        'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
        'info_dict': {
            'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'md5:2aae3c4cfc5ee09a1ecdff0909618cf4',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
            'uploader_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'series': 'Lindsay Ellis',
            'display_id': 'that-time-disney-remade-beauty-and-the-beast',
            'channel_url': r're:https://nebula\.(tv|app)/lindsayellis',
            'creator': 'Lindsay Ellis',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5c271b40b13fd613090034fd', 'nebulasubscriptions 5c271b40b13fd613090034fd'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': 'd05739cf6c38c09322422f696b569c23',
        'info_dict': {
            'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering — The Logistics of D-Day',
            'channel_id': 'd-day',
            'uploader': 'Real Engineering — The Logistics of D-Day',
            'uploader_id': 'd-day',
            'series': 'Real Engineering — The Logistics of D-Day',
            'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'creator': 'Real Engineering — The Logistics of D-Day',
            'channel_url': 'https://nebula.tv/d-day',
            'uploader_url': 'https://nebula.tv/d-day',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            '_old_archive_ids': ['nebula 5e7e78171aaf320001fbd6be', 'nebulasubscriptions 5e7e78171aaf320001fbd6be'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
        'md5': 'ebe28a7ad822b9ee172387d860487868',
        'info_dict': {
            'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
            'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
            'channel_url': 'https://nebula.tv/tom-scott-presents-money',
            'series': 'Tom Scott Presents: Money',
            'display_id': 'money-episode-1-the-draw',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'Tom Scott Presents: Money',
            '_old_archive_ids': ['nebula 5e779ebdd157bc0001d1c75a', 'nebulasubscriptions 5e779ebdd157bc0001d1c75a'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }, {
        'url': 'https://nebula.tv/videos/tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
        'info_dict': {
            'id': 'e389af9d-1dab-44f2-8788-ee24deb7ff0d',
            'display_id': 'tldrnewseu-did-the-us-really-blow-up-the-nordstream-pipelines',
            'title': 'Did the US Really Blow Up the NordStream Pipelines?',
            'description': 'md5:b4e2a14e3ff08f546a3209c75261e789',
            'upload_date': '20230223',
            'timestamp': 1677144070,
            'channel': 'TLDR News EU',
            'channel_id': 'tldrnewseu',
            'uploader': 'TLDR News EU',
            'uploader_id': 'tldrnewseu',
            'uploader_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'channel_url': r're:https://nebula\.(tv|app)/tldrnewseu',
            'series': 'TLDR News EU',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+',
            'creator': 'TLDR News EU',
            '_old_archive_ids': ['nebula 63f64c74366fcd00017c1513', 'nebulasubscriptions 63f64c74366fcd00017c1513'],
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract one video.

        When the URL carries a smuggled 'id' (set by the playlist extractors,
        which already supply the metadata), only the formats are fetched.
        Otherwise the video metadata is requested by slug first.
        """
        slug = self._match_id(url)
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('id'):
            return {
                'id': smuggled_data['id'],
                # NOTE(review): 'display_id' and 'title' are reconstructed; the
                # damaged source text dropped two lines here - confirm.
                'display_id': slug,
                'title': '',
                **self._extract_formats(smuggled_data['id'], slug),
            }

        metadata = self._call_api(
            f'https://content.api.nebula.app/content/videos/{slug}',
            slug, note='Fetching video metadata')
        return {
            **self._extract_video_metadata(metadata),
            **self._extract_formats(metadata['id'], slug),
        }
class NebulaClassIE(NebulaBaseIE):
    """Extractor for a single lesson of a Nebula class (``/<class-slug>/<number>``)."""

    IE_NAME = 'nebula:class'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>[-\w]+)/(?P<ep>\d+)'
    # NOTE(review): the damaged source text dropped some fixture lines inside
    # info_dict; only the visible fields are reproduced - compare with the
    # canonical file.
    _TESTS = [{
        'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
        'info_dict': {
            'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
            'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'episode_number': 14,
            'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
            'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
            'episode': 'Episode 14',
            'title': 'Photos, Sculpture, and Video',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    def _real_extract(self, url):
        """Extract one class lesson.

        When the URL carries a smuggled 'id', only the formats are fetched.
        Otherwise the lesson metadata is requested by class slug and episode
        number first.
        """
        slug, episode = self._match_valid_url(url).group('id', 'ep')
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('id'):
            return {
                'id': smuggled_data['id'],
                # NOTE(review): 'display_id' and 'title' are reconstructed; the
                # damaged source text dropped two lines here - confirm.
                'display_id': slug,
                'title': '',
                **self._extract_formats(smuggled_data['id'], slug),
            }

        metadata = self._call_api(
            f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
            slug, note='Fetching video metadata')
        return {
            **self._extract_video_metadata(metadata),
            **self._extract_formats(metadata['id'], slug),
        }
class NebulaSubscriptionsIE(NebulaBaseIE):
    """Playlist extractor for ``/myshows`` and ``/library/latest-videos``."""

    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)'
    # NOTE(review): the damaged source text dropped the lines following
    # 'playlist_mincount' in this fixture; only the visible fields are
    # reproduced - compare with the canonical file.
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
    }]

    def _generate_playlist_entries(self):
        """Yield one url_transparent result per episode, following the API's 'next' links.

        Each entry smuggles the episode's API id so that NebulaIE can skip the
        metadata request. Iteration stops when a page has no 'next' URL.
        """
        next_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
            # NOTE(review): 'following' is reconstructed; the damaged source
            # text dropped the first entry of this dict - confirm.
            'following': 'true',
            'include': 'engagement',
            'ordering': '-published_at',
        })
        for page_num in itertools.count(1):
            channel = self._call_api(
                next_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
            for episode in channel['results']:
                metadata = self._extract_video_metadata(episode)
                yield self.url_result(smuggle_url(
                    f'https://nebula.tv/videos/{metadata["display_id"]}',
                    {'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
            next_url = channel.get('next')
            # itertools.count() never ends on its own; without this the loop
            # would go on to request a None URL after the last page
            if not next_url:
                return

    def _real_extract(self, url):
        """Return the subscriptions feed as a playlist with id 'myshows'."""
        return self.playlist_result(self._generate_playlist_entries(), 'myshows')
class NebulaChannelIE(NebulaBaseIE):
    """Playlist extractor for a Nebula channel or class page (``/<slug>``)."""

    IE_NAME = 'nebula:channel'
    # The negative lookahead keeps this from matching the subscriptions
    # ("myshows", "library...") and single-video ("videos/") URLs
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos/)(?P<id>[-\w]+)/?(?:$|[?#])'
    # NOTE(review): the damaged source text dropped a line after the first
    # fixture's info_dict (likely its playlist count); only the visible fields
    # are reproduced - compare with the canonical file.
    _TESTS = [{
        'url': 'https://nebula.tv/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
    }, {
        'url': 'https://nebula.tv/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 2,
    }, {
        'url': 'https://nebula.tv/johnnyharris',
        'info_dict': {
            'id': 'johnnyharris',
            'title': 'Johnny Harris',
            'description': 'I make videos about maps and many other things.',
        },
        'playlist_mincount': 90,
    }, {
        'url': 'https://nebula.tv/copyright-for-fun-and-profit',
        'info_dict': {
            'id': 'copyright-for-fun-and-profit',
            'title': 'Copyright for Fun and Profit',
            'description': 'md5:6690248223eed044a9f11cd5a24f9742',
        },
        'playlist_count': 23,
    }]

    def _generate_playlist_entries(self, collection_id, collection_slug):
        """Yield one url_transparent result per channel episode, following 'next' links.

        The entry URL is the episode's 'share_url' when present, otherwise it
        is built from the extracted display id. Iteration stops when a page
        has no 'next' URL.
        """
        next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
        for page_num in itertools.count(1):
            episodes = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
            for episode in episodes['results']:
                metadata = self._extract_video_metadata(episode)
                yield self.url_result(smuggle_url(
                    episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}',
                    {'id': episode['id']}), NebulaIE, url_transparent=True, **metadata)
            next_url = episodes.get('next')
            # itertools.count() never ends on its own; without this the loop
            # would go on to request a None URL after the last page
            if not next_url:
                break

    def _generate_class_entries(self, channel):
        """Yield one url_transparent result per lesson listed in ``channel['lessons']``."""
        for lesson in channel['lessons']:
            metadata = self._extract_video_metadata(lesson)
            # The fallback URL is built from the raw lesson payload: the
            # extracted metadata has no 'class_slug'/'slug' keys, so indexing
            # it raised KeyError whenever 'share_url' was absent.
            # NOTE(review): assumes lessons carry 'class_slug' and 'slug' - confirm.
            yield self.url_result(smuggle_url(
                lesson.get('share_url') or f'https://nebula.tv/{lesson["class_slug"]}/{lesson["slug"]}',
                {'id': lesson['id']}), NebulaClassIE, url_transparent=True, **metadata)

    def _real_extract(self, url):
        """Return the channel's episodes, or the class's lessons, as a playlist.

        The collection is fetched by slug; a 'type' of 'class' selects the
        lesson list, anything else the paginated episode listing.
        """
        collection_slug = self._match_id(url)
        channel = self._call_api(
            f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
            collection_slug, note='Retrieving channel')

        if channel.get('type') == 'class':
            entries = self._generate_class_entries(channel)
        else:
            entries = self._generate_playlist_entries(channel['id'], collection_slug)

        return self.playlist_result(
            entries=entries,
            playlist_id=collection_slug,
            playlist_title=channel.get('title'),
            playlist_description=channel.get('description'))