4 from .art19
import Art19IE
5 from .common
import InfoExtractor
6 from ..networking
.exceptions
import HTTPError
19 from ..utils
.traversal
import traverse_obj
# Scheme-and-host prefix shared by the _VALID_URL pattern of every extractor in
# this module: http or https, an optional "www." or "beta." prefix, and one of
# the hosts watchnebula.com, nebula.app or nebula.tv.
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
class NebulaBaseIE(InfoExtractor):
    """Base class holding the login, authorization and API helpers of the Nebula extractors."""

    # Presumably the machine name used for .netrc credential lookup — confirm
    # against InfoExtractor.
    _NETRC_MACHINE = 'watchnebula'
    # _api_token: key returned by the login endpoint, or read from the
    #   'nebula_auth.apiToken' cookie; sent as "Token <key>" when authorizing.
    # _token: value returned by the authorization endpoint; sent as
    #   "Bearer <token>" on API calls.
    _token = _api_token = None
def _perform_login(self, username, password):
    """Log in with e-mail and password and store the returned API key.

    The key found under ``key`` in the JSON response is saved in
    ``self._api_token``.

    Raises:
        ExtractorError: with an "Invalid username or password" message
            (marked as expected) when the server answers HTTP 400; the
            original error for any other download failure; and a
            "No token" error when the response has no string ``key``.
    """
    credentials = json.dumps({'email': username, 'password': password}).encode()
    try:
        response = self._download_json(
            'https://nebula.tv/auth/login/', None,
            'Logging in to Nebula', 'Login failed',
            data=credentials, headers={'content-type': 'application/json'})
    except ExtractorError as e:
        bad_credentials = isinstance(e.cause, HTTPError) and e.cause.status == 400
        if not bad_credentials:
            # NOTE(review): this re-raise is not visible in the reviewed
            # excerpt; without it `response` would be unbound below.
            raise
        raise ExtractorError('Login failed: Invalid username or password', expected=True)

    self._api_token = traverse_obj(response, ('key', {str}))
    if self._api_token:
        return
    raise ExtractorError('Login failed: No token')
def _call_api(self, *args, **kwargs):
    """Download JSON with the bearer token attached, re-authorizing once on 401/403.

    All arguments are forwarded to ``self._download_json``; the
    ``Authorization`` entry of the ``headers`` keyword is overwritten with
    the current bearer token.

    Raises:
        ExtractorError: unchanged, when the failure is not an HTTP 401/403,
            or when the retried request fails as well.
    """
    def attach_bearer():
        # NOTE(review): the `if self._token` guard is not visible in the
        # reviewed excerpt — confirm against the full file.
        if self._token:
            kwargs.setdefault('headers', {})['Authorization'] = f'Bearer {self._token}'

    attach_bearer()
    try:
        return self._download_json(*args, **kwargs)
    except ExtractorError as e:
        auth_failed = isinstance(e.cause, HTTPError) and e.cause.status in (401, 403)
        if not auth_failed:
            raise
        self.to_screen(
            f'Reauthorizing with Nebula and retrying, because last API call resulted in error {e.cause.status}')
        # Refresh self._token, then repeat the very same request once.
        self._real_initialize()
        attach_bearer()
        return self._download_json(*args, **kwargs)
def _real_initialize(self):
    """Obtain a bearer token from the authorization endpoint.

    When no API key is stored yet, the 'nebula_auth.apiToken' cookie for
    https://nebula.tv is tried. The authorization request carries a
    "Token <key>" header only if a key is available; the ``token`` field of
    the response is stored in ``self._token``.
    """
    if not self._api_token:
        def cookie_api_token():
            return self._get_cookies('https://nebula.tv')['nebula_auth.apiToken'].value

        self._api_token = try_call(cookie_api_token)

    auth_headers = None
    if self._api_token:
        auth_headers = {'Authorization': f'Token {self._api_token}'}

    authorization = self._download_json(
        'https://users.api.nebula.app/api/v1/authorization/', None,
        headers=auth_headers, note='Authorizing to Nebula', data=b'')
    self._token = authorization['token']
def _extract_formats(self, content_id, slug):
    """Fetch the HLS manifest of a content item and return its formats and subtitles.

    The manifest path is built from the part of ``content_id`` before the
    first ':' (with an 's' appended) followed by the full ``content_id``.

    Returns:
        A dict with the keys ``formats`` and ``subtitles``.

    Raises:
        ExtractorError: a login-required error on HTTP 401; the original
            error otherwise, after at most one re-authorization on HTTP 403.
    """
    manifest_url = f'https://content.api.nebula.app/{content_id.split(":")[0]}s/{content_id}/manifest.m3u8'
    for retry in (False, True):
        try:
            # NOTE(review): only the 'app_version' entry is visible in the
            # reviewed excerpt; the slug/'mp4' arguments and the 'token' and
            # 'platform' entries are reconstructed — confirm against the
            # full file.
            formats, subtitles = self._extract_m3u8_formats_and_subtitles(
                manifest_url, slug, 'mp4', query={
                    'token': self._token,
                    'app_version': '23.10.0',
                    'platform': 'ios',
                })
        except ExtractorError as e:
            status = e.cause.status if isinstance(e.cause, HTTPError) else None
            if status == 401:
                self.raise_login_required()
            if retry or status != 403:
                raise
            self.to_screen('Reauthorizing with Nebula and retrying, because fetching video resulted in error')
            # The second pass reads the refreshed self._token.
            self._real_initialize()
        else:
            return {'formats': formats, 'subtitles': subtitles}
def _extract_video_metadata(self, episode):
    """Map an API episode/lesson object onto info-dict fields.

    ``id`` is the part of ``episode['id']`` after the first ':'.
    ``channel_url`` and ``uploader_url`` both hold the first of
    ``channel_slug`` / ``class_slug`` that is present, joined onto
    https://nebula.tv/.
    """
    def to_channel_url(channel_slug):
        return urljoin('https://nebula.tv/', channel_slug)

    def old_archive_ids(zype_id):
        # Old code was wrongly setting extractor_key from NebulaSubscriptionsIE
        if not zype_id:
            return None
        return [make_archive_id(NebulaIE, zype_id), make_archive_id(NebulaSubscriptionsIE, zype_id)]

    channel_url = traverse_obj(
        episode, (('channel_slug', 'class_slug'), {to_channel_url}), get_all=False)

    info = {'id': episode['id'].partition(':')[2]}
    info.update(traverse_obj(episode, {
        # NOTE(review): the two entries below are not visible in the reviewed
        # excerpt; they are inferred from callers reading
        # metadata["display_id"] and from the test expectations — confirm.
        'display_id': 'slug',
        'title': 'title',
        'description': 'description',
        'timestamp': ('published_at', {parse_iso8601}),
        'duration': ('duration', {int_or_none}),
        'channel_id': 'channel_slug',
        'uploader_id': 'channel_slug',
        'channel': 'channel_title',
        'uploader': 'channel_title',
        'series': 'channel_title',
        'creator': 'channel_title',
        'thumbnail': ('images', 'thumbnail', 'src', {url_or_none}),
        'episode_number': ('order', {int_or_none}),
        '_old_archive_ids': ('zype_id', {old_archive_ids}),
    }))
    info['channel_url'] = channel_url
    info['uploader_url'] = channel_url
    return info
class NebulaIE(NebulaBaseIE):
    """Extractor for single Nebula videos addressed as ``/videos/<slug>``."""

    IE_NAME = 'nebula:video'
    # The `id` group captures the video slug that follows /videos/.
    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[\w-]+)'
119 'url
': 'https
://nebula
.tv
/videos
/that
-time
-disney
-remade
-beauty
-and-the
-beast
',
121 'id': '84ed544d
-4afd
-4723-8cd5
-2b95261f0abf
',
123 'title
': 'That Time Disney Remade Beauty
and the Beast
',
124 'description
': 'md5
:2aae3c4cfc5ee09a1ecdff0909618cf4
',
125 'upload_date
': '20180731',
126 'timestamp
': 1533009600,
127 'channel
': 'Lindsay Ellis
',
128 'channel_id
': 'lindsayellis
',
129 'uploader
': 'Lindsay Ellis
',
130 'uploader_id
': 'lindsayellis
',
131 'uploader_url
': r're
:https
://nebula\
.(tv|app
)/lindsayellis
',
132 'series
': 'Lindsay Ellis
',
133 'display_id
': 'that
-time
-disney
-remade
-beauty
-and-the
-beast
',
134 'channel_url
': r're
:https
://nebula\
.(tv|app
)/lindsayellis
',
135 'creator
': 'Lindsay Ellis
',
137 'thumbnail
': r're
:https
://\w
+\
.cloudfront\
.net
/[\w
-]+',
138 '_old_archive_ids
': ['nebula
5c271b40b13fd613090034fd
', 'nebulasubscriptions
5c271b40b13fd613090034fd
'],
140 'params
': {'skip_download': 'm3u8'},
142 'url
': 'https
://nebula
.tv
/videos
/the
-logistics
-of
-d
-day
-landing
-craft
-how
-the
-allies
-got
-ashore
',
143 'md5
': 'd05739cf6c38c09322422f696b569c23
',
145 'id': '7e623145
-1b44
-4ca3
-aa0b
-ed25a247ea34
',
147 'title
': 'Landing Craft
- How The Allies Got Ashore
',
148 'description
': r're
:^In this episode we explore the unsung heroes of D
-Day
, the landing craft
.',
149 'upload_date
': '20200327',
150 'timestamp
': 1585348140,
151 'channel
': 'Real Engineering — The Logistics of D
-Day
',
152 'channel_id
': 'd
-day
',
153 'uploader
': 'Real Engineering — The Logistics of D
-Day
',
154 'uploader_id
': 'd
-day
',
155 'series
': 'Real Engineering — The Logistics of D
-Day
',
156 'display_id
': 'the
-logistics
-of
-d
-day
-landing
-craft
-how
-the
-allies
-got
-ashore
',
157 'creator
': 'Real Engineering — The Logistics of D
-Day
',
159 'channel_url
': 'https
://nebula
.tv
/d
-day
',
160 'uploader_url
': 'https
://nebula
.tv
/d
-day
',
161 'thumbnail
': r're
:https
://\w
+\
.cloudfront\
.net
/[\w
-]+',
162 '_old_archive_ids
': ['nebula
5e7e78171aaf320001fbd6be
', 'nebulasubscriptions
5e7e78171aaf320001fbd6be
'],
164 'params
': {'skip_download': 'm3u8'},
166 'url
': 'https
://nebula
.tv
/videos
/money
-episode
-1-the
-draw
',
167 'md5
': 'ebe28a7ad822b9ee172387d860487868
',
169 'id': 'b96c5714
-9e2b
-4ec3
-b3f1
-20f6e89cc553
',
171 'title
': 'Episode
1: The Draw
',
172 'description
': r'contains
:There’s free money on offer…
if the players can all work together
.',
173 'upload_date
': '20200323',
174 'timestamp
': 1584980400,
175 'channel
': 'Tom Scott Presents
: Money
',
176 'channel_id
': 'tom
-scott
-presents
-money
',
177 'uploader
': 'Tom Scott Presents
: Money
',
178 'uploader_id
': 'tom
-scott
-presents
-money
',
179 'uploader_url
': 'https
://nebula
.tv
/tom
-scott
-presents
-money
',
181 'channel_url
': 'https
://nebula
.tv
/tom
-scott
-presents
-money
',
182 'series
': 'Tom Scott Presents
: Money
',
183 'display_id
': 'money
-episode
-1-the
-draw
',
184 'thumbnail
': r're
:https
://\w
+\
.cloudfront\
.net
/[\w
-]+',
185 'creator
': 'Tom Scott Presents
: Money
',
186 '_old_archive_ids
': ['nebula
5e779ebdd157bc0001d1c75a
', 'nebulasubscriptions
5e779ebdd157bc0001d1c75a
'],
188 'params
': {'skip_download': 'm3u8'},
190 'url
': 'https
://watchnebula
.com
/videos
/money
-episode
-1-the
-draw
',
191 'only_matching
': True,
193 'url
': 'https
://nebula
.tv
/videos
/tldrnewseu
-did
-the
-us
-really
-blow
-up
-the
-nordstream
-pipelines
',
195 'id': 'e389af9d
-1dab
-44f2
-8788-ee24deb7ff0d
',
197 'display_id
': 'tldrnewseu
-did
-the
-us
-really
-blow
-up
-the
-nordstream
-pipelines
',
198 'title
': 'Did the US Really Blow Up the NordStream Pipelines?
',
199 'description
': 'md5
:b4e2a14e3ff08f546a3209c75261e789
',
200 'upload_date
': '20230223',
201 'timestamp
': 1677144070,
202 'channel
': 'TLDR News EU
',
203 'channel_id
': 'tldrnewseu
',
204 'uploader
': 'TLDR News EU
',
205 'uploader_id
': 'tldrnewseu
',
206 'uploader_url
': r're
:https
://nebula\
.(tv|app
)/tldrnewseu
',
208 'channel_url
': r're
:https
://nebula\
.(tv|app
)/tldrnewseu
',
209 'series
': 'TLDR News EU
',
210 'thumbnail
': r're
:https
://\w
+\
.cloudfront\
.net
/[\w
-]+',
211 'creator
': 'TLDR News EU
',
212 '_old_archive_ids
': ['nebula
63f64c74366fcd00017c1513
', 'nebulasubscriptions
63f64c74366fcd00017c1513
'],
214 'params
': {'skip_download': 'm3u8'},
216 'url
': 'https
://beta
.nebula
.tv
/videos
/money
-episode
-1-the
-draw
',
217 'only_matching
': True,
def _real_extract(self, url):
    """Extract a single video.

    When the URL carries a smuggled ``id``, the metadata request is skipped
    and only the formats are fetched for that id. Otherwise the video
    metadata is requested by slug and merged with the formats.
    """
    slug = self._match_id(url)
    url, smuggled_data = unsmuggle_url(url, {})

    content_id = smuggled_data.get('id')
    if content_id:
        # NOTE(review): 'display_id' and 'title' are not visible in the
        # reviewed excerpt — confirm against the full file.
        info = {
            'id': content_id,
            'display_id': slug,
            'title': '',
        }
    else:
        metadata = self._call_api(
            f'https://content.api.nebula.app/content/videos/{slug}',
            slug, note='Fetching video metadata')
        info = self._extract_video_metadata(metadata)
        content_id = metadata['id']

    return {**info, **self._extract_formats(content_id, slug)}
class NebulaClassIE(NebulaBaseIE):
    """Extractor for two-segment Nebula URLs (``/<id>/<ep>``): class lessons and podcast episodes."""

    IE_NAME = 'nebula:media'
    # Two path segments, `id` then `ep`; the first segment must not be
    # myshows, library or videos. Only an optional trailing slash, a query
    # or a fragment may follow.
    _VALID_URL = rf'{_BASE_URL_RE}/(?!(?:myshows|library|videos)/)(?P<id>[\w-]+)/(?P<ep>[\w-]+)/?(?:$|[?#])'
244 'url': 'https://nebula.tv/copyright-for-fun-and-profit/14',
246 'id': 'd7432cdc-c608-474d-942c-f74345daed7b',
249 'channel_url': 'https://nebula.tv/copyright-for-fun-and-profit',
250 'episode_number': 14,
251 'thumbnail': 'https://dj423fildxgac.cloudfront.net/d533718d-9307-42d4-8fb0-e283285e99c9',
252 'uploader_url': 'https://nebula.tv/copyright-for-fun-and-profit',
254 'episode': 'Episode 14',
255 'title': 'Photos, Sculpture, and Video',
257 'params': {'skip_download': 'm3u8'}
,
259 'url': 'https://nebula.tv/extremitiespodcast/pyramiden-the-high-arctic-soviet-ghost-town',
262 'id': '018f65f0-0033-4021-8f87-2d132beb19aa',
263 'description': 'md5:05d2b23ab780c955e2511a2b9127acff',
264 'series_id': '335e8159-d663-491a-888f-1732285706ac',
265 'modified_timestamp': 1599091504,
266 'episode_id': '018f65f0-0033-4021-8f87-2d132beb19aa',
267 'series': 'Extremities',
268 'modified_date': '20200903',
269 'upload_date': '20200902',
270 'title': 'Pyramiden: The High-Arctic Soviet Ghost Town',
271 'release_timestamp': 1571237958,
272 'thumbnail': r
're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
273 'duration': 1546.05714,
274 'timestamp': 1599085608,
275 'release_date': '20191016',
278 'url': 'https://nebula.tv/thelayover/the-layover-episode-1',
281 'id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
283 'thumbnail': r
're:^https?://content\.production\.cdn\.art19\.com.*\.jpeg$',
284 'release_date': '20230304',
285 'modified_date': '20230403',
286 'series': 'The Layover',
287 'episode_id': '9d74a762-00bb-45a8-9e8d-9ed47c04a1d0',
288 'modified_timestamp': 1680554566,
289 'duration': 3130.46401,
290 'release_timestamp': 1677943800,
291 'title': 'The Layover — Episode 1',
292 'series_id': '874303a5-4900-4626-a4b6-2aacac34466a',
293 'upload_date': '20230303',
294 'episode': 'Episode 1',
295 'timestamp': 1677883672,
296 'description': 'md5:002cca89258e3bc7c268d5b8c24ba482',
def _real_extract(self, url):
    """Extract a class lesson or a podcast episode.

    A smuggled ``id`` short-circuits the metadata request: only the formats
    for that id are fetched. Otherwise the content metadata is requested
    and handled by its ``type``:

    * ``lesson``: video metadata merged with the extracted formats.
    * ``podcast_episode``: delegated to Art19IE when the episode URL is an
      Art19 one, else returned as a direct-URL info dict.

    Raises:
        ExtractorError: for any other content type; a login-required error
            when a premium podcast episode has no episode URL.
    """
    slug, episode = self._match_valid_url(url).group('id', 'ep')
    url, smuggled_data = unsmuggle_url(url, {})
    if smuggled_data.get('id'):
        # NOTE(review): 'display_id' and 'title' are not visible in the
        # reviewed excerpt — confirm against the full file.
        return {
            'id': smuggled_data['id'],
            'display_id': slug,
            'title': '',
            **self._extract_formats(smuggled_data['id'], slug),
        }

    metadata = self._call_api(
        f'https://content.api.nebula.app/content/{slug}/{episode}/?include=lessons',
        slug, note='Fetching class/podcast metadata')
    content_type = metadata.get('type')
    if content_type == 'lesson':
        return {
            **self._extract_video_metadata(metadata),
            **self._extract_formats(metadata['id'], slug),
        }
    elif content_type == 'podcast_episode':
        episode_url = metadata['episode_url']
        if not episode_url and metadata.get('premium'):
            self.raise_login_required()

        if Art19IE.suitable(episode_url):
            return self.url_result(episode_url, Art19IE)
        return traverse_obj(metadata, {
            # NOTE(review): the 'id' entry is not visible in the reviewed
            # excerpt — confirm against the full file.
            'id': ('id', {str}),
            'url': ('episode_url', {url_or_none}),
            'title': ('title', {str}),
            'description': ('description', {str}),
            'timestamp': ('published_at', {parse_iso8601}),
            'duration': ('duration', {int_or_none}),
            'channel_id': ('channel_id', {str}),
            # Was misspelled 'chnanel', so the channel title never reached
            # the 'channel' field.
            'channel': ('channel_title', {str}),
            'thumbnail': ('assets', 'regular', {url_or_none}),
        })

    raise ExtractorError(f'Unexpected content type {content_type!r}')
class NebulaSubscriptionsIE(NebulaBaseIE):
    """Extractor for the ``/myshows`` and ``/library/latest-videos`` pages, returned as a playlist."""

    IE_NAME = 'nebula:subscriptions'
    # Matches exactly /myshows or /library/latest-videos, optionally followed
    # by a trailing slash, a query or a fragment.
    _VALID_URL = rf'{_BASE_URL_RE}/(?P<id>myshows|library/latest-videos)/?(?:$|[?#])'
346 'url': 'https://nebula.tv/myshows',
347 'playlist_mincount': 1,
def _generate_playlist_entries(self):
    """Yield a url_transparent NebulaIE result for every episode of the feed.

    Pages are requested one after another by following the ``next`` link of
    each response until it is empty. Each entry smuggles the episode id so
    that NebulaIE can skip its own metadata request.
    """
    # NOTE(review): the 'following' entry is not visible in the reviewed
    # excerpt — confirm against the full file.
    next_url = update_url_query('https://content.api.nebula.app/video_episodes/', {
        'following': 'true',
        'include': 'engagement',
        'ordering': '-published_at',
    })
    page_num = 0
    while next_url:
        page_num += 1
        page = self._call_api(
            next_url, 'myshows', note=f'Retrieving subscriptions page {page_num}')
        for episode in page['results']:
            metadata = self._extract_video_metadata(episode)
            video_url = smuggle_url(
                f'https://nebula.tv/videos/{metadata["display_id"]}',
                {'id': episode['id']})
            yield self.url_result(video_url, NebulaIE, url_transparent=True, **metadata)
        next_url = page.get('next')
def _real_extract(self, url):
    """Return the subscriptions feed as a playlist with the id 'myshows'."""
    entries = self._generate_playlist_entries()
    return self.playlist_result(entries, 'myshows')
class NebulaChannelIE(NebulaBaseIE):
    """Extractor for single-segment Nebula URLs (``/<slug>``): channels, classes and podcast channels."""

    IE_NAME = 'nebula:channel'
    # A single path segment (`id`) that does not start with myshows, library
    # or videos, optionally followed by a trailing slash, a query or a
    # fragment.
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|library|videos)(?P<id>[\w-]+)/?(?:$|[?#])'
379 'url': 'https://nebula.tv/tom-scott-presents-money',
381 'id': 'tom-scott-presents-money',
382 'title': 'Tom Scott Presents: Money',
383 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
387 'url': 'https://nebula.tv/lindsayellis',
389 'id': 'lindsayellis',
390 'title': 'Lindsay Ellis',
391 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
393 'playlist_mincount': 2,
395 'url': 'https://nebula.tv/johnnyharris',
397 'id': 'johnnyharris',
398 'title': 'Johnny Harris',
399 'description': 'I make videos about maps and many other things.',
401 'playlist_mincount': 90,
403 'url': 'https://nebula.tv/copyright-for-fun-and-profit',
405 'id': 'copyright-for-fun-and-profit',
406 'title': 'Copyright for Fun and Profit',
407 'description': 'md5:6690248223eed044a9f11cd5a24f9742',
409 'playlist_count': 23,
411 'url': 'https://nebula.tv/trussissuespodcast',
413 'id': 'trussissuespodcast',
414 'title': 'The TLDR News Podcast',
415 'description': 'md5:a08c4483bc0b705881d3e0199e721385',
417 'playlist_mincount': 80,
def _generate_playlist_entries(self, collection_id, collection_slug):
    """Yield a url_transparent NebulaIE result for every episode of a video channel.

    Pages are requested newest-first and followed through the ``next`` link
    of each response until it is empty. ``collection_slug`` is passed to
    ``_call_api`` as the video id of each request.
    """
    next_url = f'https://content.api.nebula.app/video_channels/{collection_id}/video_episodes/?ordering=-published_at'
    page_num = 0
    while next_url:
        page_num += 1
        page = self._call_api(next_url, collection_slug, note=f'Retrieving channel page {page_num}')
        for episode in page['results']:
            metadata = self._extract_video_metadata(episode)
            # Prefer the URL supplied by the API; otherwise build one from the slug.
            video_url = episode.get('share_url') or f'https://nebula.tv/videos/{metadata["display_id"]}'
            yield self.url_result(
                smuggle_url(video_url, {'id': episode['id']}),
                NebulaIE, url_transparent=True, **metadata)
        next_url = page.get('next')
def _generate_class_entries(self, channel):
    """Yield a url_transparent NebulaClassIE result for each lesson of a class.

    Args:
        channel: class object from the content API, including its
            ``lessons`` list.

    Each entry smuggles the lesson id so that NebulaClassIE can skip its
    own metadata request.
    """
    for lesson in channel['lessons']:
        metadata = self._extract_video_metadata(lesson)
        # The fallback URL is built from the raw lesson object. The metadata
        # dict does not appear to carry 'class_slug' or 'slug' keys, so
        # indexing it raised KeyError whenever 'share_url' was missing.
        lesson_url = lesson.get('share_url') or f'https://nebula.tv/{lesson["class_slug"]}/{lesson["slug"]}'
        yield self.url_result(
            smuggle_url(lesson_url, {'id': lesson['id']}),
            NebulaClassIE, url_transparent=True, **metadata)
def _generate_podcast_entries(self, collection_id, collection_slug):
    """Yield a NebulaClassIE URL result for every podcast episode with a valid share URL.

    Pages are requested newest-first and followed through the ``next`` link
    of each response until it is empty. Episodes whose ``share_url`` does
    not pass ``url_or_none`` are skipped.
    """
    def has_share_url(_, item):
        return url_or_none(item['share_url'])

    next_url = f'https://content.api.nebula.app/podcast_channels/{collection_id}/podcast_episodes/?ordering=-published_at&premium=true'
    page_num = 0
    while next_url:
        page_num += 1
        page = self._call_api(next_url, collection_slug, note=f'Retrieving podcast page {page_num}')
        for episode in traverse_obj(page, ('results', has_share_url)):
            yield self.url_result(episode['share_url'], NebulaClassIE)
        next_url = page.get('next')
def _real_extract(self, url):
    """Return a channel, class or podcast channel as a playlist.

    The collection is looked up by its slug; its ``type`` selects the entry
    generator: ``class`` uses the embedded lessons, ``podcast_channel`` the
    podcast episode listing, and anything else the video episode listing.
    """
    collection_slug = self._match_id(url)
    channel = self._call_api(
        f'https://content.api.nebula.app/content/{collection_slug}/?include=lessons',
        collection_slug, note='Retrieving channel')

    channel_type = channel.get('type')
    if channel_type == 'class':
        entries = self._generate_class_entries(channel)
    elif channel_type == 'podcast_channel':
        entries = self._generate_podcast_entries(channel['id'], collection_slug)
    else:
        entries = self._generate_playlist_entries(channel['id'], collection_slug)

    # NOTE(review): the line passing `entries` is not visible in the reviewed
    # excerpt — confirm against the full file.
    return self.playlist_result(
        entries=entries,
        playlist_id=collection_slug,
        playlist_title=channel.get('title'),
        playlist_description=channel.get('description'))