]>
Commit | Line | Data |
---|---|---|
359df0fc | 1 | import itertools |
bdc196a4 | 2 | import json |
bdc196a4 | 3 | |
359df0fc | 4 | from .common import InfoExtractor |
3d2623a8 | 5 | from ..networking.exceptions import HTTPError |
3f756c8c | 6 | from ..utils import ExtractorError, make_archive_id, parse_iso8601, remove_start |
359df0fc | 7 | |
# Common URL prefix for every Nebula extractor in this module: http or https,
# an optional "www." or "beta." subdomain, and any of the three site domains.
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
4cca2eb1 | 9 | |
359df0fc HH |
10 | |
class NebulaBaseIE(InfoExtractor):
    """
    Shared login, API-call and metadata-mapping helpers for the Nebula extractors.
    """
    _NETRC_MACHINE = 'watchnebula'

    # Key returned by the login endpoint; sent as 'Authorization: Token <key>'
    _nebula_api_token = None
    # Token returned by the authorization endpoint; sent as 'Authorization: Bearer <token>'
    _nebula_bearer_token = None
    # True while _call_nebula_api is re-running the login, so that a failing
    # API call made by the login itself cannot trigger another re-login
    _nebula_reauthenticating = False

    def _perform_nebula_auth(self, username, password):
        """
        Log in with e-mail and password and return the API token.

        The token is the 'key' field of the login response. A login-required
        error is raised (via raise_login_required) when either credential is
        missing, or when the response is empty or carries no 'key'.
        """
        if not username or not password:
            self.raise_login_required(method='password')

        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Logging in to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required(method='password')

        return response['key']

    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
        """
        Download JSON from a Nebula API endpoint with an Authorization header.

        auth_type 'api' sends the login token, 'bearer' sends the bearer token.
        method 'POST' sends an empty request body, 'GET' sends none.
        If the call fails with HTTP 401 or 403, the login is performed again
        with the configured credentials and the call is retried once; any
        other failure is re-raised unchanged.
        """
        assert method in ('GET', 'POST',)
        assert auth_type in ('api', 'bearer',)

        def inner_call():
            # Built on every call so that the retry uses the refreshed tokens
            authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
            return self._download_json(
                url, video_id, note=note, headers={'Authorization': authorization},
                data=b'' if method == 'POST' else None)

        try:
            return inner_call()
        except ExtractorError as exc:
            # Only 401 and 403 are treated as recoverable authentication failures
            if not isinstance(exc.cause, HTTPError) or exc.cause.status not in (401, 403):
                raise
            # The login itself calls this method to fetch the bearer token;
            # do not start a second re-login from inside the first one
            if self._nebula_reauthenticating:
                raise
            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.status}')
            self._nebula_reauthenticating = True
            try:
                # Pass the configured credentials explicitly: _perform_login()
                # without arguments can only end in a login-required error
                self._perform_login(*self._get_login_info())
            finally:
                self._nebula_reauthenticating = False
            return inner_call()

    def _fetch_nebula_bearer_token(self):
        """
        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
        """
        response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
                                         method='POST',
                                         note='Authorizing to Nebula')
        return response['token']

    def _fetch_video_formats(self, slug):
        """
        Return the (formats, subtitles) pair extracted from the video's HLS manifest.

        The manifest URL is the 'manifest' field of the stream info for the given slug.
        """
        stream_info = self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/stream/',
                                            video_id=slug,
                                            auth_type='bearer',
                                            note='Fetching video stream info')
        manifest_url = stream_info['manifest']
        return self._extract_m3u8_formats_and_subtitles(manifest_url, slug, 'mp4')

    def _build_video_info(self, episode):
        """
        Convert an episode object from the Nebula API into an info dict.

        'slug', 'id', 'title', 'channel_slug' and 'channel_title' are required.
        Description, duration, publication date, thumbnails and the legacy
        'zype_id' are optional and are left empty when absent.
        """
        fmts, subs = self._fetch_video_formats(episode['slug'])
        channel_slug = episode['channel_slug']
        channel_title = episode['channel_title']
        zype_id = episode.get('zype_id')
        # Thumbnails are optional metadata; a missing 'assets' section must not abort extraction
        thumbnail_assets = (episode.get('assets') or {}).get('thumbnail') or {}
        return {
            'id': remove_start(episode['id'], 'video_episode:'),
            'display_id': episode['slug'],
            'formats': fmts,
            'subtitles': subs,
            'webpage_url': f'https://nebula.tv/{episode["slug"]}',
            'title': episode['title'],
            'description': episode.get('description'),
            'timestamp': parse_iso8601(episode.get('published_at')),
            'thumbnails': [{
                # 'id': tn.get('name'),  # this appears to be null
                'url': tn['original'],
                'height': key,
            } for key, tn in thumbnail_assets.items()],
            'duration': episode.get('duration'),
            'channel': channel_title,
            'channel_id': channel_slug,
            'channel_url': f'https://nebula.tv/{channel_slug}',
            'uploader': channel_title,
            'uploader_id': channel_slug,
            'uploader_url': f'https://nebula.tv/{channel_slug}',
            'series': channel_title,
            'creator': channel_title,
            # Entries built by the playlist extractors are attributed to NebulaIE as well
            'extractor_key': NebulaIE.ie_key(),
            'extractor': NebulaIE.IE_NAME,
            '_old_archive_ids': [make_archive_id(NebulaIE, zype_id)] if zype_id else None,
        }

    def _perform_login(self, username=None, password=None):
        """
        Log in and store both the API token and the bearer token on the instance.
        """
        self._nebula_api_token = self._perform_nebula_auth(username, password)
        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
359df0fc | 111 | |
359df0fc HH |
112 | |
class NebulaIE(NebulaBaseIE):
    """
    Extractor for a single Nebula video page (/videos/<slug>).
    """
    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
        'md5': '14944cfee8c7beeea106320c47560efc',
        'info_dict': {
            'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
            'uploader_url': 'https://nebula.tv/lindsayellis',
            'series': 'Lindsay Ellis',
            'display_id': 'that-time-disney-remade-beauty-and-the-beast',
            'channel_url': 'https://nebula.tv/lindsayellis',
            'creator': 'Lindsay Ellis',
            'duration': 2212,
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
        },
    }, {
        'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': 'd05739cf6c38c09322422f696b569c23',
        'info_dict': {
            'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering — The Logistics of D-Day',
            'channel_id': 'd-day',
            'uploader': 'Real Engineering — The Logistics of D-Day',
            'uploader_id': 'd-day',
            'series': 'Real Engineering — The Logistics of D-Day',
            'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'creator': 'Real Engineering — The Logistics of D-Day',
            'duration': 841,
            'channel_url': 'https://nebula.tv/d-day',
            'uploader_url': 'https://nebula.tv/d-day',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
        },
    }, {
        'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
        'md5': 'ebe28a7ad822b9ee172387d860487868',
        'info_dict': {
            'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
            'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
            'duration': 825,
            'channel_url': 'https://nebula.tv/tom-scott-presents-money',
            'series': 'Tom Scott Presents: Money',
            'display_id': 'money-episode-1-the-draw',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            'creator': 'Tom Scott Presents: Money',
        },
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }, {
        'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _fetch_video_metadata(self, slug):
        """
        Return the API's metadata object for the video with the given slug.
        """
        metadata_url = f'https://content.api.nebula.app/video/{slug}/'
        return self._call_nebula_api(
            metadata_url, video_id=slug, auth_type='bearer', note='Fetching video meta data')

    def _real_extract(self, url):
        """
        Take the slug from the URL, fetch its metadata and build the info dict.
        """
        return self._build_video_info(self._fetch_video_metadata(self._match_id(url)))
bdc196a4 | 205 | |
bdc196a4 | 206 | |
f3b3fe16 HH |
class NebulaSubscriptionsIE(NebulaBaseIE):
    """
    Playlist extractor for the logged-in user's /myshows page.
    """
    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = rf'{_BASE_URL_RE}/myshows'
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
    }]

    def _generate_playlist_entries(self):
        """
        Lazily yield an info dict for every video in the library listing.

        Pages are requested one after another by following each page's
        'next' link; iteration stops at the first page without one.
        """
        page_url = 'https://content.watchnebula.com/library/video/?page_size=100'
        for page_index in itertools.count(1):
            page = self._call_nebula_api(
                page_url, 'myshows', auth_type='bearer',
                note=f'Retrieving subscriptions page {page_index}')
            yield from (self._build_video_info(item) for item in page['results'])
            page_url = page['next']
            if not page_url:
                return

    def _real_extract(self, url):
        """
        Wrap the lazily generated entries in a playlist with the id 'myshows'.
        """
        entries = self._generate_playlist_entries()
        return self.playlist_result(entries, 'myshows')
233 | ||
234 | ||
class NebulaChannelIE(NebulaBaseIE):
    """
    Playlist extractor for a Nebula channel page (/<channel-slug>).
    """
    IE_NAME = 'nebula:channel'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.tv/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://nebula.tv/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 2,
    }]

    def _generate_playlist_entries(self, collection_id, channel):
        """
        Lazily yield an info dict for every episode of the channel.

        `channel` is the already-downloaded first page. Further pages are
        fetched by following the 'next' link of each page's episode listing,
        and are numbered from 2 in the progress note.
        """
        page = channel
        page_index = 1
        while True:
            listing = page['episodes']
            for item in listing['results']:
                yield self._build_video_info(item)
            following_url = listing['next']
            if not following_url:
                return
            page_index += 1
            page = self._call_nebula_api(
                following_url, collection_id, auth_type='bearer',
                note=f'Retrieving channel page {page_index}')

    def _real_extract(self, url):
        """
        Download the channel's first page and return it as a playlist.

        Title and description are taken from the channel's 'details' object.
        """
        collection_id = self._match_id(url)
        channel = self._call_nebula_api(
            f'https://content.watchnebula.com/video/channels/{collection_id}/',
            collection_id, auth_type='bearer', note='Retrieving channel')
        details = channel['details']

        return self.playlist_result(
            entries=self._generate_playlist_entries(collection_id, channel),
            playlist_id=collection_id,
            playlist_title=details['title'],
            playlist_description=details['description'])