]>
Commit | Line | Data |
---|---|---|
359df0fc | 1 | import itertools |
bdc196a4 GS |
2 | import json |
3 | import time | |
ac668111 | 4 | import urllib.error |
54007a45 | 5 | import urllib.parse |
bdc196a4 | 6 | |
359df0fc | 7 | from .common import InfoExtractor |
ac668111 | 8 | from ..utils import ExtractorError, parse_iso8601, try_get |
359df0fc | 9 | |
4cca2eb1 TA |
# Scheme-and-host prefix shared by the `_VALID_URL` patterns of the Nebula
# extractors in this file. It accepts http or https, an optional "www."
# prefix, and any of the hosts watchnebula.com, nebula.app or nebula.tv.
_BASE_URL_RE = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
11 | ||
359df0fc HH |
12 | |
class NebulaBaseIE(InfoExtractor):
    """Authentication and metadata helpers shared by the Nebula extractors.

    Three credentials are kept on the instance once `_perform_login` ran:
    the Nebula API token (sent as `Token ...`), the Nebula bearer token
    (sent as `Bearer ...`) and the Zype access token that is embedded in the
    Zype player URL built by `_build_video_info`.
    """
    _NETRC_MACHINE = 'watchnebula'

    _nebula_api_token = None
    _nebula_bearer_token = None
    _zype_access_token = None
    # True while `_call_nebula_api` is running a re-login. `_perform_login`
    # itself goes through `_call_nebula_api`, so without this guard a token
    # that keeps being rejected would re-trigger the login recursively.
    _nebula_reauth_in_progress = False

    def _perform_nebula_auth(self, username, password):
        """Log in with email/password and return the Nebula API token.

        On success the token is also stored in a `nebula-auth` cookie for
        `nebula.app`. `raise_login_required()` is called when either
        credential is missing or the response carries no `key`.
        """
        if not username or not password:
            self.raise_login_required()

        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Logging in to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required()

        # save nebula token as cookie
        self._set_cookie(
            'nebula.app', 'nebula-auth',
            urllib.parse.quote(
                json.dumps({
                    "apiToken": response["key"],
                    "isLoggingIn": False,
                    "isLoggingOut": False,
                }, separators=(",", ":"))),
            expire_time=int(time.time()) + 86400 * 365,
        )

        return response['key']

    def _retrieve_nebula_api_token(self, username=None, password=None):
        """
        Check cookie jar for valid token. Try to authenticate using credentials if no valid token
        can be found in the cookie jar.

        A `nebula-auth` cookie that is not valid JSON, is not a JSON object or
        has no `apiToken` is ignored instead of aborting the extraction.
        """
        nebula_cookie = self._get_cookies('https://nebula.app').get('nebula-auth')
        if nebula_cookie:
            self.to_screen('Authenticating to Nebula with token from cookie jar')
            nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
            # fatal=False: a corrupt cookie must not prevent the credential fallback
            cookie_payload = self._parse_json(nebula_cookie_value, None, fatal=False)
            nebula_api_token = cookie_payload.get('apiToken') if isinstance(cookie_payload, dict) else None
            if nebula_api_token:
                return nebula_api_token

        return self._perform_nebula_auth(username, password)

    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
        """Download JSON from `url` with a Nebula `Authorization` header.

        `auth_type` selects the header value: 'api' sends the API token,
        'bearer' sends the bearer token. `method='POST'` sends an empty body.
        If the request fails with HTTP 401 or 403, `_perform_login()` is run
        once and the request is retried; any other `ExtractorError`, or a
        401/403 raised while such a re-login is already running, is re-raised.
        """
        assert method in ('GET', 'POST',)
        assert auth_type in ('api', 'bearer',)

        def inner_call():
            # The tokens are read at call time so the retry uses refreshed values
            authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
            return self._download_json(
                url, video_id, note=note, headers={'Authorization': authorization},
                data=b'' if method == 'POST' else None)

        try:
            return inner_call()
        except ExtractorError as exc:
            # if 401 or 403, attempt credential re-auth and retry
            is_auth_error = isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403)
            if not is_auth_error or self._nebula_reauth_in_progress:
                raise
            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
            self._nebula_reauth_in_progress = True
            try:
                self._perform_login()
            finally:
                self._nebula_reauth_in_progress = False
            return inner_call()

    def _fetch_nebula_bearer_token(self):
        """
        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
        """
        response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
                                         method='POST',
                                         note='Authorizing to Nebula')
        return response['token']

    def _fetch_zype_access_token(self):
        """
        Get a Zype access token, which is required to access video streams -- in our case: to
        generate video URLs.

        Raises ExtractorError when the user object holds no string access
        token; the error is marked as expected for subscribed accounts.
        """
        user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')

        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
        if not access_token:
            if try_get(user_object, lambda x: x['is_subscribed'], bool):
                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
                raise ExtractorError(
                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
                    'Open an arbitrary video in a browser with this account to generate a token',
                    expected=True)
            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
        return access_token

    @staticmethod
    def _thumbnail_height(key):
        """Return the thumbnail dict key as an int, or None if it is not numeric."""
        try:
            return int(key)
        except (TypeError, ValueError):
            return None

    def _build_video_info(self, episode):
        """Turn a Nebula episode object into a `url_transparent` result.

        The result delegates to the Zype extractor through a player URL that
        carries the stored Zype access token. Thumbnail heights are taken
        from the keys of `episode['assets']['thumbnail']`; since JSON object
        keys are strings they are converted to integers here.
        """
        zype_id = episode['zype_id']
        zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
        channel_slug = episode['channel_slug']
        return {
            'id': zype_id,
            'display_id': episode['slug'],
            '_type': 'url_transparent',
            'ie_key': 'Zype',
            'url': zype_video_url,
            'title': episode['title'],
            'description': episode['description'],
            'timestamp': parse_iso8601(episode['published_at']),
            'thumbnails': [{
                # 'id': tn.get('name'),  # this appears to be null
                'url': tn['original'],
                'height': self._thumbnail_height(key),
            } for key, tn in episode['assets']['thumbnail'].items()],
            'duration': episode['duration'],
            'channel': episode['channel_title'],
            'channel_id': channel_slug,
            'channel_url': f'https://nebula.app/{channel_slug}',
            'uploader': episode['channel_title'],
            'uploader_id': channel_slug,
            'uploader_url': f'https://nebula.app/{channel_slug}',
            'series': episode['channel_title'],
            'creator': episode['channel_title'],
        }

    def _perform_login(self, username=None, password=None):
        """Obtain and store the API token, the bearer token and the Zype token, in that order."""
        self._nebula_api_token = self._retrieve_nebula_api_token(username, password)
        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
        self._zype_access_token = self._fetch_zype_access_token()
150 | ||
359df0fc HH |
151 | |
class NebulaIE(NebulaBaseIE):
    """Extractor for a single Nebula video page (`/videos/<slug>`)."""
    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
    _TESTS = [
        {
            'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
            'md5': '14944cfee8c7beeea106320c47560efc',
            'info_dict': {
                'id': '5c271b40b13fd613090034fd',
                'ext': 'mp4',
                'title': 'That Time Disney Remade Beauty and the Beast',
                'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
                'upload_date': '20180731',
                # 'timestamp' used to be listed twice with the same value; one entry is enough
                'timestamp': 1533009600,
                'channel': 'Lindsay Ellis',
                'channel_id': 'lindsayellis',
                'uploader': 'Lindsay Ellis',
                'uploader_id': 'lindsayellis',
                'uploader_url': 'https://nebula.app/lindsayellis',
                'series': 'Lindsay Ellis',
                'average_rating': int,
                'display_id': 'that-time-disney-remade-beauty-and-the-beast',
                'channel_url': 'https://nebula.app/lindsayellis',
                'creator': 'Lindsay Ellis',
                'duration': 2212,
                'view_count': int,
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            },
        },
        {
            'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'md5': 'd05739cf6c38c09322422f696b569c23',
            'info_dict': {
                'id': '5e7e78171aaf320001fbd6be',
                'ext': 'mp4',
                'title': 'Landing Craft - How The Allies Got Ashore',
                'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
                'upload_date': '20200327',
                'timestamp': 1585348140,
                'channel': 'Real Engineering',
                'channel_id': 'realengineering',
                'uploader': 'Real Engineering',
                'uploader_id': 'realengineering',
                'view_count': int,
                'series': 'Real Engineering',
                'average_rating': int,
                'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
                'creator': 'Real Engineering',
                'duration': 841,
                'channel_url': 'https://nebula.app/realengineering',
                'uploader_url': 'https://nebula.app/realengineering',
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            },
        },
        {
            'url': 'https://nebula.app/videos/money-episode-1-the-draw',
            'md5': 'ebe28a7ad822b9ee172387d860487868',
            'info_dict': {
                'id': '5e779ebdd157bc0001d1c75a',
                'ext': 'mp4',
                'title': 'Episode 1: The Draw',
                'description': r'contains:There’s free money on offer… if the players can all work together.',
                'upload_date': '20200323',
                'timestamp': 1584980400,
                'channel': 'Tom Scott Presents: Money',
                'channel_id': 'tom-scott-presents-money',
                'uploader': 'Tom Scott Presents: Money',
                'uploader_id': 'tom-scott-presents-money',
                'uploader_url': 'https://nebula.app/tom-scott-presents-money',
                'duration': 825,
                'channel_url': 'https://nebula.app/tom-scott-presents-money',
                'view_count': int,
                'series': 'Tom Scott Presents: Money',
                'display_id': 'money-episode-1-the-draw',
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
                'average_rating': int,
                'creator': 'Tom Scott Presents: Money',
            },
        },
        {
            'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
            'only_matching': True,
        },
    ]

    def _fetch_video_metadata(self, slug):
        """Return the metadata object of the video `slug`, requested with the bearer token."""
        return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
                                     video_id=slug,
                                     auth_type='bearer',
                                     note='Fetching video meta data')

    def _real_extract(self, url):
        """Resolve the slug in `url` to the result built by `_build_video_info`."""
        slug = self._match_id(url)
        video = self._fetch_video_metadata(slug)
        return self._build_video_info(video)
bdc196a4 | 247 | |
bdc196a4 | 248 | |
f3b3fe16 HH |
class NebulaSubscriptionsIE(NebulaBaseIE):
    """Playlist extractor for the `/myshows` page."""
    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = rf'{_BASE_URL_RE}/myshows'
    _TESTS = [{
        'url': 'https://nebula.app/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
    }]

    def _generate_playlist_entries(self):
        """Yield one entry per video of the library listing.

        Pages are requested with the bearer token, 100 items at a time, and
        the listing is followed through each page's `next` link until that
        link is empty.
        """
        page_url = 'https://content.watchnebula.com/library/video/?page_size=100'
        for page_num in itertools.count(1):
            page = self._call_nebula_api(
                page_url, 'myshows', auth_type='bearer',
                note=f'Retrieving subscriptions page {page_num}')
            yield from (self._build_video_info(episode) for episode in page['results'])
            page_url = page['next']
            if not page_url:
                break

    def _real_extract(self, url):
        """Return the lazily generated entries as a playlist with id 'myshows'."""
        entries = self._generate_playlist_entries()
        return self.playlist_result(entries, 'myshows')
275 | ||
276 | ||
class NebulaChannelIE(NebulaBaseIE):
    """Playlist extractor for a channel page (any path other than `myshows` or `videos/`)."""
    IE_NAME = 'nebula:channel'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.app/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://nebula.app/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 100,
    }]

    def _generate_playlist_entries(self, collection_id, channel):
        """Yield an entry for every episode of the channel.

        `channel` is the already downloaded first page. Further pages are
        fetched through `episodes.next` until that link is empty; the
        progress note counts them starting at 2.
        """
        page_num = 2
        while True:
            episode_page = channel['episodes']
            for episode in episode_page['results']:
                yield self._build_video_info(episode)

            following_url = episode_page['next']
            if not following_url:
                return

            channel = self._call_nebula_api(
                following_url, collection_id, auth_type='bearer',
                note=f'Retrieving channel page {page_num}')
            page_num += 1

    def _real_extract(self, url):
        """Download the channel object and wrap its episodes into a playlist result."""
        collection_id = self._match_id(url)
        channel = self._call_nebula_api(
            f'https://content.watchnebula.com/video/channels/{collection_id}/',
            collection_id, auth_type='bearer', note='Retrieving channel')
        details = channel['details']

        return self.playlist_result(
            entries=self._generate_playlist_entries(collection_id, channel),
            playlist_id=collection_id,
            playlist_title=details['title'],
            playlist_description=details['description'],
        )