]>
Commit | Line | Data |
---|---|---|
359df0fc | 1 | import itertools |
bdc196a4 GS |
2 | import json |
3 | import time | |
359df0fc | 4 | import urllib |
bdc196a4 | 5 | |
bdc196a4 GS |
6 | from ..utils import ( |
7 | ExtractorError, | |
8 | parse_iso8601, | |
9 | try_get, | |
bdc196a4 | 10 | ) |
359df0fc HH |
11 | from .common import InfoExtractor |
12 | ||
13 | ||
class NebulaBaseIE(InfoExtractor):
    """
    Shared authentication and API helpers for the Nebula extractors.

    Three credentials are kept on the extractor and filled in by
    _perform_login(): the Nebula API token, the Nebula bearer token and the
    Zype access token that is embedded into Zype player URLs.
    """
    _NETRC_MACHINE = 'watchnebula'

    _nebula_api_token = None
    _nebula_bearer_token = None
    _zype_access_token = None
    # True while _call_nebula_api() is re-running the login sequence. An auth
    # error raised inside that sequence is then propagated instead of
    # triggering yet another (endlessly nested) re-login.
    _nebula_reauth_in_progress = False

    def _perform_nebula_auth(self, username, password):
        """
        Log in to Nebula with email and password and return the API token.

        The token is additionally stored in the cookie jar as the
        'nebula-auth' cookie for nebula.app (expiring one year from now),
        which is where _retrieve_nebula_api_token() looks for it.
        raise_login_required() is called when a credential is missing or
        when the login response does not carry a 'key'.
        """
        if not username or not password:
            self.raise_login_required()

        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Logging in to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required()

        api_token = response['key']

        # save nebula token as cookie
        self._set_cookie(
            'nebula.app', 'nebula-auth',
            urllib.parse.quote(
                json.dumps({
                    "apiToken": api_token,
                    "isLoggingIn": False,
                    "isLoggingOut": False,
                }, separators=(",", ":"))),
            expire_time=int(time.time()) + 86400 * 365,
        )

        return api_token

    def _retrieve_nebula_api_token(self, username=None, password=None):
        """
        Check cookie jar for valid token. Try to authenticate using credentials if no valid token
        can be found in the cookie jar.

        A 'nebula-auth' cookie that is not valid JSON, or that does not
        contain a string 'apiToken', is treated like a missing cookie.
        """
        nebula_cookie = self._get_cookies('https://nebula.app').get('nebula-auth')
        if nebula_cookie:
            # fatal=False: a corrupt cookie must not abort extraction while a
            # credential login is still possible
            cookie_payload = self._parse_json(
                urllib.parse.unquote(nebula_cookie.value), None, fatal=False)
            nebula_api_token = try_get(cookie_payload, lambda x: x['apiToken'], str)
            if nebula_api_token:
                self.to_screen('Authenticating to Nebula with token from cookie jar')
                return nebula_api_token

        return self._perform_nebula_auth(username, password)

    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
        """
        Download JSON from a Nebula endpoint, sending an Authorization header.

        auth_type 'api' sends 'Token <API token>', 'bearer' sends
        'Bearer <bearer token>'. method 'POST' submits an empty request body,
        'GET' submits none.

        If the request fails with HTTP 401 or 403, the login sequence is run
        again and the request is retried once. Any other ExtractorError, and
        an auth error that occurs while such a re-login is already running,
        is re-raised unchanged.
        """
        assert method in ('GET', 'POST',)
        assert auth_type in ('api', 'bearer',)

        def inner_call():
            # The header is built on every call so that the retry below uses
            # the tokens refreshed by _perform_login()
            authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
            return self._download_json(
                url, video_id, note=note, headers={'Authorization': authorization},
                data=b'' if method == 'POST' else None)

        try:
            return inner_call()
        except ExtractorError as exc:
            is_auth_error = (
                isinstance(exc.cause, urllib.error.HTTPError)
                and exc.cause.code in (401, 403))
            # _perform_login() itself goes through this method; without the
            # guard a persistently rejected token would recurse without bound
            if not is_auth_error or self._nebula_reauth_in_progress:
                raise
            # if 401 or 403, attempt credential re-auth and retry
            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
            self._nebula_reauth_in_progress = True
            try:
                self._perform_login()
            finally:
                self._nebula_reauth_in_progress = False
            return inner_call()

    def _fetch_nebula_bearer_token(self):
        """
        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
        """
        response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
                                         method='POST',
                                         note='Authorizing to Nebula')
        return response['token']

    def _fetch_zype_access_token(self):
        """
        Get a Zype access token, which is required to access video streams -- in our case: to
        generate video URLs.

        Raises ExtractorError when the user object carries no access token;
        for subscribed accounts the error is marked as expected and tells the
        user how to make Nebula generate one.
        """
        user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')

        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
        if not access_token:
            if try_get(user_object, lambda x: x['is_subscribed'], bool):
                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
                raise ExtractorError(
                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
                    'Open an arbitrary video in a browser with this account to generate a token',
                    expected=True)
            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
        return access_token

    def _build_video_info(self, episode):
        """
        Turn one episode object from the Nebula content API into a
        url_transparent result that hands the actual extraction to the Zype
        extractor, with the Zype access token embedded in the player URL.
        """
        zype_id = episode['zype_id']
        zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
        channel_slug = episode['channel_slug']
        return {
            'id': zype_id,
            'display_id': episode['slug'],
            '_type': 'url_transparent',
            'ie_key': 'Zype',
            'url': zype_video_url,
            'title': episode['title'],
            'description': episode['description'],
            'timestamp': parse_iso8601(episode['published_at']),
            'thumbnails': [{
                # 'id': tn.get('name'),  # this appears to be null
                'url': tn['original'],
                # NOTE(review): the mapping key is used as the height as-is;
                # presumably it is the pixel height -- confirm against the API
                'height': key,
            } for key, tn in episode['assets']['thumbnail'].items()],
            'duration': episode['duration'],
            'channel': episode['channel_title'],
            'channel_id': channel_slug,
            'channel_url': f'https://nebula.app/{channel_slug}',
            'uploader': episode['channel_title'],
            'uploader_id': channel_slug,
            'uploader_url': f'https://nebula.app/{channel_slug}',
            'series': episode['channel_title'],
            'creator': episode['channel_title'],
        }

    def _perform_login(self, username=None, password=None):
        """
        Obtain all three tokens in order: the API token (cookie jar first,
        then credentials), the bearer token and the Zype access token.
        """
        self._nebula_api_token = self._retrieve_nebula_api_token(username, password)
        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
        self._zype_access_token = self._fetch_zype_access_token()
151 | ||
359df0fc HH |
152 | |
class NebulaIE(NebulaBaseIE):
    """Extractor for single Nebula video pages (/videos/<slug>)."""
    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
    _TESTS = [
        {
            'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
            'md5': '14944cfee8c7beeea106320c47560efc',
            'info_dict': {
                'id': '5c271b40b13fd613090034fd',
                'ext': 'mp4',
                'title': 'That Time Disney Remade Beauty and the Beast',
                'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
                'upload_date': '20180731',
                # 'timestamp' used to be listed twice in this dict; only one
                # entry is kept since a repeated key is silently overwritten
                'timestamp': 1533009600,
                'channel': 'Lindsay Ellis',
                'channel_id': 'lindsayellis',
                'uploader': 'Lindsay Ellis',
                'uploader_id': 'lindsayellis',
                'uploader_url': 'https://nebula.app/lindsayellis',
                'series': 'Lindsay Ellis',
                'average_rating': int,
                'display_id': 'that-time-disney-remade-beauty-and-the-beast',
                'channel_url': 'https://nebula.app/lindsayellis',
                'creator': 'Lindsay Ellis',
                'duration': 2212,
                'view_count': int,
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            },
        },
        {
            'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'md5': 'd05739cf6c38c09322422f696b569c23',
            'info_dict': {
                'id': '5e7e78171aaf320001fbd6be',
                'ext': 'mp4',
                'title': 'Landing Craft - How The Allies Got Ashore',
                'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
                'upload_date': '20200327',
                'timestamp': 1585348140,
                'channel': 'Real Engineering',
                'channel_id': 'realengineering',
                'uploader': 'Real Engineering',
                'uploader_id': 'realengineering',
                'view_count': int,
                'series': 'Real Engineering',
                'average_rating': int,
                'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
                'creator': 'Real Engineering',
                'duration': 841,
                'channel_url': 'https://nebula.app/realengineering',
                'uploader_url': 'https://nebula.app/realengineering',
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            },
        },
        {
            'url': 'https://nebula.app/videos/money-episode-1-the-draw',
            'md5': 'ebe28a7ad822b9ee172387d860487868',
            'info_dict': {
                'id': '5e779ebdd157bc0001d1c75a',
                'ext': 'mp4',
                'title': 'Episode 1: The Draw',
                'description': r'contains:There’s free money on offer… if the players can all work together.',
                'upload_date': '20200323',
                'timestamp': 1584980400,
                'channel': 'Tom Scott Presents: Money',
                'channel_id': 'tom-scott-presents-money',
                'uploader': 'Tom Scott Presents: Money',
                'uploader_id': 'tom-scott-presents-money',
                'uploader_url': 'https://nebula.app/tom-scott-presents-money',
                'duration': 825,
                'channel_url': 'https://nebula.app/tom-scott-presents-money',
                'view_count': int,
                'series': 'Tom Scott Presents: Money',
                'display_id': 'money-episode-1-the-draw',
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
                'average_rating': int,
                'creator': 'Tom Scott Presents: Money',
            },
        },
        {
            'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
            'only_matching': True,
        },
    ]

    def _fetch_video_metadata(self, slug):
        """Return the content API's JSON object for the video with this slug (bearer auth)."""
        return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
                                     video_id=slug,
                                     auth_type='bearer',
                                     note='Fetching video meta data')

    def _real_extract(self, url):
        """Resolve the slug in the URL to its metadata and build the result dict from it."""
        slug = self._match_id(url)
        video = self._fetch_video_metadata(slug)
        return self._build_video_info(video)
bdc196a4 | 248 | |
bdc196a4 | 249 | |
f3b3fe16 HH |
class NebulaSubscriptionsIE(NebulaBaseIE):
    """Extractor that returns the library feed behind /myshows as one playlist."""
    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/myshows'
    _TESTS = [{
        'url': 'https://nebula.app/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
    }]

    def _generate_playlist_entries(self):
        """
        Lazily yield one result dict per episode of the library feed,
        following each page's 'next' link until it is empty.
        """
        page_url = 'https://content.watchnebula.com/library/video/?page_size=100'
        for page_num in itertools.count(1):
            page = self._call_nebula_api(
                page_url, 'myshows', auth_type='bearer',
                note=f'Retrieving subscriptions page {page_num}')
            yield from (self._build_video_info(episode) for episode in page['results'])
            page_url = page['next']
            if not page_url:
                # last page reached
                return

    def _real_extract(self, url):
        """Wrap the lazily generated entries in a playlist with the fixed id 'myshows'."""
        entries = self._generate_playlist_entries()
        return self.playlist_result(entries, 'myshows')
276 | ||
277 | ||
class NebulaChannelIE(NebulaBaseIE):
    """Extractor that returns all episodes of a Nebula channel page as a playlist."""
    IE_NAME = 'nebula:channel'
    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!myshows|videos/)(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.app/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://nebula.app/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 100,
    }]

    def _generate_playlist_entries(self, collection_id, channel):
        """
        Lazily yield one result dict per episode of the channel.

        `channel` is the already downloaded first page; further pages are
        fetched through the 'next' link of each page's 'episodes' object
        until that link is empty.
        """
        episodes_page = channel['episodes']
        # the first page was fetched by the caller, so downloads start at page 2
        page_num = 1
        while True:
            yield from (self._build_video_info(episode) for episode in episodes_page['results'])
            following_url = episodes_page['next']
            if not following_url:
                return
            page_num += 1
            episodes_page = self._call_nebula_api(
                following_url, collection_id, auth_type='bearer',
                note=f'Retrieving channel page {page_num}')['episodes']

    def _real_extract(self, url):
        """Download the channel object for the slug in the URL and return it as a playlist."""
        collection_id = self._match_id(url)
        channel = self._call_nebula_api(
            f'https://content.watchnebula.com/video/channels/{collection_id}/',
            collection_id, auth_type='bearer', note='Retrieving channel')
        details = channel['details']

        return self.playlist_result(
            entries=self._generate_playlist_entries(collection_id, channel),
            playlist_id=collection_id,
            playlist_title=details['title'],
            playlist_description=details['description'],
        )