]>
Commit | Line | Data |
---|---|---|
bdc196a4 GS |
1 | # coding: utf-8 |
from __future__ import unicode_literals

import itertools
import json
import time
import urllib
import urllib.error
import urllib.parse

from ..utils import (
    ExtractorError,
    parse_iso8601,
    try_get,
)
from .common import InfoExtractor
15 | ||
16 | ||
class NebulaBaseIE(InfoExtractor):
    """Common authentication and metadata plumbing for the Nebula extractors.

    The login sequence populates three instance attributes: the Nebula API
    token, the bearer token obtained with it, and the Zype access token that
    is embedded in the player URLs built by `_build_video_info`.
    """

    _NETRC_MACHINE = 'watchnebula'

    _nebula_api_token = None
    _nebula_bearer_token = None
    _zype_access_token = None
    # True while `_call_nebula_api` is re-running the login sequence; keeps a
    # request rejected *during* login from triggering yet another login.
    _nebula_reauth_in_progress = False

    def _perform_nebula_auth(self):
        """Log in with the configured credentials and return the API token.

        The token is additionally stored in the cookie jar as the
        `nebula-auth` cookie for `nebula.app` (valid for one year), which is
        the cookie `_retrieve_nebula_api_token` looks for.

        `raise_login_required` is called when no username/password pair is
        available or when the login response does not contain a `key`.
        """
        username, password = self._get_login_info()
        if not (username and password):
            self.raise_login_required()

        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Logging in to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required()

        # save nebula token as cookie
        self._set_cookie(
            'nebula.app', 'nebula-auth',
            urllib.parse.quote(
                json.dumps({
                    "apiToken": response["key"],
                    "isLoggingIn": False,
                    "isLoggingOut": False,
                }, separators=(",", ":"))),
            expire_time=int(time.time()) + 86400 * 365,
        )

        return response['key']

    def _retrieve_nebula_api_token(self):
        """Return a Nebula API token, preferring one stored in the cookie jar.

        The `nebula-auth` cookie of https://nebula.app is URL-unquoted and
        parsed as JSON; its `apiToken` entry is returned when present.
        Otherwise the token is obtained via `_perform_nebula_auth`.
        """
        nebula_cookies = self._get_cookies('https://nebula.app')
        nebula_cookie = nebula_cookies.get('nebula-auth')
        if nebula_cookie:
            self.to_screen('Authenticating to Nebula with token from cookie jar')
            nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
            nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
            if nebula_api_token:
                return nebula_api_token

        return self._perform_nebula_auth()

    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
        """Request `url` as JSON with a Nebula `Authorization` header.

        `method` must be 'GET' or 'POST' (a POST is sent with an empty body).
        `auth_type` selects the header value: 'api' sends
        `Token <api token>`, 'bearer' sends `Bearer <bearer token>`.

        When the request fails with HTTP 401 or 403, the login sequence is
        run once and the request is retried. Every other failure, and a
        401/403 that happens while that login sequence is already running,
        is re-raised unchanged.
        """
        assert method in ('GET', 'POST',)
        assert auth_type in ('api', 'bearer',)

        def inner_call():
            # Built on every call so that a retry uses the refreshed tokens.
            authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
            return self._download_json(
                url, video_id, note=note, headers={'Authorization': authorization},
                data=b'' if method == 'POST' else None)

        try:
            return inner_call()
        except ExtractorError as exc:
            cause = exc.cause
            auth_rejected = isinstance(cause, urllib.error.HTTPError) and cause.code in (401, 403)
            # `_perform_login` itself goes through this method, so without the
            # in-progress check a persistently rejected token would recurse
            # without bound instead of reporting the HTTP error.
            if not auth_rejected or self._nebula_reauth_in_progress:
                raise
            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {cause.code}')
            self._nebula_reauth_in_progress = True
            try:
                # The login entry point of this class is `_perform_login`;
                # no `_login` method is defined here.
                self._perform_login()
            finally:
                self._nebula_reauth_in_progress = False
            return inner_call()

    def _fetch_nebula_bearer_token(self):
        """
        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
        """
        response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
                                         method='POST',
                                         note='Authorizing to Nebula')
        return response['token']

    def _fetch_zype_access_token(self):
        """
        Get a Zype access token, which is required to access video streams -- in our case: to
        generate video URLs.

        Raises ExtractorError when the user object carries no access token; the error is
        marked as expected (with a hint for the user) if the account is subscribed.
        """
        user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')

        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
        if not access_token:
            if try_get(user_object, lambda x: x['is_subscribed'], bool):
                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
                raise ExtractorError(
                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
                    'Open an arbitrary video in a browser with this account to generate a token',
                    expected=True)
            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
        return access_token

    def _build_video_info(self, episode):
        """Turn one episode object of the Nebula content API into an info dict.

        The result is a `url_transparent` entry handled by the Zype
        extractor; its URL is the Zype embed page for the episode with the
        cached Zype access token appended as a query parameter.
        """
        zype_id = episode['zype_id']
        zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
        channel_slug = episode['channel_slug']
        channel_title = episode['channel_title']
        channel_url = f'https://nebula.app/{channel_slug}'
        return {
            'id': zype_id,
            'display_id': episode['slug'],
            '_type': 'url_transparent',
            'ie_key': 'Zype',
            'url': zype_video_url,
            'title': episode['title'],
            'description': episode['description'],
            'timestamp': parse_iso8601(episode['published_at']),
            'thumbnails': [{
                # 'id': tn.get('name'),  # this appears to be null
                'url': tn['original'],
                # Keys of a decoded JSON object are strings; report the height
                # as a number so heights compare numerically, not lexically.
                # NOTE(review): assumes the keys name the pixel height -- confirm.
                'height': int(key) if str(key).isdigit() else None,
            } for key, tn in episode['assets']['thumbnail'].items()],
            'duration': episode['duration'],
            'channel': channel_title,
            'channel_id': channel_slug,
            'channel_url': channel_url,
            'uploader': channel_title,
            'uploader_id': channel_slug,
            'uploader_url': channel_url,
            'series': channel_title,
            'creator': channel_title,
        }

    def _perform_login(self, username=None, password=None):
        """Fetch and cache the API token, the bearer token and the Zype token, in that order.

        Each step depends on the previous one: the bearer and Zype requests
        are authorized with the API token.
        """
        # FIXME: username should be passed from here to inner functions
        self._nebula_api_token = self._retrieve_nebula_api_token()
        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
        self._zype_access_token = self._fetch_zype_access_token()
156 | ||
359df0fc HH |
157 | |
class NebulaIE(NebulaBaseIE):
    """Extractor for a single Nebula video page (`/videos/<slug>`)."""

    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
        'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
        'info_dict': {
            'id': '5c271b40b13fd613090034fd',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
        },
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': '6d4edd14ce65720fa63aba5c583fb328',
        'info_dict': {
            'id': '5e7e78171aaf320001fbd6be',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering',
            'channel_id': 'realengineering',
            'uploader': 'Real Engineering',
            'uploader_id': 'realengineering',
        },
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://nebula.app/videos/money-episode-1-the-draw',
        'md5': '8c7d272910eea320f6f8e6d3084eecf5',
        'info_dict': {
            'id': '5e779ebdd157bc0001d1c75a',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
        },
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _fetch_video_metadata(self, slug):
        """Return the content API's JSON metadata for the video named `slug`.

        The request is authorized with the bearer token.
        """
        metadata_url = f'https://content.watchnebula.com/video/{slug}/'
        return self._call_nebula_api(
            metadata_url,
            video_id=slug,
            auth_type='bearer',
            note='Fetching video meta data')

    def _real_extract(self, url):
        """Resolve the video slug in `url` to an info dict."""
        video_slug = self._match_id(url)
        metadata = self._fetch_video_metadata(video_slug)
        return self._build_video_info(metadata)
bdc196a4 | 234 | |
bdc196a4 | 235 | |
359df0fc HH |
class NebulaCollectionIE(NebulaBaseIE):
    """Extractor for Nebula channel/collection pages, returned as playlists."""

    IE_NAME = 'nebula:collection'
    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.app/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://nebula.app/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 100,
        'params': {
            'usenetrc': True,
        },
    }]

    def _generate_playlist_entries(self, collection_id, channel):
        """Yield an info dict for every episode of the channel, page by page.

        `channel` is the already fetched first page. Further pages are
        requested from the `next` URL of each page until that URL is empty;
        progress notes number the requested pages starting at 2.
        """
        page_numbers = itertools.count(2)
        while True:
            listing = channel['episodes']
            for item in listing['results']:
                yield self._build_video_info(item)
            following_url = listing['next']
            if not following_url:
                return
            channel = self._call_nebula_api(
                following_url, collection_id, auth_type='bearer',
                note=f'Retrieving channel page {next(page_numbers)}')

    def _real_extract(self, url):
        """Fetch the channel named in `url` and return it as a playlist result."""
        collection_id = self._match_id(url)
        channel = self._call_nebula_api(
            f'https://content.watchnebula.com/video/channels/{collection_id}/',
            collection_id, auth_type='bearer', note='Retrieving channel')
        details = channel['details']

        return self.playlist_result(
            entries=self._generate_playlist_entries(collection_id, channel),
            playlist_id=collection_id,
            playlist_title=details['title'],
            playlist_description=details['description'],
        )