# jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nebula.py
# Commit: [extractor] Add `_perform_login` function (#2943)
# Path: [yt-dlp.git] / yt_dlp / extractor / nebula.py
# Blame columns: Commit | Line | Data
bdc196a4
GS
# coding: utf-8
from __future__ import unicode_literals

import itertools
import json
import time
import urllib
import urllib.error
import urllib.parse

from ..utils import (
    ExtractorError,
    parse_iso8601,
    try_get,
)
from .common import InfoExtractor
15
16
class NebulaBaseIE(InfoExtractor):
    # Shared authentication and metadata helpers for the Nebula extractors.
    # Three credentials are cached on the instance: the Nebula API token, the
    # Nebula bearer token and the Zype access token. All three are filled in
    # by _perform_login().

    _NETRC_MACHINE = 'watchnebula'

    # None until _perform_login() has run.
    _nebula_api_token = None
    _nebula_bearer_token = None
    _zype_access_token = None

    # True while _call_nebula_api() is re-running the login flow. An auth
    # failure that happens inside that flow is raised instead of starting yet
    # another login, which would otherwise recurse without bound.
    _reauthenticating = False

    def _perform_nebula_auth(self, username=None, password=None):
        """
        Log in to Nebula with e-mail/password and return the API token.

        @param username  account e-mail; looked up via _get_login_info() when
                         either credential is missing
        @param password  account password; same fallback as username
        @returns         the 'key' field of the login response

        The token is also stored in the cookie jar as the 'nebula-auth' cookie
        for 'nebula.app'. self.raise_login_required() is called (and expected
        to raise) when no credentials are available or when the login response
        carries no 'key'.
        """
        if not (username and password):
            username, password = self._get_login_info()
        if not (username and password):
            self.raise_login_required()

        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Logging in to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required()

        # save nebula token as cookie (URL-quoted compact JSON, valid for one year)
        self._set_cookie(
            'nebula.app', 'nebula-auth',
            urllib.parse.quote(
                json.dumps({
                    "apiToken": response["key"],
                    "isLoggingIn": False,
                    "isLoggingOut": False,
                }, separators=(",", ":"))),
            expire_time=int(time.time()) + 86400 * 365,
        )

        return response['key']

    def _retrieve_nebula_api_token(self, username=None, password=None):
        """
        Check cookie jar for valid token. Try to authenticate using credentials if no valid token
        can be found in the cookie jar.

        A 'nebula-auth' cookie that is not valid JSON, or that has no
        'apiToken' entry, counts as "no valid token" and leads to the
        credential login instead of aborting.

        @param username  optional e-mail forwarded to _perform_nebula_auth()
        @param password  optional password forwarded to _perform_nebula_auth()
        @returns         the Nebula API token
        """
        nebula_cookies = self._get_cookies('https://nebula.app')
        nebula_cookie = nebula_cookies.get('nebula-auth')
        if nebula_cookie:
            self.to_screen('Authenticating to Nebula with token from cookie jar')
            nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
            # fatal=False: a corrupt cookie must not prevent the credential fallback
            cookie_data = self._parse_json(nebula_cookie_value, None, fatal=False)
            nebula_api_token = try_get(cookie_data, lambda x: x['apiToken'])
            if nebula_api_token:
                return nebula_api_token

        return self._perform_nebula_auth(username, password)

    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
        """
        Download JSON from a Nebula endpoint with an Authorization header.

        @param url        endpoint to request
        @param video_id   id passed through to _download_json
        @param method     'GET' or 'POST'; POST sends an empty body
        @param auth_type  'api' sends 'Token <api token>',
                          'bearer' sends 'Bearer <bearer token>'
        @param note       progress note passed through to _download_json
        @returns          the decoded JSON response

        On HTTP 401/403 the login flow is run once and the request is retried.
        Any other ExtractorError, or an auth failure raised while the login
        flow itself is running, is re-raised unchanged.
        """
        assert method in ('GET', 'POST',)
        assert auth_type in ('api', 'bearer',)

        def inner_call():
            # Tokens are read at call time so the retry picks up refreshed values
            authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
            return self._download_json(
                url, video_id, note=note, headers={'Authorization': authorization},
                data=b'' if method == 'POST' else None)

        try:
            return inner_call()
        except ExtractorError as exc:
            auth_rejected = (
                isinstance(exc.cause, urllib.error.HTTPError)
                and exc.cause.code in (401, 403))
            if not auth_rejected or self._reauthenticating:
                raise
            # if 401 or 403, attempt credential re-auth and retry
            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
            self._reauthenticating = True
            try:
                self._perform_login()
            finally:
                self._reauthenticating = False
            return inner_call()

    def _fetch_nebula_bearer_token(self):
        """
        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.

        @returns  the 'token' field of the authorization response
        """
        response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
                                         method='POST',
                                         note='Authorizing to Nebula')
        return response['token']

    def _fetch_zype_access_token(self):
        """
        Get a Zype access token, which is required to access video streams -- in our case: to
        generate video URLs.

        @returns  the access token string from the user object
        @raises   ExtractorError when the user object carries no token; the
                  error is marked as expected for subscribed accounts
        """
        user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')

        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
        if not access_token:
            if try_get(user_object, lambda x: x['is_subscribed'], bool):
                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
                raise ExtractorError(
                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
                    'Open an arbitrary video in a browser with this account to generate a token',
                    expected=True)
            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
        return access_token

    def _build_video_info(self, episode):
        """
        Convert a Nebula episode object into a url_transparent info dict.

        @param episode  episode metadata dict as returned by the content API
        @returns        info dict that hands the actual extraction to the
                        'Zype' extractor through a Zype embed URL carrying
                        the cached Zype access token
        """
        zype_id = episode['zype_id']
        zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
        channel_slug = episode['channel_slug']
        return {
            'id': zype_id,
            'display_id': episode['slug'],
            '_type': 'url_transparent',
            'ie_key': 'Zype',
            'url': zype_video_url,
            'title': episode['title'],
            'description': episode['description'],
            'timestamp': parse_iso8601(episode['published_at']),
            'thumbnails': [{
                # 'id': tn.get('name'),  # this appears to be null
                'url': tn['original'],
                # the key of each thumbnail entry is used as its height
                'height': key,
            } for key, tn in episode['assets']['thumbnail'].items()],
            'duration': episode['duration'],
            'channel': episode['channel_title'],
            'channel_id': channel_slug,
            'channel_url': f'https://nebula.app/{channel_slug}',
            'uploader': episode['channel_title'],
            'uploader_id': channel_slug,
            'uploader_url': f'https://nebula.app/{channel_slug}',
            'series': episode['channel_title'],
            'creator': episode['channel_title'],
        }

    def _perform_login(self, username=None, password=None):
        """
        Obtain and cache the API token, bearer token and Zype access token.

        @param username  optional account e-mail; when omitted the inner
                         helpers fall back to _get_login_info()
        @param password  optional account password; same fallback

        Also called without arguments by _call_nebula_api() to refresh the
        tokens after an HTTP 401/403.
        """
        # Order matters: the bearer and Zype requests authenticate with the API token
        self._nebula_api_token = self._retrieve_nebula_api_token(username, password)
        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
        self._zype_access_token = self._fetch_zype_access_token()
156
359df0fc
HH
157
class NebulaIE(NebulaBaseIE):
    # Extractor for single Nebula video pages (/videos/<slug>) on
    # watchnebula.com and nebula.app.

    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
        'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
        'info_dict': {
            'id': '5c271b40b13fd613090034fd',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
        },
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': '6d4edd14ce65720fa63aba5c583fb328',
        'info_dict': {
            'id': '5e7e78171aaf320001fbd6be',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering',
            'channel_id': 'realengineering',
            'uploader': 'Real Engineering',
            'uploader_id': 'realengineering',
        },
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://nebula.app/videos/money-episode-1-the-draw',
        'md5': '8c7d272910eea320f6f8e6d3084eecf5',
        'info_dict': {
            'id': '5e779ebdd157bc0001d1c75a',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
        },
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _fetch_video_metadata(self, slug):
        """
        Request the metadata object of one video from the content API,
        authenticating with the bearer token.
        """
        metadata_url = f'https://content.watchnebula.com/video/{slug}/'
        return self._call_nebula_api(
            metadata_url, video_id=slug, auth_type='bearer',
            note='Fetching video meta data')

    def _real_extract(self, url):
        """
        Resolve the slug from the URL, fetch its metadata and turn it into an
        info dict via _build_video_info().
        """
        video_slug = self._match_id(url)
        metadata = self._fetch_video_metadata(video_slug)
        return self._build_video_info(metadata)
bdc196a4 234
bdc196a4 235
359df0fc
HH
class NebulaCollectionIE(NebulaBaseIE):
    # Extractor for Nebula channel/collection pages: any first path segment
    # other than "videos/" on watchnebula.com and nebula.app.

    IE_NAME = 'nebula:collection'
    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.app/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://nebula.app/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 100,
        'params': {
            'usenetrc': True,
        },
    }]

    def _generate_playlist_entries(self, collection_id, channel):
        """
        Lazily yield one info dict per episode of the channel.

        Starts with the already downloaded first page and keeps following the
        'next' link of the episode listing until it is empty.
        """
        page_num = 2  # the first page arrives with the initial channel request
        while True:
            listing = channel['episodes']
            yield from (self._build_video_info(item) for item in listing['results'])
            following_page = channel['episodes']['next']
            if not following_page:
                return
            channel = self._call_nebula_api(
                following_page, collection_id, auth_type='bearer',
                note=f'Retrieving channel page {page_num}')
            page_num += 1

    def _real_extract(self, url):
        """
        Download the channel object and wrap its episodes in a playlist result
        carrying the channel's title and description.
        """
        collection_id = self._match_id(url)
        channel = self._call_nebula_api(
            f'https://content.watchnebula.com/video/channels/{collection_id}/',
            collection_id, auth_type='bearer', note='Retrieving channel')
        details = channel['details']

        return self.playlist_result(
            entries=self._generate_playlist_entries(collection_id, channel),
            playlist_id=collection_id,
            playlist_title=details['title'],
            playlist_description=details['description'],
        )