]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nebula.py
[extractor/rumble] Detect JS embed
[yt-dlp.git] / yt_dlp / extractor / nebula.py
CommitLineData
359df0fc 1import itertools
bdc196a4
GS
2import json
3import time
359df0fc 4import urllib
bdc196a4 5
bdc196a4
GS
6from ..utils import (
7 ExtractorError,
8 parse_iso8601,
9 try_get,
bdc196a4 10)
359df0fc
HH
11from .common import InfoExtractor
12
13
class NebulaBaseIE(InfoExtractor):
    """Shared login, API-call and metadata-mapping helpers for the Nebula extractors."""

    _NETRC_MACHINE = 'watchnebula'

    # Tokens filled in by _perform_login(); None until a login has happened
    _nebula_api_token = None
    _nebula_bearer_token = None
    _zype_access_token = None

    # True while _call_nebula_api() is running a re-login. _perform_login() itself goes
    # through _call_nebula_api(), so without this flag a token that keeps being rejected
    # would trigger re-login from within re-login without bound.
    _nebula_reauth_in_progress = False

    def _perform_nebula_auth(self, username, password):
        """
        Log in with the given credentials and return the API token (the 'key' field of the
        login response). The token is also stored in the 'nebula-auth' cookie for nebula.app.

        self.raise_login_required() is called when credentials are missing or when the
        response does not contain a key.
        """
        if not username or not password:
            self.raise_login_required()

        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Logging in to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required()

        # save nebula token as cookie (URL-quoted compact JSON, valid for 365 days)
        self._set_cookie(
            'nebula.app', 'nebula-auth',
            urllib.parse.quote(
                json.dumps({
                    "apiToken": response["key"],
                    "isLoggingIn": False,
                    "isLoggingOut": False,
                }, separators=(",", ":"))),
            expire_time=int(time.time()) + 86400 * 365,
        )

        return response['key']

    def _retrieve_nebula_api_token(self, username=None, password=None):
        """
        Check cookie jar for valid token. Try to authenticate using credentials if no valid token
        can be found in the cookie jar.

        A 'nebula-auth' cookie that cannot be parsed, or that does not hold a JSON object
        with a non-empty 'apiToken', is treated the same as a missing cookie.
        """
        nebula_cookies = self._get_cookies('https://nebula.app')
        nebula_cookie = nebula_cookies.get('nebula-auth')
        if nebula_cookie:
            nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
            # fatal=False: a corrupt cookie must not prevent logging in with credentials
            cookie_data = self._parse_json(nebula_cookie_value, None, fatal=False)
            nebula_api_token = cookie_data.get('apiToken') if isinstance(cookie_data, dict) else None
            if nebula_api_token:
                self.to_screen('Authenticating to Nebula with token from cookie jar')
                return nebula_api_token

        return self._perform_nebula_auth(username, password)

    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
        """
        Download JSON from a Nebula API endpoint with an Authorization header.

        @param url        endpoint to request
        @param video_id   id passed through to _download_json
        @param method     'GET' or 'POST' (POST sends an empty body)
        @param auth_type  'api' sends 'Token <api token>', 'bearer' sends 'Bearer <bearer token>'
        @param note       progress note passed through to _download_json

        On HTTP 401/403 the login sequence is run once and the request is retried once.
        If the failure happens while such a re-login is already running, the error is
        re-raised instead of starting another one.
        """
        assert method in ('GET', 'POST',)
        assert auth_type in ('api', 'bearer',)

        def inner_call():
            # Tokens are read at call time so that the retry picks up refreshed values
            authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
            return self._download_json(
                url, video_id, note=note, headers={'Authorization': authorization},
                data=b'' if method == 'POST' else None)

        try:
            return inner_call()
        except ExtractorError as exc:
            auth_rejected = (
                exc.cause and isinstance(exc.cause, urllib.error.HTTPError)
                and exc.cause.code in (401, 403))
            # Anything other than an auth rejection, or a rejection during re-login, is final
            if not auth_rejected or self._nebula_reauth_in_progress:
                raise

            # if 401 or 403, attempt credential re-auth and retry
            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
            self._nebula_reauth_in_progress = True
            try:
                self._perform_login()
            finally:
                self._nebula_reauth_in_progress = False
            return inner_call()

    def _fetch_nebula_bearer_token(self):
        """
        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
        """
        response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
                                         method='POST',
                                         note='Authorizing to Nebula')
        return response['token']

    def _fetch_zype_access_token(self):
        """
        Get a Zype access token, which is required to access video streams -- in our case: to
        generate video URLs.

        Raises ExtractorError when the user object carries no access token; the error is
        marked as expected when the user object reports is_subscribed.
        """
        user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')

        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
        if not access_token:
            if try_get(user_object, lambda x: x['is_subscribed'], bool):
                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
                raise ExtractorError(
                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
                    'Open an arbitrary video in a browser with this account to generate a token',
                    expected=True)
            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
        return access_token

    def _build_video_info(self, episode):
        """
        Map an episode object from the Nebula content API to a url_transparent result that
        delegates to the Zype extractor, using the Zype embed URL with the stored access token.
        """
        zype_id = episode['zype_id']
        zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
        channel_slug = episode['channel_slug']
        return {
            'id': zype_id,
            'display_id': episode['slug'],
            '_type': 'url_transparent',
            'ie_key': 'Zype',
            'url': zype_video_url,
            'title': episode['title'],
            'description': episode['description'],
            'timestamp': parse_iso8601(episode['published_at']),
            'thumbnails': [{
                # 'id': tn.get('name'),  # this appears to be null
                'url': tn['original'],
                'height': key,
            } for key, tn in episode['assets']['thumbnail'].items()],
            'duration': episode['duration'],
            'channel': episode['channel_title'],
            'channel_id': channel_slug,
            'channel_url': f'https://nebula.app/{channel_slug}',
            'uploader': episode['channel_title'],
            'uploader_id': channel_slug,
            'uploader_url': f'https://nebula.app/{channel_slug}',
            'series': episode['channel_title'],
            'creator': episode['channel_title'],
        }

    def _perform_login(self, username=None, password=None):
        """Obtain the API token, then the bearer token, then the Zype token, in that order."""
        self._nebula_api_token = self._retrieve_nebula_api_token(username, password)
        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
        self._zype_access_token = self._fetch_zype_access_token()
151
359df0fc
HH
152
class NebulaIE(NebulaBaseIE):
    """Extractor for single Nebula videos (/videos/<slug> on nebula.app or watchnebula.com)."""

    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
    _TESTS = [
        {
            'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
            'md5': '14944cfee8c7beeea106320c47560efc',
            'info_dict': {
                'id': '5c271b40b13fd613090034fd',
                'ext': 'mp4',
                'title': 'That Time Disney Remade Beauty and the Beast',
                'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
                'upload_date': '20180731',
                'timestamp': 1533009600,
                'channel': 'Lindsay Ellis',
                'channel_id': 'lindsayellis',
                'uploader': 'Lindsay Ellis',
                'uploader_id': 'lindsayellis',
                'uploader_url': 'https://nebula.app/lindsayellis',
                'series': 'Lindsay Ellis',
                'average_rating': int,
                'display_id': 'that-time-disney-remade-beauty-and-the-beast',
                'channel_url': 'https://nebula.app/lindsayellis',
                'creator': 'Lindsay Ellis',
                'duration': 2212,
                'view_count': int,
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            },
        },
        {
            'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'md5': 'd05739cf6c38c09322422f696b569c23',
            'info_dict': {
                'id': '5e7e78171aaf320001fbd6be',
                'ext': 'mp4',
                'title': 'Landing Craft - How The Allies Got Ashore',
                'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
                'upload_date': '20200327',
                'timestamp': 1585348140,
                'channel': 'Real Engineering',
                'channel_id': 'realengineering',
                'uploader': 'Real Engineering',
                'uploader_id': 'realengineering',
                'view_count': int,
                'series': 'Real Engineering',
                'average_rating': int,
                'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
                'creator': 'Real Engineering',
                'duration': 841,
                'channel_url': 'https://nebula.app/realengineering',
                'uploader_url': 'https://nebula.app/realengineering',
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            },
        },
        {
            'url': 'https://nebula.app/videos/money-episode-1-the-draw',
            'md5': 'ebe28a7ad822b9ee172387d860487868',
            'info_dict': {
                'id': '5e779ebdd157bc0001d1c75a',
                'ext': 'mp4',
                'title': 'Episode 1: The Draw',
                'description': r'contains:There’s free money on offer… if the players can all work together.',
                'upload_date': '20200323',
                'timestamp': 1584980400,
                'channel': 'Tom Scott Presents: Money',
                'channel_id': 'tom-scott-presents-money',
                'uploader': 'Tom Scott Presents: Money',
                'uploader_id': 'tom-scott-presents-money',
                'uploader_url': 'https://nebula.app/tom-scott-presents-money',
                'duration': 825,
                'channel_url': 'https://nebula.app/tom-scott-presents-money',
                'view_count': int,
                'series': 'Tom Scott Presents: Money',
                'display_id': 'money-episode-1-the-draw',
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
                'average_rating': int,
                'creator': 'Tom Scott Presents: Money',
            },
        },
        {
            'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
            'only_matching': True,
        },
    ]

    def _fetch_video_metadata(self, slug):
        """Return the content API's JSON object for the video with the given slug."""
        return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
                                     video_id=slug,
                                     auth_type='bearer',
                                     note='Fetching video meta data')

    def _real_extract(self, url):
        """Resolve the slug from the URL, fetch its metadata and convert it to a result dict."""
        slug = self._match_id(url)
        video = self._fetch_video_metadata(slug)
        return self._build_video_info(video)
bdc196a4 248
bdc196a4 249
f3b3fe16
HH
class NebulaSubscriptionsIE(NebulaBaseIE):
    """Playlist extractor for the logged-in user's library (/myshows)."""

    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/myshows'
    _TESTS = [
        {
            'url': 'https://nebula.app/myshows',
            'playlist_mincount': 1,
            'info_dict': {
                'id': 'myshows',
            },
        },
    ]

    def _generate_playlist_entries(self):
        """
        Lazily yield one result dict per episode of the library listing, following the
        'next' link of each page until it is empty.
        """
        page_url = 'https://content.watchnebula.com/library/video/?page_size=100'
        for page_num in itertools.count(1):
            if not page_url:
                return
            page = self._call_nebula_api(
                page_url, 'myshows', auth_type='bearer',
                note=f'Retrieving subscriptions page {page_num}')
            yield from (self._build_video_info(episode) for episode in page['results'])
            page_url = page['next']

    def _real_extract(self, url):
        """Return the library as a playlist with the fixed id 'myshows'."""
        entries = self._generate_playlist_entries()
        return self.playlist_result(entries, 'myshows')
276
277
class NebulaChannelIE(NebulaBaseIE):
    """Playlist extractor for a Nebula channel page (/<channel-slug>)."""

    IE_NAME = 'nebula:channel'
    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!myshows|videos/)(?P<id>[-\w]+)'
    _TESTS = [
        {
            'url': 'https://nebula.app/tom-scott-presents-money',
            'info_dict': {
                'id': 'tom-scott-presents-money',
                'title': 'Tom Scott Presents: Money',
                'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
            },
            'playlist_count': 5,
        }, {
            'url': 'https://nebula.app/lindsayellis',
            'info_dict': {
                'id': 'lindsayellis',
                'title': 'Lindsay Ellis',
                'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
            },
            'playlist_mincount': 100,
        },
    ]

    def _generate_playlist_entries(self, collection_id, channel):
        """
        Lazily yield one result dict per episode of the channel.

        `channel` is the already-downloaded first page; further pages are fetched by
        following channel['episodes']['next'] until it is empty.
        """
        page_num = 1
        while True:
            listing = channel['episodes']
            for episode in listing['results']:
                yield self._build_video_info(episode)

            next_url = listing['next']
            if not next_url:
                return

            # The page passed in by the caller counts as page 1
            page_num += 1
            channel = self._call_nebula_api(
                next_url, collection_id, auth_type='bearer',
                note=f'Retrieving channel page {page_num}')

    def _real_extract(self, url):
        """Fetch the channel's first page and return its episodes as a playlist."""
        collection_id = self._match_id(url)
        first_page = self._call_nebula_api(
            f'https://content.watchnebula.com/video/channels/{collection_id}/',
            collection_id, auth_type='bearer', note='Retrieving channel')
        details = first_page['details']

        return self.playlist_result(
            entries=self._generate_playlist_entries(collection_id, first_page),
            playlist_id=collection_id,
            playlist_title=details['title'],
            playlist_description=details['description'],
        )