]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/nebula.py
[spotify] Detect iframe embeds (#3430)
[yt-dlp.git] / yt_dlp / extractor / nebula.py
1 import itertools
2 import json
3 import time
4 import urllib
5
6 from ..utils import (
7 ExtractorError,
8 parse_iso8601,
9 try_get,
10 )
11 from .common import InfoExtractor
12
13
class NebulaBaseIE(InfoExtractor):
    """Shared authentication and metadata helpers for the Nebula extractors.

    `_perform_login` caches three credentials on the instance: the Nebula API
    token, a bearer token obtained with it, and a Zype access token that is
    embedded into the player URLs built by `_build_video_info`.
    """
    _NETRC_MACHINE = 'watchnebula'

    _nebula_api_token = None
    _nebula_bearer_token = None
    _zype_access_token = None
    # True while `_call_nebula_api` is re-running the login sequence. The login
    # sequence itself goes through `_call_nebula_api`, so without this guard a
    # persistently rejected token would re-enter the login forever.
    _reauth_in_progress = False

    def _perform_nebula_auth(self):
        """Log in with the configured username/password and return the API token.

        The token is also stored in the 'nebula-auth' cookie for 'nebula.app'.
        Calls `raise_login_required` when no credentials are configured or the
        login response does not contain a 'key'.
        """
        username, password = self._get_login_info()
        if not (username and password):
            self.raise_login_required()

        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Logging in to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required()

        # save nebula token as cookie
        self._set_cookie(
            'nebula.app', 'nebula-auth',
            urllib.parse.quote(
                json.dumps({
                    "apiToken": response["key"],
                    "isLoggingIn": False,
                    "isLoggingOut": False,
                }, separators=(",", ":"))),
            expire_time=int(time.time()) + 86400 * 365,
        )

        return response['key']

    def _retrieve_nebula_api_token(self):
        """
        Check cookie jar for valid token. Try to authenticate using credentials if no valid token
        can be found in the cookie jar.

        A 'nebula-auth' cookie that is not valid JSON, is not a JSON object, or
        has no 'apiToken' counts as "no valid token" and triggers the credential login.
        """
        nebula_cookies = self._get_cookies('https://nebula.app')
        nebula_cookie = nebula_cookies.get('nebula-auth')
        if nebula_cookie:
            self.to_screen('Authenticating to Nebula with token from cookie jar')
            nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
            # Non-fatal parse: a corrupt cookie must not abort extraction while
            # a credential login is still possible.
            cookie_data = self._parse_json(nebula_cookie_value, None, fatal=False)
            nebula_api_token = cookie_data.get('apiToken') if isinstance(cookie_data, dict) else None
            if nebula_api_token:
                return nebula_api_token

        return self._perform_nebula_auth()

    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
        """Call a Nebula JSON endpoint with an Authorization header and return the parsed response.

        `auth_type` selects the header: 'api' sends `Token <api token>`,
        'bearer' sends `Bearer <bearer token>`. A 'POST' is sent with an empty body.

        On HTTP 401/403 the login sequence is re-run once and the request is
        retried. A 401/403 raised while that login sequence is already running
        is propagated unchanged instead of starting another login.
        """
        assert method in ('GET', 'POST',)
        assert auth_type in ('api', 'bearer',)

        def inner_call():
            # The tokens are read at call time so that a retry picks up refreshed values.
            authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
            return self._download_json(
                url, video_id, note=note, headers={'Authorization': authorization},
                data=b'' if method == 'POST' else None)

        try:
            return inner_call()
        except ExtractorError as exc:
            auth_rejected = isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403)
            if not auth_rejected or self._reauth_in_progress:
                raise
            # if 401 or 403, attempt re-auth and retry
            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
            self._reauth_in_progress = True
            try:
                self._perform_login()
            finally:
                self._reauth_in_progress = False
            return inner_call()

    def _fetch_nebula_bearer_token(self):
        """
        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
        """
        response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
                                         method='POST',
                                         note='Authorizing to Nebula')
        return response['token']

    def _fetch_zype_access_token(self):
        """
        Get a Zype access token, which is required to access video streams -- in our case: to
        generate video URLs.

        Raises ExtractorError when the user object carries no access token; the error is
        marked as expected when the account is reported as subscribed.
        """
        user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')

        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
        if not access_token:
            if try_get(user_object, lambda x: x['is_subscribed'], bool):
                # TODO: Reimplement the same Zype token polling the Nebula frontend implements
                # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
                raise ExtractorError(
                    'Unable to extract Zype access token from Nebula API authentication endpoint. '
                    'Open an arbitrary video in a browser with this account to generate a token',
                    expected=True)
            raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
        return access_token

    def _build_video_info(self, episode):
        """Build a `url_transparent` result for `episode` that delegates to the Zype extractor.

        `episode` is an episode object from the Nebula content API. All keys read
        below are required; a missing one raises KeyError.
        """
        zype_id = episode['zype_id']
        zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
        channel_slug = episode['channel_slug']
        return {
            'id': zype_id,
            'display_id': episode['slug'],
            '_type': 'url_transparent',
            'ie_key': 'Zype',
            'url': zype_video_url,
            'title': episode['title'],
            'description': episode['description'],
            'timestamp': parse_iso8601(episode['published_at']),
            'thumbnails': [{
                # 'id': tn.get('name'),  # this appears to be null
                'url': tn['original'],
                # NOTE(review): the mapping key is used as the height as-is;
                # presumably it is a pixel height -- confirm against the API.
                'height': key,
            } for key, tn in episode['assets']['thumbnail'].items()],
            'duration': episode['duration'],
            'channel': episode['channel_title'],
            'channel_id': channel_slug,
            'channel_url': f'https://nebula.app/{channel_slug}',
            'uploader': episode['channel_title'],
            'uploader_id': channel_slug,
            'uploader_url': f'https://nebula.app/{channel_slug}',
            'series': episode['channel_title'],
            'creator': episode['channel_title'],
        }

    def _perform_login(self, username=None, password=None):
        """Obtain and cache the API, bearer and Zype tokens, in that order.

        `username` and `password` are currently ignored; the credentials are
        looked up again inside `_perform_nebula_auth`.
        """
        # FIXME: username should be passed from here to inner functions
        self._nebula_api_token = self._retrieve_nebula_api_token()
        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
        self._zype_access_token = self._fetch_zype_access_token()
153
154
class NebulaIE(NebulaBaseIE):
    """Extractor for single Nebula video pages (`/videos/<slug>`)."""
    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
        'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
        'info_dict': {
            'id': '5c271b40b13fd613090034fd',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
        },
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': '6d4edd14ce65720fa63aba5c583fb328',
        'info_dict': {
            'id': '5e7e78171aaf320001fbd6be',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering',
            'channel_id': 'realengineering',
            'uploader': 'Real Engineering',
            'uploader_id': 'realengineering',
        },
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://nebula.app/videos/money-episode-1-the-draw',
        'md5': '8c7d272910eea320f6f8e6d3084eecf5',
        'info_dict': {
            'id': '5e779ebdd157bc0001d1c75a',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
        },
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _fetch_video_metadata(self, slug):
        """Return the content API's metadata object for the video named by `slug`.

        The request is authorized with the bearer token.
        """
        metadata_url = f'https://content.watchnebula.com/video/{slug}/'
        return self._call_nebula_api(
            metadata_url, video_id=slug, auth_type='bearer', note='Fetching video meta data')

    def _real_extract(self, url):
        """Resolve the slug in `url` to its metadata and turn that into a result dict."""
        metadata = self._fetch_video_metadata(self._match_id(url))
        return self._build_video_info(metadata)
231
232
class NebulaCollectionIE(NebulaBaseIE):
    """Extractor for Nebula channel pages, returned as a playlist of their episodes."""
    IE_NAME = 'nebula:collection'
    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.app/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
        'params': {
            'usenetrc': True,
        },
    }, {
        'url': 'https://nebula.app/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 100,
        'params': {
            'usenetrc': True,
        },
    }]

    def _generate_playlist_entries(self, collection_id, channel):
        """Lazily yield a result dict for every episode of the channel.

        `channel` is the already-downloaded first page. Further pages are
        fetched on demand by following the 'next' URL of each page until it is
        empty. Page numbers in the progress note start at 2 because page 1 was
        fetched by the caller.
        """
        page_episodes = channel['episodes']['results']
        upcoming_page = 2
        while True:
            yield from map(self._build_video_info, page_episodes)
            following_url = channel['episodes']['next']
            if not following_url:
                return
            channel = self._call_nebula_api(
                following_url, collection_id, auth_type='bearer',
                note=f'Retrieving channel page {upcoming_page}')
            page_episodes = channel['episodes']['results']
            upcoming_page += 1

    def _real_extract(self, url):
        """Download the first channel page and wrap its episodes in a playlist result."""
        collection_id = self._match_id(url)
        channel = self._call_nebula_api(
            f'https://content.watchnebula.com/video/channels/{collection_id}/',
            collection_id, auth_type='bearer', note='Retrieving channel')
        details = channel['details']

        return self.playlist_result(
            entries=self._generate_playlist_entries(collection_id, channel),
            playlist_id=collection_id,
            playlist_title=details['title'],
            playlist_description=details['description'],
        )