]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nebula.py
[core] Deprecate internal `Youtubedl-no-compression` header (#6876)
[yt-dlp.git] / yt_dlp / extractor / nebula.py
CommitLineData
359df0fc 1import itertools
bdc196a4 2import json
ac668111 3import urllib.error
bdc196a4 4
359df0fc 5from .common import InfoExtractor
d50ea3ce 6from ..utils import ExtractorError, parse_iso8601
359df0fc 7
# Regex prefix shared by every extractor's _VALID_URL in this module: http(s) scheme,
# an optional "www." or "beta." subdomain, then one of the three Nebula domains
# (watchnebula.com, nebula.app, nebula.tv).
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
4cca2eb1 9
359df0fc
HH
10
class NebulaBaseIE(InfoExtractor):
    """Shared login, API-call and metadata-building helpers for the Nebula extractors."""

    _NETRC_MACHINE = 'watchnebula'

    # Key returned by the login endpoint; sent as "Authorization: Token <key>" on 'api' calls.
    _nebula_api_token = None
    # Token returned by the authorization endpoint; sent as "Authorization: Bearer <token>"
    # on 'bearer' calls.
    _nebula_bearer_token = None
    # True while a 401/403-triggered re-login is running. The re-login itself goes through
    # _call_nebula_api (to fetch the bearer token), so without this flag a token that keeps
    # being rejected would trigger re-login recursively without end.
    _nebula_reauth_in_progress = False

    def _perform_nebula_auth(self, username, password):
        """
        Log in with e-mail and password and return the API token ('key') from the response.

        Calls raise_login_required() when either credential is missing, or when the login
        request fails or its response carries no 'key'.
        """
        if not username or not password:
            self.raise_login_required(method='password')

        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Logging in to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required(method='password')

        return response['key']

    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
        """
        Download JSON from `url` with a Nebula Authorization header.

        @param method     'GET' or 'POST'; a POST is sent with an empty body
        @param auth_type  'api' sends the API token, 'bearer' sends the bearer token
        @param note       progress message passed on to _download_json

        If the request fails with HTTP 401 or 403, a re-login is attempted once and the
        request is retried with the refreshed tokens. Any other failure is re-raised, as is
        a 401/403 that happens while a re-login is already in progress.
        """
        assert method in ('GET', 'POST',)
        assert auth_type in ('api', 'bearer',)

        def inner_call():
            # Built on every call so that a retry picks up the tokens refreshed by the re-login
            authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
            return self._download_json(
                url, video_id, note=note, headers={'Authorization': authorization},
                data=b'' if method == 'POST' else None)

        try:
            return inner_call()
        except ExtractorError as exc:
            auth_rejected = isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403)
            if not auth_rejected or self._nebula_reauth_in_progress:
                raise
            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
            self._nebula_reauth_in_progress = True
            try:
                self._perform_login()
            finally:
                self._nebula_reauth_in_progress = False
            return inner_call()

    def _fetch_nebula_bearer_token(self):
        """
        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
        """
        response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
                                         method='POST',
                                         note='Authorizing to Nebula')
        return response['token']

    def _fetch_video_formats(self, slug):
        """Fetch the stream info for `slug` and return (formats, subtitles) from its HLS manifest."""
        stream_info = self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/stream/',
                                            video_id=slug,
                                            auth_type='bearer',
                                            note='Fetching video stream info')
        manifest_url = stream_info['manifest']
        return self._extract_m3u8_formats_and_subtitles(manifest_url, slug)

    def _build_video_info(self, episode):
        """
        Build the info dict for one episode.

        `episode` is an episode object from the Nebula content API. The keys read here are
        'slug', 'zype_id', 'title', 'description', 'published_at', 'duration',
        'channel_slug', 'channel_title' and 'assets' -> 'thumbnail'. Formats and subtitles
        are fetched with an additional API request per episode.
        """
        fmts, subs = self._fetch_video_formats(episode['slug'])
        channel_slug = episode['channel_slug']
        channel_title = episode['channel_title']
        return {
            'id': episode['zype_id'],
            'display_id': episode['slug'],
            'formats': fmts,
            'subtitles': subs,
            'webpage_url': f'https://nebula.tv/{episode["slug"]}',
            'title': episode['title'],
            'description': episode['description'],
            'timestamp': parse_iso8601(episode['published_at']),
            'thumbnails': [{
                # 'id': tn.get('name'),  # this appears to be null
                'url': tn['original'],
                # The key of each thumbnail entry is used as its height
                'height': key,
            } for key, tn in episode['assets']['thumbnail'].items()],
            'duration': episode['duration'],
            'channel': channel_title,
            'channel_id': channel_slug,
            'channel_url': f'https://nebula.tv/{channel_slug}',
            'uploader': channel_title,
            'uploader_id': channel_slug,
            'uploader_url': f'https://nebula.tv/{channel_slug}',
            'series': channel_title,
            'creator': channel_title,
        }

    def _perform_login(self, username=None, password=None):
        """
        Log in and store both the API token and the bearer token on the extractor.

        When called without credentials (as the 401/403 retry in _call_nebula_api does),
        the configured ones are looked up through InfoExtractor._get_login_info(), so that
        re-authentication reuses the user's credentials instead of failing with a
        login-required error.
        """
        if username is None or password is None:
            username, password = self._get_login_info()
        self._nebula_api_token = self._perform_nebula_auth(username, password)
        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
359df0fc 107
359df0fc
HH
108
class NebulaIE(NebulaBaseIE):
    """Extractor for a single Nebula video page (`/videos/<slug>`)."""

    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
    _TESTS = [
        {
            'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
            'md5': '14944cfee8c7beeea106320c47560efc',
            'info_dict': {
                'id': '5c271b40b13fd613090034fd',
                'ext': 'mp4',
                'title': 'That Time Disney Remade Beauty and the Beast',
                'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
                'upload_date': '20180731',
                'timestamp': 1533009600,
                'channel': 'Lindsay Ellis',
                'channel_id': 'lindsayellis',
                'uploader': 'Lindsay Ellis',
                'uploader_id': 'lindsayellis',
                'uploader_url': 'https://nebula.tv/lindsayellis',
                'series': 'Lindsay Ellis',
                'display_id': 'that-time-disney-remade-beauty-and-the-beast',
                'channel_url': 'https://nebula.tv/lindsayellis',
                'creator': 'Lindsay Ellis',
                'duration': 2212,
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            },
        },
        {
            'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'md5': 'd05739cf6c38c09322422f696b569c23',
            'info_dict': {
                'id': '5e7e78171aaf320001fbd6be',
                'ext': 'mp4',
                'title': 'Landing Craft - How The Allies Got Ashore',
                'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
                'upload_date': '20200327',
                'timestamp': 1585348140,
                'channel': 'Real Engineering',
                'channel_id': 'realengineering',
                'uploader': 'Real Engineering',
                'uploader_id': 'realengineering',
                'series': 'Real Engineering',
                'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
                'creator': 'Real Engineering',
                'duration': 841,
                'channel_url': 'https://nebula.tv/realengineering',
                'uploader_url': 'https://nebula.tv/realengineering',
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            },
        },
        {
            'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
            'md5': 'ebe28a7ad822b9ee172387d860487868',
            'info_dict': {
                'id': '5e779ebdd157bc0001d1c75a',
                'ext': 'mp4',
                'title': 'Episode 1: The Draw',
                'description': r'contains:There’s free money on offer… if the players can all work together.',
                'upload_date': '20200323',
                'timestamp': 1584980400,
                'channel': 'Tom Scott Presents: Money',
                'channel_id': 'tom-scott-presents-money',
                'uploader': 'Tom Scott Presents: Money',
                'uploader_id': 'tom-scott-presents-money',
                'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
                'duration': 825,
                'channel_url': 'https://nebula.tv/tom-scott-presents-money',
                'series': 'Tom Scott Presents: Money',
                'display_id': 'money-episode-1-the-draw',
                'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
                'creator': 'Tom Scott Presents: Money',
            },
        },
        {
            'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
            'only_matching': True,
        },
        {
            'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
            'only_matching': True,
        },
    ]

    def _fetch_video_metadata(self, slug):
        """Return the content API's metadata object for the video identified by `slug`."""
        return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
                                     video_id=slug,
                                     auth_type='bearer',
                                     note='Fetching video meta data')

    def _real_extract(self, url):
        """Resolve the slug in `url` to its metadata and build the video's info dict."""
        slug = self._match_id(url)
        video = self._fetch_video_metadata(slug)
        return self._build_video_info(video)
bdc196a4 202
bdc196a4 203
f3b3fe16
HH
class NebulaSubscriptionsIE(NebulaBaseIE):
    """Extractor for the `/myshows` page, returned as a single playlist with id 'myshows'."""

    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = rf'{_BASE_URL_RE}/myshows'
    _TESTS = [
        {
            'url': 'https://nebula.tv/myshows',
            'playlist_mincount': 1,
            'info_dict': {
                'id': 'myshows',
            },
        },
    ]

    def _generate_playlist_entries(self):
        """Lazily yield one info dict per episode, following the API's 'next' links page by page."""
        page_url = 'https://content.watchnebula.com/library/video/?page_size=100'
        for page_num in itertools.count(1):
            page = self._call_nebula_api(
                page_url, 'myshows', auth_type='bearer',
                note=f'Retrieving subscriptions page {page_num}')
            yield from map(self._build_video_info, page['results'])
            page_url = page['next']
            if not page_url:
                # No further page is advertised, so the listing is complete
                return

    def _real_extract(self, url):
        """Wrap the lazily generated entries in a playlist result."""
        entries = self._generate_playlist_entries()
        return self.playlist_result(entries, 'myshows')
230
231
class NebulaChannelIE(NebulaBaseIE):
    """Extractor for a Nebula channel page, returned as a playlist of the channel's episodes."""

    IE_NAME = 'nebula:channel'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
    _TESTS = [
        {
            'url': 'https://nebula.tv/tom-scott-presents-money',
            'info_dict': {
                'id': 'tom-scott-presents-money',
                'title': 'Tom Scott Presents: Money',
                'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
            },
            'playlist_count': 5,
        }, {
            'url': 'https://nebula.tv/lindsayellis',
            'info_dict': {
                'id': 'lindsayellis',
                'title': 'Lindsay Ellis',
                'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
            },
            'playlist_mincount': 2,
        },
    ]

    def _generate_playlist_entries(self, collection_id, channel):
        """
        Lazily yield one info dict per episode of the channel.

        `channel` is the already downloaded first page of the channel listing; further
        pages are fetched by following channel['episodes']['next'] until it is empty.
        """
        page_num = 1
        while True:
            listing = channel['episodes']
            for episode in listing['results']:
                yield self._build_video_info(episode)

            next_url = listing['next']
            if not next_url:
                return

            # The first page was supplied by the caller, so fetched pages are numbered from 2
            page_num += 1
            channel = self._call_nebula_api(
                next_url, collection_id, auth_type='bearer',
                note=f'Retrieving channel page {page_num}')

    def _real_extract(self, url):
        """Download the channel's first page and return its episodes as a playlist."""
        collection_id = self._match_id(url)
        first_page = self._call_nebula_api(
            f'https://content.watchnebula.com/video/channels/{collection_id}/',
            collection_id, auth_type='bearer', note='Retrieving channel')
        details = first_page['details']

        return self.playlist_result(
            entries=self._generate_playlist_entries(collection_id, first_page),
            playlist_id=collection_id,
            playlist_title=details['title'],
            playlist_description=details['description'],
        )