]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/nebula.py
[cleanup] Misc (#8182)
[yt-dlp.git] / yt_dlp / extractor / nebula.py
CommitLineData
359df0fc 1import itertools
bdc196a4 2import json
bdc196a4 3
359df0fc 4from .common import InfoExtractor
3d2623a8 5from ..networking.exceptions import HTTPError
3f756c8c 6from ..utils import ExtractorError, make_archive_id, parse_iso8601, remove_start
359df0fc 7
# URL prefix shared by the _VALID_URL patterns of the extractors in this file:
# http or https, an optional "www." or "beta." subdomain, then one of the
# watchnebula.com / nebula.app / nebula.tv hosts.
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
4cca2eb1 9
359df0fc
HH
10
# Base class for the Nebula extractors in this file: holds the two tokens used
# for API calls and the helpers that log in, call the API and build info dicts.
class NebulaBaseIE(InfoExtractor):
    _NETRC_MACHINE = 'watchnebula'

    # Sent as 'Authorization: Token <value>' by _call_nebula_api(auth_type='api')
    _nebula_api_token = None
    # Sent as 'Authorization: Bearer <value>' by _call_nebula_api(auth_type='bearer')
    _nebula_bearer_token = None

    def _perform_nebula_auth(self, username, password):
        """
        Log in with e-mail and password and return the 'key' field of the login response.

        Login-required (method='password') is reported when either credential is
        missing, and again when the response is empty or carries no 'key'.
        """
        if not username or not password:
            self.raise_login_required(method='password')

        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        # fatal=False: a failed request is presumably returned as a falsy value
        # instead of raising (confirm against InfoExtractor); it is handled below
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Logging in to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required(method='password')

        return response['key']

    def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
        """
        Request `url` with the Authorization header selected by `auth_type` and return the JSON.

        method:    'GET' or 'POST'; a POST is sent with an empty body
        auth_type: 'api' sends the API token, 'bearer' sends the bearer token
        note:      progress message handed to _download_json

        If the request fails with an ExtractorError caused by an HTTP 401 or 403,
        _perform_login() is called and the request is retried once. Any other
        ExtractorError is re-raised unchanged.
        """
        assert method in ('GET', 'POST',)
        assert auth_type in ('api', 'bearer',)

        def inner_call():
            # The header is built on every call so that a retry picks up tokens
            # refreshed by _perform_login()
            if auth_type == 'api':
                authorization = f'Token {self._nebula_api_token}'
            else:
                authorization = f'Bearer {self._nebula_bearer_token}'
            return self._download_json(
                url, video_id, note=note, headers={'Authorization': authorization},
                data=b'' if method == 'POST' else None)

        try:
            return inner_call()
        except ExtractorError as exc:
            cause = exc.cause
            # Only an HTTP 401/403 triggers re-authentication; everything else propagates
            if not (isinstance(cause, HTTPError) and cause.status in (401, 403)):
                raise
            # Read `status` here as well: it is the attribute the check above relies on
            self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {cause.status}')
            # NOTE(review): no credentials are passed, so _perform_nebula_auth receives
            # None for both and takes its login-required path (presumably raising -
            # confirm against InfoExtractor.raise_login_required)
            self._perform_login()
            return inner_call()

    def _fetch_nebula_bearer_token(self):
        """
        Get a Bearer token for the Nebula API. This will be required to fetch video meta data.

        The request is a POST authorized with the API token (the default auth_type).
        """
        response = self._call_nebula_api(
            'https://api.watchnebula.com/api/v1/authorization/',
            method='POST', note='Authorizing to Nebula')
        return response['token']

    def _fetch_video_formats(self, slug):
        """
        Fetch the stream info for `slug` and extract its 'manifest' URL as m3u8.

        Returns the result of _extract_m3u8_formats_and_subtitles, which
        _build_video_info unpacks as (formats, subtitles).
        """
        stream_info = self._call_nebula_api(
            f'https://content.api.nebula.app/video/{slug}/stream/',
            video_id=slug, auth_type='bearer', note='Fetching video stream info')
        manifest_url = stream_info['manifest']
        return self._extract_m3u8_formats_and_subtitles(manifest_url, slug, 'mp4')

    def _build_video_info(self, episode):
        """
        Build the info dict for one episode dict returned by the API.

        Keys read from `episode`: 'slug', 'channel_slug', 'channel_title', 'id',
        'title', 'description', 'published_at', 'assets' -> 'thumbnail' and
        'duration' (all required), plus the optional 'zype_id'.
        """
        slug = episode['slug']
        fmts, subs = self._fetch_video_formats(slug)
        channel_slug = episode['channel_slug']
        channel_title = episode['channel_title']
        # The same page serves as both channel URL and uploader URL
        channel_url = f'https://nebula.tv/{channel_slug}'
        zype_id = episode.get('zype_id')
        return {
            'id': remove_start(episode['id'], 'video_episode:'),
            'display_id': slug,
            'formats': fmts,
            'subtitles': subs,
            'webpage_url': f'https://nebula.tv/{slug}',
            'title': episode['title'],
            'description': episode['description'],
            'timestamp': parse_iso8601(episode['published_at']),
            'thumbnails': [{
                # 'id': tn.get('name'),  # this appears to be null
                'url': tn['original'],
                # The mapping key is passed through unchanged as the height
                'height': key,
            } for key, tn in episode['assets']['thumbnail'].items()],
            'duration': episode['duration'],
            'channel': channel_title,
            'channel_id': channel_slug,
            'channel_url': channel_url,
            'uploader': channel_title,
            'uploader_id': channel_slug,
            'uploader_url': channel_url,
            'series': channel_title,
            'creator': channel_title,
            # Entries are always attributed to NebulaIE, whichever subclass built them
            'extractor_key': NebulaIE.ie_key(),
            'extractor': NebulaIE.IE_NAME,
            '_old_archive_ids': [make_archive_id(NebulaIE, zype_id)] if zype_id else None,
        }

    def _perform_login(self, username=None, password=None):
        """
        Obtain the API token with the given credentials, then the bearer token.

        The bearer token request is authorized with the API token, so the order
        of the two assignments matters.
        """
        self._nebula_api_token = self._perform_nebula_auth(username, password)
        self._nebula_bearer_token = self._fetch_nebula_bearer_token()
359df0fc 111
359df0fc
HH
112
# Extractor for a single Nebula video page (<base>/videos/<slug>).
class NebulaIE(NebulaBaseIE):
    _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
        'md5': '14944cfee8c7beeea106320c47560efc',
        'info_dict': {
            'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
            'ext': 'mp4',
            'title': 'That Time Disney Remade Beauty and the Beast',
            'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
            'upload_date': '20180731',
            'timestamp': 1533009600,
            'channel': 'Lindsay Ellis',
            'channel_id': 'lindsayellis',
            'uploader': 'Lindsay Ellis',
            'uploader_id': 'lindsayellis',
            'uploader_url': 'https://nebula.tv/lindsayellis',
            'series': 'Lindsay Ellis',
            'display_id': 'that-time-disney-remade-beauty-and-the-beast',
            'channel_url': 'https://nebula.tv/lindsayellis',
            'creator': 'Lindsay Ellis',
            'duration': 2212,
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
        },
    }, {
        'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
        'md5': 'd05739cf6c38c09322422f696b569c23',
        'info_dict': {
            'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
            'ext': 'mp4',
            'title': 'Landing Craft - How The Allies Got Ashore',
            'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
            'upload_date': '20200327',
            'timestamp': 1585348140,
            'channel': 'Real Engineering — The Logistics of D-Day',
            'channel_id': 'd-day',
            'uploader': 'Real Engineering — The Logistics of D-Day',
            'uploader_id': 'd-day',
            'series': 'Real Engineering — The Logistics of D-Day',
            'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'creator': 'Real Engineering — The Logistics of D-Day',
            'duration': 841,
            'channel_url': 'https://nebula.tv/d-day',
            'uploader_url': 'https://nebula.tv/d-day',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
        },
    }, {
        'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
        'md5': 'ebe28a7ad822b9ee172387d860487868',
        'info_dict': {
            'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
            'ext': 'mp4',
            'title': 'Episode 1: The Draw',
            'description': r'contains:There’s free money on offer… if the players can all work together.',
            'upload_date': '20200323',
            'timestamp': 1584980400,
            'channel': 'Tom Scott Presents: Money',
            'channel_id': 'tom-scott-presents-money',
            'uploader': 'Tom Scott Presents: Money',
            'uploader_id': 'tom-scott-presents-money',
            'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
            'duration': 825,
            'channel_url': 'https://nebula.tv/tom-scott-presents-money',
            'series': 'Tom Scott Presents: Money',
            'display_id': 'money-episode-1-the-draw',
            'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
            'creator': 'Tom Scott Presents: Money',
        },
    }, {
        # Same path on the watchnebula.com host
        'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
        'only_matching': True,
    }, {
        # Same path on the beta. subdomain
        'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
        'only_matching': True,
    }]

    def _fetch_video_metadata(self, slug):
        """Return the API metadata for the video `slug`, requested with the bearer token."""
        metadata_url = f'https://content.api.nebula.app/video/{slug}/'
        return self._call_nebula_api(
            metadata_url, video_id=slug, auth_type='bearer',
            note='Fetching video meta data')

    def _real_extract(self, url):
        """Resolve the slug from `url`, fetch its metadata and turn it into an info dict."""
        video_slug = self._match_id(url)
        metadata = self._fetch_video_metadata(video_slug)
        return self._build_video_info(metadata)
bdc196a4 205
bdc196a4 206
f3b3fe16
HH
# Extractor for the <base>/myshows page, returned as one playlist with id 'myshows'.
class NebulaSubscriptionsIE(NebulaBaseIE):
    IE_NAME = 'nebula:subscriptions'
    _VALID_URL = rf'{_BASE_URL_RE}/myshows'
    _TESTS = [{
        'url': 'https://nebula.tv/myshows',
        'playlist_mincount': 1,
        'info_dict': {
            'id': 'myshows',
        },
    }]

    def _generate_playlist_entries(self):
        """
        Lazily yield an info dict for every episode of the library listing.

        Pages are requested one at a time; the 'next' field of each page is the
        URL of the following one, and a falsy value ends the iteration.
        """
        page_url = 'https://content.watchnebula.com/library/video/?page_size=100'
        pages_requested = 0
        while page_url:
            pages_requested += 1  # 1-based, only used in the progress note
            page = self._call_nebula_api(
                page_url, 'myshows', auth_type='bearer',
                note=f'Retrieving subscriptions page {pages_requested}')
            for episode in page['results']:
                yield self._build_video_info(episode)
            page_url = page['next']

    def _real_extract(self, url):
        """Return the playlist; `url` itself carries no information that is used."""
        entries = self._generate_playlist_entries()
        return self.playlist_result(entries, 'myshows')
233
234
# Extractor for a channel page (<base>/<channel slug>); the negative lookahead in
# _VALID_URL excludes the 'myshows' and 'videos/' paths.
class NebulaChannelIE(NebulaBaseIE):
    IE_NAME = 'nebula:channel'
    _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
    _TESTS = [{
        'url': 'https://nebula.tv/tom-scott-presents-money',
        'info_dict': {
            'id': 'tom-scott-presents-money',
            'title': 'Tom Scott Presents: Money',
            'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
        },
        'playlist_count': 5,
    }, {
        'url': 'https://nebula.tv/lindsayellis',
        'info_dict': {
            'id': 'lindsayellis',
            'title': 'Lindsay Ellis',
            'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
        },
        'playlist_mincount': 2,
    }]

    def _generate_playlist_entries(self, collection_id, channel):
        """
        Lazily yield an info dict for every episode of a channel.

        `channel` is the already fetched first page. Further pages are requested
        from the 'next' URL under 'episodes' until that value is falsy.
        """
        upcoming_page = 2  # the first page was fetched by the caller
        while True:
            episode_listing = channel['episodes']
            for episode in episode_listing['results']:
                yield self._build_video_info(episode)

            following_url = episode_listing['next']
            if not following_url:
                return
            channel = self._call_nebula_api(
                following_url, collection_id, auth_type='bearer',
                note=f'Retrieving channel page {upcoming_page}')
            upcoming_page += 1

    def _real_extract(self, url):
        """Fetch the channel named in `url` and return it as a playlist of its episodes."""
        collection_id = self._match_id(url)
        channel = self._call_nebula_api(
            f'https://content.watchnebula.com/video/channels/{collection_id}/',
            collection_id, auth_type='bearer', note='Retrieving channel')
        details = channel['details']

        playlist_fields = {
            'entries': self._generate_playlist_entries(collection_id, channel),
            'playlist_id': collection_id,
            'playlist_title': details['title'],
            'playlist_description': details['description'],
        }
        return self.playlist_result(**playlist_fields)