yt_dlp/extractor/nebula.py

   1 import itertools
   2 import json
   3 import urllib.error
   4
   5 from .common import InfoExtractor
   6 from ..utils import ExtractorError, make_archive_id, parse_iso8601, remove_start
   7
# Scheme + host prefix shared by every extractor's _VALID_URL below: matches
# http(s) URLs on watchnebula.com, nebula.app or nebula.tv, optionally with a
# "www." or "beta." subdomain.
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
   9
  10
  11 class NebulaBaseIE(InfoExtractor):
  12     _NETRC_MACHINE = 'watchnebula'
  13
  14     _nebula_api_token = None
  15     _nebula_bearer_token = None
  16
  17     def _perform_nebula_auth(self, username, password):
  18         if not username or not password:
  19             self.raise_login_required(method='password')
  20
  21         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  22         response = self._download_json(
  23             'https://api.watchnebula.com/api/v1/auth/login/',
  24             data=data, fatal=False, video_id=None,
  25             headers={
  26                 'content-type': 'application/json',
  27                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  28                 'cookie': ''
  29             },
  30             note='Logging in to Nebula with supplied credentials',
  31             errnote='Authentication failed or rejected')
  32         if not response or not response.get('key'):
  33             self.raise_login_required(method='password')
  34
  35         return response['key']
  36
  37     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  38         assert method in ('GET', 'POST',)
  39         assert auth_type in ('api', 'bearer',)
  40
  41         def inner_call():
  42             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  43             return self._download_json(
  44                 url, video_id, note=note, headers={'Authorization': authorization},
  45                 data=b'' if method == 'POST' else None)
  46
  47         try:
  48             return inner_call()
  49         except ExtractorError as exc:
  50             # if 401 or 403, attempt credential re-auth and retry
  51             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
  52                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  53                 self._perform_login()
  54                 return inner_call()
  55             else:
  56                 raise
  57
  58     def _fetch_nebula_bearer_token(self):
  59         """
  60         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  61         """
  62         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  63                                          method='POST',
  64                                          note='Authorizing to Nebula')
  65         return response['token']
  66
  67     def _fetch_video_formats(self, slug):
  68         stream_info = self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/stream/',
  69                                             video_id=slug,
  70                                             auth_type='bearer',
  71                                             note='Fetching video stream info')
  72         manifest_url = stream_info['manifest']
  73         return self._extract_m3u8_formats_and_subtitles(manifest_url, slug, 'mp4')
  74
  75     def _build_video_info(self, episode):
  76         fmts, subs = self._fetch_video_formats(episode['slug'])
  77         channel_slug = episode['channel_slug']
  78         channel_title = episode['channel_title']
  79         zype_id = episode.get('zype_id')
  80         return {
  81             'id': remove_start(episode['id'], 'video_episode:'),
  82             'display_id': episode['slug'],
  83             'formats': fmts,
  84             'subtitles': subs,
  85             'webpage_url': f'https://nebula.tv/{episode["slug"]}',
  86             'title': episode['title'],
  87             'description': episode['description'],
  88             'timestamp': parse_iso8601(episode['published_at']),
  89             'thumbnails': [{
  90                 # 'id': tn.get('name'),  # this appears to be null
  91                 'url': tn['original'],
  92                 'height': key,
  93             } for key, tn in episode['assets']['thumbnail'].items()],
  94             'duration': episode['duration'],
  95             'channel': channel_title,
  96             'channel_id': channel_slug,
  97             'channel_url': f'https://nebula.tv/{channel_slug}',
  98             'uploader': channel_title,
  99             'uploader_id': channel_slug,
 100             'uploader_url': f'https://nebula.tv/{channel_slug}',
 101             'series': channel_title,
 102             'creator': channel_title,
 103             'extractor_key': NebulaIE.ie_key(),
 104             'extractor': NebulaIE.IE_NAME,
 105             '_old_archive_ids': [make_archive_id(NebulaIE, zype_id)] if zype_id else None,
 106         }
 107
 108     def _perform_login(self, username=None, password=None):
 109         self._nebula_api_token = self._perform_nebula_auth(username, password)
 110         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 111
 112
 113 class NebulaIE(NebulaBaseIE):
 114     _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
 115     _TESTS = [
 116         {
 117             'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
 118             'md5': '14944cfee8c7beeea106320c47560efc',
 119             'info_dict': {
 120                 'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
 121                 'ext': 'mp4',
 122                 'title': 'That Time Disney Remade Beauty and the Beast',
 123                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 124                 'upload_date': '20180731',
 125                 'timestamp': 1533009600,
 126                 'channel': 'Lindsay Ellis',
 127                 'channel_id': 'lindsayellis',
 128                 'uploader': 'Lindsay Ellis',
 129                 'uploader_id': 'lindsayellis',
 130                 'timestamp': 1533009600,
 131                 'uploader_url': 'https://nebula.tv/lindsayellis',
 132                 'series': 'Lindsay Ellis',
 133                 'display_id': 'that-time-disney-remade-beauty-and-the-beast',
 134                 'channel_url': 'https://nebula.tv/lindsayellis',
 135                 'creator': 'Lindsay Ellis',
 136                 'duration': 2212,
 137                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 138             },
 139         },
 140         {
 141             'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 142             'md5': 'd05739cf6c38c09322422f696b569c23',
 143             'info_dict': {
 144                 'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
 145                 'ext': 'mp4',
 146                 'title': 'Landing Craft - How The Allies Got Ashore',
 147                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 148                 'upload_date': '20200327',
 149                 'timestamp': 1585348140,
 150                 'channel': 'Real Engineering — The Logistics of D-Day',
 151                 'channel_id': 'd-day',
 152                 'uploader': 'Real Engineering — The Logistics of D-Day',
 153                 'uploader_id': 'd-day',
 154                 'series': 'Real Engineering — The Logistics of D-Day',
 155                 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 156                 'creator': 'Real Engineering — The Logistics of D-Day',
 157                 'duration': 841,
 158                 'channel_url': 'https://nebula.tv/d-day',
 159                 'uploader_url': 'https://nebula.tv/d-day',
 160                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 161             },
 162         },
 163         {
 164             'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
 165             'md5': 'ebe28a7ad822b9ee172387d860487868',
 166             'info_dict': {
 167                 'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
 168                 'ext': 'mp4',
 169                 'title': 'Episode 1: The Draw',
 170                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 171                 'upload_date': '20200323',
 172                 'timestamp': 1584980400,
 173                 'channel': 'Tom Scott Presents: Money',
 174                 'channel_id': 'tom-scott-presents-money',
 175                 'uploader': 'Tom Scott Presents: Money',
 176                 'uploader_id': 'tom-scott-presents-money',
 177                 'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
 178                 'duration': 825,
 179                 'channel_url': 'https://nebula.tv/tom-scott-presents-money',
 180                 'series': 'Tom Scott Presents: Money',
 181                 'display_id': 'money-episode-1-the-draw',
 182                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 183                 'creator': 'Tom Scott Presents: Money',
 184             },
 185         },
 186         {
 187             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 188             'only_matching': True,
 189         },
 190         {
 191             'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
 192             'only_matching': True,
 193         },
 194     ]
 195
 196     def _fetch_video_metadata(self, slug):
 197         return self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/',
 198                                      video_id=slug,
 199                                      auth_type='bearer',
 200                                      note='Fetching video meta data')
 201
 202     def _real_extract(self, url):
 203         slug = self._match_id(url)
 204         video = self._fetch_video_metadata(slug)
 205         return self._build_video_info(video)
 206
 207
 208 class NebulaSubscriptionsIE(NebulaBaseIE):
 209     IE_NAME = 'nebula:subscriptions'
 210     _VALID_URL = rf'{_BASE_URL_RE}/myshows'
 211     _TESTS = [
 212         {
 213             'url': 'https://nebula.tv/myshows',
 214             'playlist_mincount': 1,
 215             'info_dict': {
 216                 'id': 'myshows',
 217             },
 218         },
 219     ]
 220
 221     def _generate_playlist_entries(self):
 222         next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
 223         page_num = 1
 224         while next_url:
 225             channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
 226                                             note=f'Retrieving subscriptions page {page_num}')
 227             for episode in channel['results']:
 228                 yield self._build_video_info(episode)
 229             next_url = channel['next']
 230             page_num += 1
 231
 232     def _real_extract(self, url):
 233         return self.playlist_result(self._generate_playlist_entries(), 'myshows')
 234
 235
 236 class NebulaChannelIE(NebulaBaseIE):
 237     IE_NAME = 'nebula:channel'
 238     _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
 239     _TESTS = [
 240         {
 241             'url': 'https://nebula.tv/tom-scott-presents-money',
 242             'info_dict': {
 243                 'id': 'tom-scott-presents-money',
 244                 'title': 'Tom Scott Presents: Money',
 245                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 246             },
 247             'playlist_count': 5,
 248         }, {
 249             'url': 'https://nebula.tv/lindsayellis',
 250             'info_dict': {
 251                 'id': 'lindsayellis',
 252                 'title': 'Lindsay Ellis',
 253                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 254             },
 255             'playlist_mincount': 2,
 256         },
 257     ]
 258
 259     def _generate_playlist_entries(self, collection_id, channel):
 260         episodes = channel['episodes']['results']
 261         for page_num in itertools.count(2):
 262             for episode in episodes:
 263                 yield self._build_video_info(episode)
 264             next_url = channel['episodes']['next']
 265             if not next_url:
 266                 break
 267             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 268                                             note=f'Retrieving channel page {page_num}')
 269             episodes = channel['episodes']['results']
 270
 271     def _real_extract(self, url):
 272         collection_id = self._match_id(url)
 273         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 274         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 275         channel_details = channel['details']
 276
 277         return self.playlist_result(
 278             entries=self._generate_playlist_entries(collection_id, channel),
 279             playlist_id=collection_id,
 280             playlist_title=channel_details['title'],
 281             playlist_description=channel_details['description']
 282         )