yt_dlp/extractor/nebula.py

   1 import itertools
   2 import json
   3 import urllib.error
   4
   5 from .common import InfoExtractor
   6 from ..utils import ExtractorError, parse_iso8601
   7
   8 _BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
   9
  10
  11 class NebulaBaseIE(InfoExtractor):
  12     _NETRC_MACHINE = 'watchnebula'
  13
  14     _nebula_api_token = None
  15     _nebula_bearer_token = None
  16
  17     def _perform_nebula_auth(self, username, password):
  18         if not username or not password:
  19             self.raise_login_required(method='password')
  20
  21         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  22         response = self._download_json(
  23             'https://api.watchnebula.com/api/v1/auth/login/',
  24             data=data, fatal=False, video_id=None,
  25             headers={
  26                 'content-type': 'application/json',
  27                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  28                 'cookie': ''
  29             },
  30             note='Logging in to Nebula with supplied credentials',
  31             errnote='Authentication failed or rejected')
  32         if not response or not response.get('key'):
  33             self.raise_login_required(method='password')
  34
  35         return response['key']
  36
  37     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  38         assert method in ('GET', 'POST',)
  39         assert auth_type in ('api', 'bearer',)
  40
  41         def inner_call():
  42             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  43             return self._download_json(
  44                 url, video_id, note=note, headers={'Authorization': authorization},
  45                 data=b'' if method == 'POST' else None)
  46
  47         try:
  48             return inner_call()
  49         except ExtractorError as exc:
  50             # if 401 or 403, attempt credential re-auth and retry
  51             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
  52                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  53                 self._perform_login()
  54                 return inner_call()
  55             else:
  56                 raise
  57
  58     def _fetch_nebula_bearer_token(self):
  59         """
  60         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  61         """
  62         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  63                                          method='POST',
  64                                          note='Authorizing to Nebula')
  65         return response['token']
  66
  67     def _fetch_video_formats(self, slug):
  68         stream_info = self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/stream/',
  69                                             video_id=slug,
  70                                             auth_type='bearer',
  71                                             note='Fetching video stream info')
  72         manifest_url = stream_info['manifest']
  73         return self._extract_m3u8_formats_and_subtitles(manifest_url, slug)
  74
  75     def _build_video_info(self, episode):
  76         fmts, subs = self._fetch_video_formats(episode['slug'])
  77         channel_slug = episode['channel_slug']
  78         channel_title = episode['channel_title']
  79         return {
  80             'id': episode['zype_id'],
  81             'display_id': episode['slug'],
  82             'formats': fmts,
  83             'subtitles': subs,
  84             'webpage_url': f'https://nebula.tv/{episode["slug"]}',
  85             'title': episode['title'],
  86             'description': episode['description'],
  87             'timestamp': parse_iso8601(episode['published_at']),
  88             'thumbnails': [{
  89                 # 'id': tn.get('name'),  # this appears to be null
  90                 'url': tn['original'],
  91                 'height': key,
  92             } for key, tn in episode['assets']['thumbnail'].items()],
  93             'duration': episode['duration'],
  94             'channel': channel_title,
  95             'channel_id': channel_slug,
  96             'channel_url': f'https://nebula.tv/{channel_slug}',
  97             'uploader': channel_title,
  98             'uploader_id': channel_slug,
  99             'uploader_url': f'https://nebula.tv/{channel_slug}',
 100             'series': channel_title,
 101             'creator': channel_title,
 102         }
 103
 104     def _perform_login(self, username=None, password=None):
 105         self._nebula_api_token = self._perform_nebula_auth(username, password)
 106         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 107
 108
 109 class NebulaIE(NebulaBaseIE):
 110     _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
 111     _TESTS = [
 112         {
 113             'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
 114             'md5': '14944cfee8c7beeea106320c47560efc',
 115             'info_dict': {
 116                 'id': '5c271b40b13fd613090034fd',
 117                 'ext': 'mp4',
 118                 'title': 'That Time Disney Remade Beauty and the Beast',
 119                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 120                 'upload_date': '20180731',
 121                 'timestamp': 1533009600,
 122                 'channel': 'Lindsay Ellis',
 123                 'channel_id': 'lindsayellis',
 124                 'uploader': 'Lindsay Ellis',
 125                 'uploader_id': 'lindsayellis',
 126                 'timestamp': 1533009600,
 127                 'uploader_url': 'https://nebula.tv/lindsayellis',
 128                 'series': 'Lindsay Ellis',
 129                 'display_id': 'that-time-disney-remade-beauty-and-the-beast',
 130                 'channel_url': 'https://nebula.tv/lindsayellis',
 131                 'creator': 'Lindsay Ellis',
 132                 'duration': 2212,
 133                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 134             },
 135         },
 136         {
 137             'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 138             'md5': 'd05739cf6c38c09322422f696b569c23',
 139             'info_dict': {
 140                 'id': '5e7e78171aaf320001fbd6be',
 141                 'ext': 'mp4',
 142                 'title': 'Landing Craft - How The Allies Got Ashore',
 143                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 144                 'upload_date': '20200327',
 145                 'timestamp': 1585348140,
 146                 'channel': 'Real Engineering',
 147                 'channel_id': 'realengineering',
 148                 'uploader': 'Real Engineering',
 149                 'uploader_id': 'realengineering',
 150                 'series': 'Real Engineering',
 151                 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 152                 'creator': 'Real Engineering',
 153                 'duration': 841,
 154                 'channel_url': 'https://nebula.tv/realengineering',
 155                 'uploader_url': 'https://nebula.tv/realengineering',
 156                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 157             },
 158         },
 159         {
 160             'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
 161             'md5': 'ebe28a7ad822b9ee172387d860487868',
 162             'info_dict': {
 163                 'id': '5e779ebdd157bc0001d1c75a',
 164                 'ext': 'mp4',
 165                 'title': 'Episode 1: The Draw',
 166                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 167                 'upload_date': '20200323',
 168                 'timestamp': 1584980400,
 169                 'channel': 'Tom Scott Presents: Money',
 170                 'channel_id': 'tom-scott-presents-money',
 171                 'uploader': 'Tom Scott Presents: Money',
 172                 'uploader_id': 'tom-scott-presents-money',
 173                 'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
 174                 'duration': 825,
 175                 'channel_url': 'https://nebula.tv/tom-scott-presents-money',
 176                 'series': 'Tom Scott Presents: Money',
 177                 'display_id': 'money-episode-1-the-draw',
 178                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 179                 'creator': 'Tom Scott Presents: Money',
 180             },
 181         },
 182         {
 183             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 184             'only_matching': True,
 185         },
 186         {
 187             'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
 188             'only_matching': True,
 189         },
 190     ]
 191
 192     def _fetch_video_metadata(self, slug):
 193         return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
 194                                      video_id=slug,
 195                                      auth_type='bearer',
 196                                      note='Fetching video meta data')
 197
 198     def _real_extract(self, url):
 199         slug = self._match_id(url)
 200         video = self._fetch_video_metadata(slug)
 201         return self._build_video_info(video)
 202
 203
 204 class NebulaSubscriptionsIE(NebulaBaseIE):
 205     IE_NAME = 'nebula:subscriptions'
 206     _VALID_URL = rf'{_BASE_URL_RE}/myshows'
 207     _TESTS = [
 208         {
 209             'url': 'https://nebula.tv/myshows',
 210             'playlist_mincount': 1,
 211             'info_dict': {
 212                 'id': 'myshows',
 213             },
 214         },
 215     ]
 216
 217     def _generate_playlist_entries(self):
 218         next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
 219         page_num = 1
 220         while next_url:
 221             channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
 222                                             note=f'Retrieving subscriptions page {page_num}')
 223             for episode in channel['results']:
 224                 yield self._build_video_info(episode)
 225             next_url = channel['next']
 226             page_num += 1
 227
 228     def _real_extract(self, url):
 229         return self.playlist_result(self._generate_playlist_entries(), 'myshows')
 230
 231
 232 class NebulaChannelIE(NebulaBaseIE):
 233     IE_NAME = 'nebula:channel'
 234     _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
 235     _TESTS = [
 236         {
 237             'url': 'https://nebula.tv/tom-scott-presents-money',
 238             'info_dict': {
 239                 'id': 'tom-scott-presents-money',
 240                 'title': 'Tom Scott Presents: Money',
 241                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 242             },
 243             'playlist_count': 5,
 244         }, {
 245             'url': 'https://nebula.tv/lindsayellis',
 246             'info_dict': {
 247                 'id': 'lindsayellis',
 248                 'title': 'Lindsay Ellis',
 249                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 250             },
 251             'playlist_mincount': 2,
 252         },
 253     ]
 254
 255     def _generate_playlist_entries(self, collection_id, channel):
 256         episodes = channel['episodes']['results']
 257         for page_num in itertools.count(2):
 258             for episode in episodes:
 259                 yield self._build_video_info(episode)
 260             next_url = channel['episodes']['next']
 261             if not next_url:
 262                 break
 263             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 264                                             note=f'Retrieving channel page {page_num}')
 265             episodes = channel['episodes']['results']
 266
 267     def _real_extract(self, url):
 268         collection_id = self._match_id(url)
 269         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 270         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 271         channel_details = channel['details']
 272
 273         return self.playlist_result(
 274             entries=self._generate_playlist_entries(collection_id, channel),
 275             playlist_id=collection_id,
 276             playlist_title=channel_details['title'],
 277             playlist_description=channel_details['description']
 278         )