yt_dlp/extractor/nebula.py

   1 import itertools
   2 import json
   3 import urllib.error
   4
   5 from .common import InfoExtractor
   6 from ..utils import ExtractorError, parse_iso8601
   7
# Scheme + host prefix shared by every extractor's _VALID_URL in this module:
# http or https, an optional "www.", and one of the three Nebula domains
# (watchnebula.com, nebula.app, nebula.tv). Each extractor appends its own path.
_BASE_URL_RE = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
   9
  10
  11 class NebulaBaseIE(InfoExtractor):
  12     _NETRC_MACHINE = 'watchnebula'
  13
  14     _nebula_api_token = None
  15     _nebula_bearer_token = None
  16
  17     def _perform_nebula_auth(self, username, password):
  18         if not username or not password:
  19             self.raise_login_required(method='password')
  20
  21         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  22         response = self._download_json(
  23             'https://api.watchnebula.com/api/v1/auth/login/',
  24             data=data, fatal=False, video_id=None,
  25             headers={
  26                 'content-type': 'application/json',
  27                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  28                 'cookie': ''
  29             },
  30             note='Logging in to Nebula with supplied credentials',
  31             errnote='Authentication failed or rejected')
  32         if not response or not response.get('key'):
  33             self.raise_login_required(method='password')
  34
  35         return response['key']
  36
  37     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  38         assert method in ('GET', 'POST',)
  39         assert auth_type in ('api', 'bearer',)
  40
  41         def inner_call():
  42             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  43             return self._download_json(
  44                 url, video_id, note=note, headers={'Authorization': authorization},
  45                 data=b'' if method == 'POST' else None)
  46
  47         try:
  48             return inner_call()
  49         except ExtractorError as exc:
  50             # if 401 or 403, attempt credential re-auth and retry
  51             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
  52                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  53                 self._perform_login()
  54                 return inner_call()
  55             else:
  56                 raise
  57
  58     def _fetch_nebula_bearer_token(self):
  59         """
  60         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  61         """
  62         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  63                                          method='POST',
  64                                          note='Authorizing to Nebula')
  65         return response['token']
  66
  67     def _fetch_video_formats(self, slug):
  68         stream_info = self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/stream/',
  69                                             video_id=slug,
  70                                             auth_type='bearer',
  71                                             note='Fetching video stream info')
  72         manifest_url = stream_info['manifest']
  73         return self._extract_m3u8_formats_and_subtitles(manifest_url, slug)
  74
  75     def _build_video_info(self, episode):
  76         fmts, subs = self._fetch_video_formats(episode['slug'])
  77         channel_slug = episode['channel_slug']
  78         channel_title = episode['channel_title']
  79         return {
  80             'id': episode['zype_id'],
  81             'display_id': episode['slug'],
  82             'formats': fmts,
  83             'subtitles': subs,
  84             'webpage_url': f'https://nebula.tv/{episode["slug"]}',
  85             'title': episode['title'],
  86             'description': episode['description'],
  87             'timestamp': parse_iso8601(episode['published_at']),
  88             'thumbnails': [{
  89                 # 'id': tn.get('name'),  # this appears to be null
  90                 'url': tn['original'],
  91                 'height': key,
  92             } for key, tn in episode['assets']['thumbnail'].items()],
  93             'duration': episode['duration'],
  94             'channel': channel_title,
  95             'channel_id': channel_slug,
  96             'channel_url': f'https://nebula.tv/{channel_slug}',
  97             'uploader': channel_title,
  98             'uploader_id': channel_slug,
  99             'uploader_url': f'https://nebula.tv/{channel_slug}',
 100             'series': channel_title,
 101             'creator': channel_title,
 102         }
 103
 104     def _perform_login(self, username=None, password=None):
 105         self._nebula_api_token = self._perform_nebula_auth(username, password)
 106         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 107
 108
 109 class NebulaIE(NebulaBaseIE):
 110     _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
 111     _TESTS = [
 112         {
 113             'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
 114             'md5': '14944cfee8c7beeea106320c47560efc',
 115             'info_dict': {
 116                 'id': '5c271b40b13fd613090034fd',
 117                 'ext': 'mp4',
 118                 'title': 'That Time Disney Remade Beauty and the Beast',
 119                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 120                 'upload_date': '20180731',
 121                 'timestamp': 1533009600,
 122                 'channel': 'Lindsay Ellis',
 123                 'channel_id': 'lindsayellis',
 124                 'uploader': 'Lindsay Ellis',
 125                 'uploader_id': 'lindsayellis',
 126                 'timestamp': 1533009600,
 127                 'uploader_url': 'https://nebula.tv/lindsayellis',
 128                 'series': 'Lindsay Ellis',
 129                 'display_id': 'that-time-disney-remade-beauty-and-the-beast',
 130                 'channel_url': 'https://nebula.tv/lindsayellis',
 131                 'creator': 'Lindsay Ellis',
 132                 'duration': 2212,
 133                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 134             },
 135         },
 136         {
 137             'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 138             'md5': 'd05739cf6c38c09322422f696b569c23',
 139             'info_dict': {
 140                 'id': '5e7e78171aaf320001fbd6be',
 141                 'ext': 'mp4',
 142                 'title': 'Landing Craft - How The Allies Got Ashore',
 143                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 144                 'upload_date': '20200327',
 145                 'timestamp': 1585348140,
 146                 'channel': 'Real Engineering',
 147                 'channel_id': 'realengineering',
 148                 'uploader': 'Real Engineering',
 149                 'uploader_id': 'realengineering',
 150                 'series': 'Real Engineering',
 151                 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 152                 'creator': 'Real Engineering',
 153                 'duration': 841,
 154                 'channel_url': 'https://nebula.tv/realengineering',
 155                 'uploader_url': 'https://nebula.tv/realengineering',
 156                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 157             },
 158         },
 159         {
 160             'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
 161             'md5': 'ebe28a7ad822b9ee172387d860487868',
 162             'info_dict': {
 163                 'id': '5e779ebdd157bc0001d1c75a',
 164                 'ext': 'mp4',
 165                 'title': 'Episode 1: The Draw',
 166                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 167                 'upload_date': '20200323',
 168                 'timestamp': 1584980400,
 169                 'channel': 'Tom Scott Presents: Money',
 170                 'channel_id': 'tom-scott-presents-money',
 171                 'uploader': 'Tom Scott Presents: Money',
 172                 'uploader_id': 'tom-scott-presents-money',
 173                 'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
 174                 'duration': 825,
 175                 'channel_url': 'https://nebula.tv/tom-scott-presents-money',
 176                 'series': 'Tom Scott Presents: Money',
 177                 'display_id': 'money-episode-1-the-draw',
 178                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 179                 'creator': 'Tom Scott Presents: Money',
 180             },
 181         },
 182         {
 183             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 184             'only_matching': True,
 185         },
 186     ]
 187
 188     def _fetch_video_metadata(self, slug):
 189         return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
 190                                      video_id=slug,
 191                                      auth_type='bearer',
 192                                      note='Fetching video meta data')
 193
 194     def _real_extract(self, url):
 195         slug = self._match_id(url)
 196         video = self._fetch_video_metadata(slug)
 197         return self._build_video_info(video)
 198
 199
 200 class NebulaSubscriptionsIE(NebulaBaseIE):
 201     IE_NAME = 'nebula:subscriptions'
 202     _VALID_URL = rf'{_BASE_URL_RE}/myshows'
 203     _TESTS = [
 204         {
 205             'url': 'https://nebula.tv/myshows',
 206             'playlist_mincount': 1,
 207             'info_dict': {
 208                 'id': 'myshows',
 209             },
 210         },
 211     ]
 212
 213     def _generate_playlist_entries(self):
 214         next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
 215         page_num = 1
 216         while next_url:
 217             channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
 218                                             note=f'Retrieving subscriptions page {page_num}')
 219             for episode in channel['results']:
 220                 yield self._build_video_info(episode)
 221             next_url = channel['next']
 222             page_num += 1
 223
 224     def _real_extract(self, url):
 225         return self.playlist_result(self._generate_playlist_entries(), 'myshows')
 226
 227
 228 class NebulaChannelIE(NebulaBaseIE):
 229     IE_NAME = 'nebula:channel'
 230     _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
 231     _TESTS = [
 232         {
 233             'url': 'https://nebula.tv/tom-scott-presents-money',
 234             'info_dict': {
 235                 'id': 'tom-scott-presents-money',
 236                 'title': 'Tom Scott Presents: Money',
 237                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 238             },
 239             'playlist_count': 5,
 240         }, {
 241             'url': 'https://nebula.tv/lindsayellis',
 242             'info_dict': {
 243                 'id': 'lindsayellis',
 244                 'title': 'Lindsay Ellis',
 245                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 246             },
 247             'playlist_mincount': 2,
 248         },
 249     ]
 250
 251     def _generate_playlist_entries(self, collection_id, channel):
 252         episodes = channel['episodes']['results']
 253         for page_num in itertools.count(2):
 254             for episode in episodes:
 255                 yield self._build_video_info(episode)
 256             next_url = channel['episodes']['next']
 257             if not next_url:
 258                 break
 259             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 260                                             note=f'Retrieving channel page {page_num}')
 261             episodes = channel['episodes']['results']
 262
 263     def _real_extract(self, url):
 264         collection_id = self._match_id(url)
 265         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 266         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 267         channel_details = channel['details']
 268
 269         return self.playlist_result(
 270             entries=self._generate_playlist_entries(collection_id, channel),
 271             playlist_id=collection_id,
 272             playlist_title=channel_details['title'],
 273             playlist_description=channel_details['description']
 274         )