yt_dlp/extractor/nebula.py

   1 import itertools
   2 import json
   3
   4 from .common import InfoExtractor
   5 from ..networking.exceptions import HTTPError
   6 from ..utils import ExtractorError, make_archive_id, parse_iso8601, remove_start
   7
# Scheme + host prefix shared by the _VALID_URL patterns in this file:
# http(s), an optional "www." or "beta." subdomain, and one of the
# watchnebula.com / nebula.app / nebula.tv domains.
_BASE_URL_RE = r'https?://(?:www\.|beta\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
   9
  10
  11 class NebulaBaseIE(InfoExtractor):
  12     _NETRC_MACHINE = 'watchnebula'
  13
  14     _nebula_api_token = None
  15     _nebula_bearer_token = None
  16
  17     def _perform_nebula_auth(self, username, password):
  18         if not username or not password:
  19             self.raise_login_required(method='password')
  20
  21         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  22         response = self._download_json(
  23             'https://api.watchnebula.com/api/v1/auth/login/',
  24             data=data, fatal=False, video_id=None,
  25             headers={
  26                 'content-type': 'application/json',
  27                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  28                 'cookie': ''
  29             },
  30             note='Logging in to Nebula with supplied credentials',
  31             errnote='Authentication failed or rejected')
  32         if not response or not response.get('key'):
  33             self.raise_login_required(method='password')
  34
  35         return response['key']
  36
  37     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  38         assert method in ('GET', 'POST',)
  39         assert auth_type in ('api', 'bearer',)
  40
  41         def inner_call():
  42             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  43             return self._download_json(
  44                 url, video_id, note=note, headers={'Authorization': authorization},
  45                 data=b'' if method == 'POST' else None)
  46
  47         try:
  48             return inner_call()
  49         except ExtractorError as exc:
  50             # if 401 or 403, attempt credential re-auth and retry
  51             if exc.cause and isinstance(exc.cause, HTTPError) and exc.cause.status in (401, 403):
  52                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  53                 self._perform_login()
  54                 return inner_call()
  55             else:
  56                 raise
  57
  58     def _fetch_nebula_bearer_token(self):
  59         """
  60         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  61         """
  62         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  63                                          method='POST',
  64                                          note='Authorizing to Nebula')
  65         return response['token']
  66
  67     def _fetch_video_formats(self, slug):
  68         stream_info = self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/stream/',
  69                                             video_id=slug,
  70                                             auth_type='bearer',
  71                                             note='Fetching video stream info')
  72         manifest_url = stream_info['manifest']
  73         return self._extract_m3u8_formats_and_subtitles(manifest_url, slug, 'mp4')
  74
  75     def _build_video_info(self, episode):
  76         fmts, subs = self._fetch_video_formats(episode['slug'])
  77         channel_slug = episode['channel_slug']
  78         channel_title = episode['channel_title']
  79         zype_id = episode.get('zype_id')
  80         return {
  81             'id': remove_start(episode['id'], 'video_episode:'),
  82             'display_id': episode['slug'],
  83             'formats': fmts,
  84             'subtitles': subs,
  85             'webpage_url': f'https://nebula.tv/{episode["slug"]}',
  86             'title': episode['title'],
  87             'description': episode['description'],
  88             'timestamp': parse_iso8601(episode['published_at']),
  89             'thumbnails': [{
  90                 # 'id': tn.get('name'),  # this appears to be null
  91                 'url': tn['original'],
  92                 'height': key,
  93             } for key, tn in episode['assets']['thumbnail'].items()],
  94             'duration': episode['duration'],
  95             'channel': channel_title,
  96             'channel_id': channel_slug,
  97             'channel_url': f'https://nebula.tv/{channel_slug}',
  98             'uploader': channel_title,
  99             'uploader_id': channel_slug,
 100             'uploader_url': f'https://nebula.tv/{channel_slug}',
 101             'series': channel_title,
 102             'creator': channel_title,
 103             'extractor_key': NebulaIE.ie_key(),
 104             'extractor': NebulaIE.IE_NAME,
 105             '_old_archive_ids': [make_archive_id(NebulaIE, zype_id)] if zype_id else None,
 106         }
 107
 108     def _perform_login(self, username=None, password=None):
 109         self._nebula_api_token = self._perform_nebula_auth(username, password)
 110         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 111
 112
 113 class NebulaIE(NebulaBaseIE):
 114     _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
 115     _TESTS = [
 116         {
 117             'url': 'https://nebula.tv/videos/that-time-disney-remade-beauty-and-the-beast',
 118             'md5': '14944cfee8c7beeea106320c47560efc',
 119             'info_dict': {
 120                 'id': '84ed544d-4afd-4723-8cd5-2b95261f0abf',
 121                 'ext': 'mp4',
 122                 'title': 'That Time Disney Remade Beauty and the Beast',
 123                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 124                 'upload_date': '20180731',
 125                 'timestamp': 1533009600,
 126                 'channel': 'Lindsay Ellis',
 127                 'channel_id': 'lindsayellis',
 128                 'uploader': 'Lindsay Ellis',
 129                 'uploader_id': 'lindsayellis',
 130                 'uploader_url': 'https://nebula.tv/lindsayellis',
 131                 'series': 'Lindsay Ellis',
 132                 'display_id': 'that-time-disney-remade-beauty-and-the-beast',
 133                 'channel_url': 'https://nebula.tv/lindsayellis',
 134                 'creator': 'Lindsay Ellis',
 135                 'duration': 2212,
 136                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 137             },
 138         },
 139         {
 140             'url': 'https://nebula.tv/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 141             'md5': 'd05739cf6c38c09322422f696b569c23',
 142             'info_dict': {
 143                 'id': '7e623145-1b44-4ca3-aa0b-ed25a247ea34',
 144                 'ext': 'mp4',
 145                 'title': 'Landing Craft - How The Allies Got Ashore',
 146                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 147                 'upload_date': '20200327',
 148                 'timestamp': 1585348140,
 149                 'channel': 'Real Engineering — The Logistics of D-Day',
 150                 'channel_id': 'd-day',
 151                 'uploader': 'Real Engineering — The Logistics of D-Day',
 152                 'uploader_id': 'd-day',
 153                 'series': 'Real Engineering — The Logistics of D-Day',
 154                 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 155                 'creator': 'Real Engineering — The Logistics of D-Day',
 156                 'duration': 841,
 157                 'channel_url': 'https://nebula.tv/d-day',
 158                 'uploader_url': 'https://nebula.tv/d-day',
 159                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 160             },
 161         },
 162         {
 163             'url': 'https://nebula.tv/videos/money-episode-1-the-draw',
 164             'md5': 'ebe28a7ad822b9ee172387d860487868',
 165             'info_dict': {
 166                 'id': 'b96c5714-9e2b-4ec3-b3f1-20f6e89cc553',
 167                 'ext': 'mp4',
 168                 'title': 'Episode 1: The Draw',
 169                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 170                 'upload_date': '20200323',
 171                 'timestamp': 1584980400,
 172                 'channel': 'Tom Scott Presents: Money',
 173                 'channel_id': 'tom-scott-presents-money',
 174                 'uploader': 'Tom Scott Presents: Money',
 175                 'uploader_id': 'tom-scott-presents-money',
 176                 'uploader_url': 'https://nebula.tv/tom-scott-presents-money',
 177                 'duration': 825,
 178                 'channel_url': 'https://nebula.tv/tom-scott-presents-money',
 179                 'series': 'Tom Scott Presents: Money',
 180                 'display_id': 'money-episode-1-the-draw',
 181                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 182                 'creator': 'Tom Scott Presents: Money',
 183             },
 184         },
 185         {
 186             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 187             'only_matching': True,
 188         },
 189         {
 190             'url': 'https://beta.nebula.tv/videos/money-episode-1-the-draw',
 191             'only_matching': True,
 192         },
 193     ]
 194
 195     def _fetch_video_metadata(self, slug):
 196         return self._call_nebula_api(f'https://content.api.nebula.app/video/{slug}/',
 197                                      video_id=slug,
 198                                      auth_type='bearer',
 199                                      note='Fetching video meta data')
 200
 201     def _real_extract(self, url):
 202         slug = self._match_id(url)
 203         video = self._fetch_video_metadata(slug)
 204         return self._build_video_info(video)
 205
 206
 207 class NebulaSubscriptionsIE(NebulaBaseIE):
 208     IE_NAME = 'nebula:subscriptions'
 209     _VALID_URL = rf'{_BASE_URL_RE}/myshows'
 210     _TESTS = [
 211         {
 212             'url': 'https://nebula.tv/myshows',
 213             'playlist_mincount': 1,
 214             'info_dict': {
 215                 'id': 'myshows',
 216             },
 217         },
 218     ]
 219
 220     def _generate_playlist_entries(self):
 221         next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
 222         page_num = 1
 223         while next_url:
 224             channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
 225                                             note=f'Retrieving subscriptions page {page_num}')
 226             for episode in channel['results']:
 227                 yield self._build_video_info(episode)
 228             next_url = channel['next']
 229             page_num += 1
 230
 231     def _real_extract(self, url):
 232         return self.playlist_result(self._generate_playlist_entries(), 'myshows')
 233
 234
 235 class NebulaChannelIE(NebulaBaseIE):
 236     IE_NAME = 'nebula:channel'
 237     _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
 238     _TESTS = [
 239         {
 240             'url': 'https://nebula.tv/tom-scott-presents-money',
 241             'info_dict': {
 242                 'id': 'tom-scott-presents-money',
 243                 'title': 'Tom Scott Presents: Money',
 244                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 245             },
 246             'playlist_count': 5,
 247         }, {
 248             'url': 'https://nebula.tv/lindsayellis',
 249             'info_dict': {
 250                 'id': 'lindsayellis',
 251                 'title': 'Lindsay Ellis',
 252                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 253             },
 254             'playlist_mincount': 2,
 255         },
 256     ]
 257
 258     def _generate_playlist_entries(self, collection_id, channel):
 259         episodes = channel['episodes']['results']
 260         for page_num in itertools.count(2):
 261             for episode in episodes:
 262                 yield self._build_video_info(episode)
 263             next_url = channel['episodes']['next']
 264             if not next_url:
 265                 break
 266             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 267                                             note=f'Retrieving channel page {page_num}')
 268             episodes = channel['episodes']['results']
 269
 270     def _real_extract(self, url):
 271         collection_id = self._match_id(url)
 272         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 273         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 274         channel_details = channel['details']
 275
 276         return self.playlist_result(
 277             entries=self._generate_playlist_entries(collection_id, channel),
 278             playlist_id=collection_id,
 279             playlist_title=channel_details['title'],
 280             playlist_description=channel_details['description']
 281         )