yt_dlp/extractor/nebula.py

   1 import itertools
   2 import json
   3 import time
   4 import urllib.error
   5 import urllib.parse
   6
   7 from .common import InfoExtractor
   8 from ..utils import ExtractorError, parse_iso8601, try_get
   9
# Scheme and host prefix shared by the _VALID_URL patterns of the extractors in this
# module; matches watchnebula.com, nebula.app and nebula.tv, with an optional "www."
_BASE_URL_RE = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app|nebula\.tv)'
  11
  12
  13 class NebulaBaseIE(InfoExtractor):
  14     _NETRC_MACHINE = 'watchnebula'
  15
  16     _nebula_api_token = None
  17     _nebula_bearer_token = None
  18     _zype_access_token = None
  19
  20     def _perform_nebula_auth(self, username, password):
  21         if not username or not password:
  22             self.raise_login_required()
  23
  24         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  25         response = self._download_json(
  26             'https://api.watchnebula.com/api/v1/auth/login/',
  27             data=data, fatal=False, video_id=None,
  28             headers={
  29                 'content-type': 'application/json',
  30                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  31                 'cookie': ''
  32             },
  33             note='Logging in to Nebula with supplied credentials',
  34             errnote='Authentication failed or rejected')
  35         if not response or not response.get('key'):
  36             self.raise_login_required()
  37
  38         # save nebula token as cookie
  39         self._set_cookie(
  40             'nebula.app', 'nebula-auth',
  41             urllib.parse.quote(
  42                 json.dumps({
  43                     "apiToken": response["key"],
  44                     "isLoggingIn": False,
  45                     "isLoggingOut": False,
  46                 }, separators=(",", ":"))),
  47             expire_time=int(time.time()) + 86400 * 365,
  48         )
  49
  50         return response['key']
  51
  52     def _retrieve_nebula_api_token(self, username=None, password=None):
  53         """
  54         Check cookie jar for valid token. Try to authenticate using credentials if no valid token
  55         can be found in the cookie jar.
  56         """
  57         nebula_cookies = self._get_cookies('https://nebula.app')
  58         nebula_cookie = nebula_cookies.get('nebula-auth')
  59         if nebula_cookie:
  60             self.to_screen('Authenticating to Nebula with token from cookie jar')
  61             nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
  62             nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
  63             if nebula_api_token:
  64                 return nebula_api_token
  65
  66         return self._perform_nebula_auth(username, password)
  67
  68     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  69         assert method in ('GET', 'POST',)
  70         assert auth_type in ('api', 'bearer',)
  71
  72         def inner_call():
  73             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  74             return self._download_json(
  75                 url, video_id, note=note, headers={'Authorization': authorization},
  76                 data=b'' if method == 'POST' else None)
  77
  78         try:
  79             return inner_call()
  80         except ExtractorError as exc:
  81             # if 401 or 403, attempt credential re-auth and retry
  82             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
  83                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  84                 self._perform_login()
  85                 return inner_call()
  86             else:
  87                 raise
  88
  89     def _fetch_nebula_bearer_token(self):
  90         """
  91         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  92         """
  93         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  94                                          method='POST',
  95                                          note='Authorizing to Nebula')
  96         return response['token']
  97
  98     def _fetch_zype_access_token(self):
  99         """
 100         Get a Zype access token, which is required to access video streams -- in our case: to
 101         generate video URLs.
 102         """
 103         user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')
 104
 105         access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
 106         if not access_token:
 107             if try_get(user_object, lambda x: x['is_subscribed'], bool):
 108                 # TODO: Reimplement the same Zype token polling the Nebula frontend implements
 109                 # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
 110                 raise ExtractorError(
 111                     'Unable to extract Zype access token from Nebula API authentication endpoint. '
 112                     'Open an arbitrary video in a browser with this account to generate a token',
 113                     expected=True)
 114             raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
 115         return access_token
 116
 117     def _build_video_info(self, episode):
 118         zype_id = episode['zype_id']
 119         zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
 120         channel_slug = episode['channel_slug']
 121         return {
 122             'id': episode['zype_id'],
 123             'display_id': episode['slug'],
 124             '_type': 'url_transparent',
 125             'ie_key': 'Zype',
 126             'url': zype_video_url,
 127             'title': episode['title'],
 128             'description': episode['description'],
 129             'timestamp': parse_iso8601(episode['published_at']),
 130             'thumbnails': [{
 131                 # 'id': tn.get('name'),  # this appears to be null
 132                 'url': tn['original'],
 133                 'height': key,
 134             } for key, tn in episode['assets']['thumbnail'].items()],
 135             'duration': episode['duration'],
 136             'channel': episode['channel_title'],
 137             'channel_id': channel_slug,
 138             'channel_url': f'https://nebula.app/{channel_slug}',
 139             'uploader': episode['channel_title'],
 140             'uploader_id': channel_slug,
 141             'uploader_url': f'https://nebula.app/{channel_slug}',
 142             'series': episode['channel_title'],
 143             'creator': episode['channel_title'],
 144         }
 145
 146     def _perform_login(self, username=None, password=None):
 147         self._nebula_api_token = self._retrieve_nebula_api_token(username, password)
 148         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 149         self._zype_access_token = self._fetch_zype_access_token()
 150
 151
 152 class NebulaIE(NebulaBaseIE):
 153     _VALID_URL = rf'{_BASE_URL_RE}/videos/(?P<id>[-\w]+)'
 154     _TESTS = [
 155         {
 156             'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
 157             'md5': '14944cfee8c7beeea106320c47560efc',
 158             'info_dict': {
 159                 'id': '5c271b40b13fd613090034fd',
 160                 'ext': 'mp4',
 161                 'title': 'That Time Disney Remade Beauty and the Beast',
 162                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 163                 'upload_date': '20180731',
 164                 'timestamp': 1533009600,
 165                 'channel': 'Lindsay Ellis',
 166                 'channel_id': 'lindsayellis',
 167                 'uploader': 'Lindsay Ellis',
 168                 'uploader_id': 'lindsayellis',
 169                 'timestamp': 1533009600,
 170                 'uploader_url': 'https://nebula.app/lindsayellis',
 171                 'series': 'Lindsay Ellis',
 172                 'average_rating': int,
 173                 'display_id': 'that-time-disney-remade-beauty-and-the-beast',
 174                 'channel_url': 'https://nebula.app/lindsayellis',
 175                 'creator': 'Lindsay Ellis',
 176                 'duration': 2212,
 177                 'view_count': int,
 178                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 179             },
 180         },
 181         {
 182             'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 183             'md5': 'd05739cf6c38c09322422f696b569c23',
 184             'info_dict': {
 185                 'id': '5e7e78171aaf320001fbd6be',
 186                 'ext': 'mp4',
 187                 'title': 'Landing Craft - How The Allies Got Ashore',
 188                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 189                 'upload_date': '20200327',
 190                 'timestamp': 1585348140,
 191                 'channel': 'Real Engineering',
 192                 'channel_id': 'realengineering',
 193                 'uploader': 'Real Engineering',
 194                 'uploader_id': 'realengineering',
 195                 'view_count': int,
 196                 'series': 'Real Engineering',
 197                 'average_rating': int,
 198                 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 199                 'creator': 'Real Engineering',
 200                 'duration': 841,
 201                 'channel_url': 'https://nebula.app/realengineering',
 202                 'uploader_url': 'https://nebula.app/realengineering',
 203                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 204             },
 205         },
 206         {
 207             'url': 'https://nebula.app/videos/money-episode-1-the-draw',
 208             'md5': 'ebe28a7ad822b9ee172387d860487868',
 209             'info_dict': {
 210                 'id': '5e779ebdd157bc0001d1c75a',
 211                 'ext': 'mp4',
 212                 'title': 'Episode 1: The Draw',
 213                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 214                 'upload_date': '20200323',
 215                 'timestamp': 1584980400,
 216                 'channel': 'Tom Scott Presents: Money',
 217                 'channel_id': 'tom-scott-presents-money',
 218                 'uploader': 'Tom Scott Presents: Money',
 219                 'uploader_id': 'tom-scott-presents-money',
 220                 'uploader_url': 'https://nebula.app/tom-scott-presents-money',
 221                 'duration': 825,
 222                 'channel_url': 'https://nebula.app/tom-scott-presents-money',
 223                 'view_count': int,
 224                 'series': 'Tom Scott Presents: Money',
 225                 'display_id': 'money-episode-1-the-draw',
 226                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 227                 'average_rating': int,
 228                 'creator': 'Tom Scott Presents: Money',
 229             },
 230         },
 231         {
 232             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 233             'only_matching': True,
 234         },
 235     ]
 236
 237     def _fetch_video_metadata(self, slug):
 238         return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
 239                                      video_id=slug,
 240                                      auth_type='bearer',
 241                                      note='Fetching video meta data')
 242
 243     def _real_extract(self, url):
 244         slug = self._match_id(url)
 245         video = self._fetch_video_metadata(slug)
 246         return self._build_video_info(video)
 247
 248
 249 class NebulaSubscriptionsIE(NebulaBaseIE):
 250     IE_NAME = 'nebula:subscriptions'
 251     _VALID_URL = rf'{_BASE_URL_RE}/myshows'
 252     _TESTS = [
 253         {
 254             'url': 'https://nebula.app/myshows',
 255             'playlist_mincount': 1,
 256             'info_dict': {
 257                 'id': 'myshows',
 258             },
 259         },
 260     ]
 261
 262     def _generate_playlist_entries(self):
 263         next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
 264         page_num = 1
 265         while next_url:
 266             channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
 267                                             note=f'Retrieving subscriptions page {page_num}')
 268             for episode in channel['results']:
 269                 yield self._build_video_info(episode)
 270             next_url = channel['next']
 271             page_num += 1
 272
 273     def _real_extract(self, url):
 274         return self.playlist_result(self._generate_playlist_entries(), 'myshows')
 275
 276
 277 class NebulaChannelIE(NebulaBaseIE):
 278     IE_NAME = 'nebula:channel'
 279     _VALID_URL = rf'{_BASE_URL_RE}/(?!myshows|videos/)(?P<id>[-\w]+)'
 280     _TESTS = [
 281         {
 282             'url': 'https://nebula.app/tom-scott-presents-money',
 283             'info_dict': {
 284                 'id': 'tom-scott-presents-money',
 285                 'title': 'Tom Scott Presents: Money',
 286                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 287             },
 288             'playlist_count': 5,
 289         }, {
 290             'url': 'https://nebula.app/lindsayellis',
 291             'info_dict': {
 292                 'id': 'lindsayellis',
 293                 'title': 'Lindsay Ellis',
 294                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 295             },
 296             'playlist_mincount': 100,
 297         },
 298     ]
 299
 300     def _generate_playlist_entries(self, collection_id, channel):
 301         episodes = channel['episodes']['results']
 302         for page_num in itertools.count(2):
 303             for episode in episodes:
 304                 yield self._build_video_info(episode)
 305             next_url = channel['episodes']['next']
 306             if not next_url:
 307                 break
 308             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 309                                             note=f'Retrieving channel page {page_num}')
 310             episodes = channel['episodes']['results']
 311
 312     def _real_extract(self, url):
 313         collection_id = self._match_id(url)
 314         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 315         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 316         channel_details = channel['details']
 317
 318         return self.playlist_result(
 319             entries=self._generate_playlist_entries(collection_id, channel),
 320             playlist_id=collection_id,
 321             playlist_title=channel_details['title'],
 322             playlist_description=channel_details['description']
 323         )