yt_dlp/extractor/nebula.py

import itertools
import json
import time
import urllib
import urllib.error
import urllib.parse
   5
   6 from ..utils import (
   7     ExtractorError,
   8     parse_iso8601,
   9     try_get,
  10 )
  11 from .common import InfoExtractor
  12
  13
  14 class NebulaBaseIE(InfoExtractor):
  15     _NETRC_MACHINE = 'watchnebula'
  16
  17     _nebula_api_token = None
  18     _nebula_bearer_token = None
  19     _zype_access_token = None
  20
  21     def _perform_nebula_auth(self, username, password):
  22         if not username or not password:
  23             self.raise_login_required()
  24
  25         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  26         response = self._download_json(
  27             'https://api.watchnebula.com/api/v1/auth/login/',
  28             data=data, fatal=False, video_id=None,
  29             headers={
  30                 'content-type': 'application/json',
  31                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  32                 'cookie': ''
  33             },
  34             note='Logging in to Nebula with supplied credentials',
  35             errnote='Authentication failed or rejected')
  36         if not response or not response.get('key'):
  37             self.raise_login_required()
  38
  39         # save nebula token as cookie
  40         self._set_cookie(
  41             'nebula.app', 'nebula-auth',
  42             urllib.parse.quote(
  43                 json.dumps({
  44                     "apiToken": response["key"],
  45                     "isLoggingIn": False,
  46                     "isLoggingOut": False,
  47                 }, separators=(",", ":"))),
  48             expire_time=int(time.time()) + 86400 * 365,
  49         )
  50
  51         return response['key']
  52
  53     def _retrieve_nebula_api_token(self, username=None, password=None):
  54         """
  55         Check cookie jar for valid token. Try to authenticate using credentials if no valid token
  56         can be found in the cookie jar.
  57         """
  58         nebula_cookies = self._get_cookies('https://nebula.app')
  59         nebula_cookie = nebula_cookies.get('nebula-auth')
  60         if nebula_cookie:
  61             self.to_screen('Authenticating to Nebula with token from cookie jar')
  62             nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
  63             nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
  64             if nebula_api_token:
  65                 return nebula_api_token
  66
  67         return self._perform_nebula_auth(username, password)
  68
  69     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  70         assert method in ('GET', 'POST',)
  71         assert auth_type in ('api', 'bearer',)
  72
  73         def inner_call():
  74             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  75             return self._download_json(
  76                 url, video_id, note=note, headers={'Authorization': authorization},
  77                 data=b'' if method == 'POST' else None)
  78
  79         try:
  80             return inner_call()
  81         except ExtractorError as exc:
  82             # if 401 or 403, attempt credential re-auth and retry
  83             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
  84                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  85                 self._perform_login()
  86                 return inner_call()
  87             else:
  88                 raise
  89
  90     def _fetch_nebula_bearer_token(self):
  91         """
  92         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  93         """
  94         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  95                                          method='POST',
  96                                          note='Authorizing to Nebula')
  97         return response['token']
  98
  99     def _fetch_zype_access_token(self):
 100         """
 101         Get a Zype access token, which is required to access video streams -- in our case: to
 102         generate video URLs.
 103         """
 104         user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')
 105
 106         access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
 107         if not access_token:
 108             if try_get(user_object, lambda x: x['is_subscribed'], bool):
 109                 # TODO: Reimplement the same Zype token polling the Nebula frontend implements
 110                 # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
 111                 raise ExtractorError(
 112                     'Unable to extract Zype access token from Nebula API authentication endpoint. '
 113                     'Open an arbitrary video in a browser with this account to generate a token',
 114                     expected=True)
 115             raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
 116         return access_token
 117
 118     def _build_video_info(self, episode):
 119         zype_id = episode['zype_id']
 120         zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
 121         channel_slug = episode['channel_slug']
 122         return {
 123             'id': episode['zype_id'],
 124             'display_id': episode['slug'],
 125             '_type': 'url_transparent',
 126             'ie_key': 'Zype',
 127             'url': zype_video_url,
 128             'title': episode['title'],
 129             'description': episode['description'],
 130             'timestamp': parse_iso8601(episode['published_at']),
 131             'thumbnails': [{
 132                 # 'id': tn.get('name'),  # this appears to be null
 133                 'url': tn['original'],
 134                 'height': key,
 135             } for key, tn in episode['assets']['thumbnail'].items()],
 136             'duration': episode['duration'],
 137             'channel': episode['channel_title'],
 138             'channel_id': channel_slug,
 139             'channel_url': f'https://nebula.app/{channel_slug}',
 140             'uploader': episode['channel_title'],
 141             'uploader_id': channel_slug,
 142             'uploader_url': f'https://nebula.app/{channel_slug}',
 143             'series': episode['channel_title'],
 144             'creator': episode['channel_title'],
 145         }
 146
 147     def _perform_login(self, username=None, password=None):
 148         self._nebula_api_token = self._retrieve_nebula_api_token(username, password)
 149         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 150         self._zype_access_token = self._fetch_zype_access_token()
 151
 152
 153 class NebulaIE(NebulaBaseIE):
 154     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
 155     _TESTS = [
 156         {
 157             'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
 158             'md5': '14944cfee8c7beeea106320c47560efc',
 159             'info_dict': {
 160                 'id': '5c271b40b13fd613090034fd',
 161                 'ext': 'mp4',
 162                 'title': 'That Time Disney Remade Beauty and the Beast',
 163                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 164                 'upload_date': '20180731',
 165                 'timestamp': 1533009600,
 166                 'channel': 'Lindsay Ellis',
 167                 'channel_id': 'lindsayellis',
 168                 'uploader': 'Lindsay Ellis',
 169                 'uploader_id': 'lindsayellis',
 170                 'timestamp': 1533009600,
 171                 'uploader_url': 'https://nebula.app/lindsayellis',
 172                 'series': 'Lindsay Ellis',
 173                 'average_rating': int,
 174                 'display_id': 'that-time-disney-remade-beauty-and-the-beast',
 175                 'channel_url': 'https://nebula.app/lindsayellis',
 176                 'creator': 'Lindsay Ellis',
 177                 'duration': 2212,
 178                 'view_count': int,
 179                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 180             },
 181         },
 182         {
 183             'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 184             'md5': 'd05739cf6c38c09322422f696b569c23',
 185             'info_dict': {
 186                 'id': '5e7e78171aaf320001fbd6be',
 187                 'ext': 'mp4',
 188                 'title': 'Landing Craft - How The Allies Got Ashore',
 189                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 190                 'upload_date': '20200327',
 191                 'timestamp': 1585348140,
 192                 'channel': 'Real Engineering',
 193                 'channel_id': 'realengineering',
 194                 'uploader': 'Real Engineering',
 195                 'uploader_id': 'realengineering',
 196                 'view_count': int,
 197                 'series': 'Real Engineering',
 198                 'average_rating': int,
 199                 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 200                 'creator': 'Real Engineering',
 201                 'duration': 841,
 202                 'channel_url': 'https://nebula.app/realengineering',
 203                 'uploader_url': 'https://nebula.app/realengineering',
 204                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 205             },
 206         },
 207         {
 208             'url': 'https://nebula.app/videos/money-episode-1-the-draw',
 209             'md5': 'ebe28a7ad822b9ee172387d860487868',
 210             'info_dict': {
 211                 'id': '5e779ebdd157bc0001d1c75a',
 212                 'ext': 'mp4',
 213                 'title': 'Episode 1: The Draw',
 214                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 215                 'upload_date': '20200323',
 216                 'timestamp': 1584980400,
 217                 'channel': 'Tom Scott Presents: Money',
 218                 'channel_id': 'tom-scott-presents-money',
 219                 'uploader': 'Tom Scott Presents: Money',
 220                 'uploader_id': 'tom-scott-presents-money',
 221                 'uploader_url': 'https://nebula.app/tom-scott-presents-money',
 222                 'duration': 825,
 223                 'channel_url': 'https://nebula.app/tom-scott-presents-money',
 224                 'view_count': int,
 225                 'series': 'Tom Scott Presents: Money',
 226                 'display_id': 'money-episode-1-the-draw',
 227                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 228                 'average_rating': int,
 229                 'creator': 'Tom Scott Presents: Money',
 230             },
 231         },
 232         {
 233             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 234             'only_matching': True,
 235         },
 236     ]
 237
 238     def _fetch_video_metadata(self, slug):
 239         return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
 240                                      video_id=slug,
 241                                      auth_type='bearer',
 242                                      note='Fetching video meta data')
 243
 244     def _real_extract(self, url):
 245         slug = self._match_id(url)
 246         video = self._fetch_video_metadata(slug)
 247         return self._build_video_info(video)
 248
 249
 250 class NebulaSubscriptionsIE(NebulaBaseIE):
 251     IE_NAME = 'nebula:subscriptions'
 252     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/myshows'
 253     _TESTS = [
 254         {
 255             'url': 'https://nebula.app/myshows',
 256             'playlist_mincount': 1,
 257             'info_dict': {
 258                 'id': 'myshows',
 259             },
 260         },
 261     ]
 262
 263     def _generate_playlist_entries(self):
 264         next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
 265         page_num = 1
 266         while next_url:
 267             channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
 268                                             note=f'Retrieving subscriptions page {page_num}')
 269             for episode in channel['results']:
 270                 yield self._build_video_info(episode)
 271             next_url = channel['next']
 272             page_num += 1
 273
 274     def _real_extract(self, url):
 275         return self.playlist_result(self._generate_playlist_entries(), 'myshows')
 276
 277
 278 class NebulaChannelIE(NebulaBaseIE):
 279     IE_NAME = 'nebula:channel'
 280     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!myshows|videos/)(?P<id>[-\w]+)'
 281     _TESTS = [
 282         {
 283             'url': 'https://nebula.app/tom-scott-presents-money',
 284             'info_dict': {
 285                 'id': 'tom-scott-presents-money',
 286                 'title': 'Tom Scott Presents: Money',
 287                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 288             },
 289             'playlist_count': 5,
 290         }, {
 291             'url': 'https://nebula.app/lindsayellis',
 292             'info_dict': {
 293                 'id': 'lindsayellis',
 294                 'title': 'Lindsay Ellis',
 295                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 296             },
 297             'playlist_mincount': 100,
 298         },
 299     ]
 300
 301     def _generate_playlist_entries(self, collection_id, channel):
 302         episodes = channel['episodes']['results']
 303         for page_num in itertools.count(2):
 304             for episode in episodes:
 305                 yield self._build_video_info(episode)
 306             next_url = channel['episodes']['next']
 307             if not next_url:
 308                 break
 309             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 310                                             note=f'Retrieving channel page {page_num}')
 311             episodes = channel['episodes']['results']
 312
 313     def _real_extract(self, url):
 314         collection_id = self._match_id(url)
 315         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 316         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 317         channel_details = channel['details']
 318
 319         return self.playlist_result(
 320             entries=self._generate_playlist_entries(collection_id, channel),
 321             playlist_id=collection_id,
 322             playlist_title=channel_details['title'],
 323             playlist_description=channel_details['description']
 324         )