yt_dlp/extractor/nebula.py

   1 import itertools
   2 import json
   3 import time
   4 import urllib
   5
   6 from ..utils import (
   7     ExtractorError,
   8     parse_iso8601,
   9     try_get,
  10 )
  11 from .common import InfoExtractor
  12
  13
  14 class NebulaBaseIE(InfoExtractor):
  15     _NETRC_MACHINE = 'watchnebula'
  16
  17     _nebula_api_token = None
  18     _nebula_bearer_token = None
  19     _zype_access_token = None
  20
  21     def _perform_nebula_auth(self):
  22         username, password = self._get_login_info()
  23         if not (username and password):
  24             self.raise_login_required()
  25
  26         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  27         response = self._download_json(
  28             'https://api.watchnebula.com/api/v1/auth/login/',
  29             data=data, fatal=False, video_id=None,
  30             headers={
  31                 'content-type': 'application/json',
  32                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  33                 'cookie': ''
  34             },
  35             note='Logging in to Nebula with supplied credentials',
  36             errnote='Authentication failed or rejected')
  37         if not response or not response.get('key'):
  38             self.raise_login_required()
  39
  40         # save nebula token as cookie
  41         self._set_cookie(
  42             'nebula.app', 'nebula-auth',
  43             urllib.parse.quote(
  44                 json.dumps({
  45                     "apiToken": response["key"],
  46                     "isLoggingIn": False,
  47                     "isLoggingOut": False,
  48                 }, separators=(",", ":"))),
  49             expire_time=int(time.time()) + 86400 * 365,
  50         )
  51
  52         return response['key']
  53
  54     def _retrieve_nebula_api_token(self):
  55         """
  56         Check cookie jar for valid token. Try to authenticate using credentials if no valid token
  57         can be found in the cookie jar.
  58         """
  59         nebula_cookies = self._get_cookies('https://nebula.app')
  60         nebula_cookie = nebula_cookies.get('nebula-auth')
  61         if nebula_cookie:
  62             self.to_screen('Authenticating to Nebula with token from cookie jar')
  63             nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
  64             nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
  65             if nebula_api_token:
  66                 return nebula_api_token
  67
  68         return self._perform_nebula_auth()
  69
  70     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  71         assert method in ('GET', 'POST',)
  72         assert auth_type in ('api', 'bearer',)
  73
  74         def inner_call():
  75             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  76             return self._download_json(
  77                 url, video_id, note=note, headers={'Authorization': authorization},
  78                 data=b'' if method == 'POST' else None)
  79
  80         try:
  81             return inner_call()
  82         except ExtractorError as exc:
  83             # if 401 or 403, attempt credential re-auth and retry
  84             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
  85                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  86                 self._perform_login()
  87                 return inner_call()
  88             else:
  89                 raise
  90
  91     def _fetch_nebula_bearer_token(self):
  92         """
  93         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  94         """
  95         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  96                                          method='POST',
  97                                          note='Authorizing to Nebula')
  98         return response['token']
  99
 100     def _fetch_zype_access_token(self):
 101         """
 102         Get a Zype access token, which is required to access video streams -- in our case: to
 103         generate video URLs.
 104         """
 105         user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')
 106
 107         access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
 108         if not access_token:
 109             if try_get(user_object, lambda x: x['is_subscribed'], bool):
 110                 # TODO: Reimplement the same Zype token polling the Nebula frontend implements
 111                 # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
 112                 raise ExtractorError(
 113                     'Unable to extract Zype access token from Nebula API authentication endpoint. '
 114                     'Open an arbitrary video in a browser with this account to generate a token',
 115                     expected=True)
 116             raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
 117         return access_token
 118
 119     def _build_video_info(self, episode):
 120         zype_id = episode['zype_id']
 121         zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
 122         channel_slug = episode['channel_slug']
 123         return {
 124             'id': episode['zype_id'],
 125             'display_id': episode['slug'],
 126             '_type': 'url_transparent',
 127             'ie_key': 'Zype',
 128             'url': zype_video_url,
 129             'title': episode['title'],
 130             'description': episode['description'],
 131             'timestamp': parse_iso8601(episode['published_at']),
 132             'thumbnails': [{
 133                 # 'id': tn.get('name'),  # this appears to be null
 134                 'url': tn['original'],
 135                 'height': key,
 136             } for key, tn in episode['assets']['thumbnail'].items()],
 137             'duration': episode['duration'],
 138             'channel': episode['channel_title'],
 139             'channel_id': channel_slug,
 140             'channel_url': f'https://nebula.app/{channel_slug}',
 141             'uploader': episode['channel_title'],
 142             'uploader_id': channel_slug,
 143             'uploader_url': f'https://nebula.app/{channel_slug}',
 144             'series': episode['channel_title'],
 145             'creator': episode['channel_title'],
 146         }
 147
 148     def _perform_login(self, username=None, password=None):
 149         # FIXME: username should be passed from here to inner functions
 150         self._nebula_api_token = self._retrieve_nebula_api_token()
 151         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 152         self._zype_access_token = self._fetch_zype_access_token()
 153
 154
 155 class NebulaIE(NebulaBaseIE):
 156     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
 157     _TESTS = [
 158         {
 159             'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
 160             'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
 161             'info_dict': {
 162                 'id': '5c271b40b13fd613090034fd',
 163                 'ext': 'mp4',
 164                 'title': 'That Time Disney Remade Beauty and the Beast',
 165                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 166                 'upload_date': '20180731',
 167                 'timestamp': 1533009600,
 168                 'channel': 'Lindsay Ellis',
 169                 'channel_id': 'lindsayellis',
 170                 'uploader': 'Lindsay Ellis',
 171                 'uploader_id': 'lindsayellis',
 172             },
 173             'params': {
 174                 'usenetrc': True,
 175             },
 176         },
 177         {
 178             'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 179             'md5': '6d4edd14ce65720fa63aba5c583fb328',
 180             'info_dict': {
 181                 'id': '5e7e78171aaf320001fbd6be',
 182                 'ext': 'mp4',
 183                 'title': 'Landing Craft - How The Allies Got Ashore',
 184                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 185                 'upload_date': '20200327',
 186                 'timestamp': 1585348140,
 187                 'channel': 'Real Engineering',
 188                 'channel_id': 'realengineering',
 189                 'uploader': 'Real Engineering',
 190                 'uploader_id': 'realengineering',
 191             },
 192             'params': {
 193                 'usenetrc': True,
 194             },
 195         },
 196         {
 197             'url': 'https://nebula.app/videos/money-episode-1-the-draw',
 198             'md5': '8c7d272910eea320f6f8e6d3084eecf5',
 199             'info_dict': {
 200                 'id': '5e779ebdd157bc0001d1c75a',
 201                 'ext': 'mp4',
 202                 'title': 'Episode 1: The Draw',
 203                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 204                 'upload_date': '20200323',
 205                 'timestamp': 1584980400,
 206                 'channel': 'Tom Scott Presents: Money',
 207                 'channel_id': 'tom-scott-presents-money',
 208                 'uploader': 'Tom Scott Presents: Money',
 209                 'uploader_id': 'tom-scott-presents-money',
 210             },
 211             'params': {
 212                 'usenetrc': True,
 213             },
 214         },
 215         {
 216             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 217             'only_matching': True,
 218         },
 219     ]
 220
 221     def _fetch_video_metadata(self, slug):
 222         return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
 223                                      video_id=slug,
 224                                      auth_type='bearer',
 225                                      note='Fetching video meta data')
 226
 227     def _real_extract(self, url):
 228         slug = self._match_id(url)
 229         video = self._fetch_video_metadata(slug)
 230         return self._build_video_info(video)
 231
 232
 233 class NebulaCollectionIE(NebulaBaseIE):
 234     IE_NAME = 'nebula:collection'
 235     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)'
 236     _TESTS = [
 237         {
 238             'url': 'https://nebula.app/tom-scott-presents-money',
 239             'info_dict': {
 240                 'id': 'tom-scott-presents-money',
 241                 'title': 'Tom Scott Presents: Money',
 242                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 243             },
 244             'playlist_count': 5,
 245             'params': {
 246                 'usenetrc': True,
 247             },
 248         }, {
 249             'url': 'https://nebula.app/lindsayellis',
 250             'info_dict': {
 251                 'id': 'lindsayellis',
 252                 'title': 'Lindsay Ellis',
 253                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 254             },
 255             'playlist_mincount': 100,
 256             'params': {
 257                 'usenetrc': True,
 258             },
 259         },
 260     ]
 261
 262     def _generate_playlist_entries(self, collection_id, channel):
 263         episodes = channel['episodes']['results']
 264         for page_num in itertools.count(2):
 265             for episode in episodes:
 266                 yield self._build_video_info(episode)
 267             next_url = channel['episodes']['next']
 268             if not next_url:
 269                 break
 270             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 271                                             note=f'Retrieving channel page {page_num}')
 272             episodes = channel['episodes']['results']
 273
 274     def _real_extract(self, url):
 275         collection_id = self._match_id(url)
 276         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 277         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 278         channel_details = channel['details']
 279
 280         return self.playlist_result(
 281             entries=self._generate_playlist_entries(collection_id, channel),
 282             playlist_id=collection_id,
 283             playlist_title=channel_details['title'],
 284             playlist_description=channel_details['description']
 285         )