yt_dlp/extractor/nebula.py

   1 import itertools
   2 import json
   3 import time
   4 import urllib.error
   5 import urllib.parse
   6
   7 from .common import InfoExtractor
   8 from ..utils import ExtractorError, parse_iso8601, try_get
   9
  10
  11 class NebulaBaseIE(InfoExtractor):
  12     _NETRC_MACHINE = 'watchnebula'
  13
  14     _nebula_api_token = None
  15     _nebula_bearer_token = None
  16     _zype_access_token = None
  17
  18     def _perform_nebula_auth(self, username, password):
  19         if not username or not password:
  20             self.raise_login_required()
  21
  22         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  23         response = self._download_json(
  24             'https://api.watchnebula.com/api/v1/auth/login/',
  25             data=data, fatal=False, video_id=None,
  26             headers={
  27                 'content-type': 'application/json',
  28                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  29                 'cookie': ''
  30             },
  31             note='Logging in to Nebula with supplied credentials',
  32             errnote='Authentication failed or rejected')
  33         if not response or not response.get('key'):
  34             self.raise_login_required()
  35
  36         # save nebula token as cookie
  37         self._set_cookie(
  38             'nebula.app', 'nebula-auth',
  39             urllib.parse.quote(
  40                 json.dumps({
  41                     "apiToken": response["key"],
  42                     "isLoggingIn": False,
  43                     "isLoggingOut": False,
  44                 }, separators=(",", ":"))),
  45             expire_time=int(time.time()) + 86400 * 365,
  46         )
  47
  48         return response['key']
  49
  50     def _retrieve_nebula_api_token(self, username=None, password=None):
  51         """
  52         Check cookie jar for valid token. Try to authenticate using credentials if no valid token
  53         can be found in the cookie jar.
  54         """
  55         nebula_cookies = self._get_cookies('https://nebula.app')
  56         nebula_cookie = nebula_cookies.get('nebula-auth')
  57         if nebula_cookie:
  58             self.to_screen('Authenticating to Nebula with token from cookie jar')
  59             nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
  60             nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
  61             if nebula_api_token:
  62                 return nebula_api_token
  63
  64         return self._perform_nebula_auth(username, password)
  65
  66     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  67         assert method in ('GET', 'POST',)
  68         assert auth_type in ('api', 'bearer',)
  69
  70         def inner_call():
  71             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  72             return self._download_json(
  73                 url, video_id, note=note, headers={'Authorization': authorization},
  74                 data=b'' if method == 'POST' else None)
  75
  76         try:
  77             return inner_call()
  78         except ExtractorError as exc:
  79             # if 401 or 403, attempt credential re-auth and retry
  80             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
  81                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  82                 self._perform_login()
  83                 return inner_call()
  84             else:
  85                 raise
  86
  87     def _fetch_nebula_bearer_token(self):
  88         """
  89         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  90         """
  91         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  92                                          method='POST',
  93                                          note='Authorizing to Nebula')
  94         return response['token']
  95
  96     def _fetch_zype_access_token(self):
  97         """
  98         Get a Zype access token, which is required to access video streams -- in our case: to
  99         generate video URLs.
 100         """
 101         user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')
 102
 103         access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
 104         if not access_token:
 105             if try_get(user_object, lambda x: x['is_subscribed'], bool):
 106                 # TODO: Reimplement the same Zype token polling the Nebula frontend implements
 107                 # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
 108                 raise ExtractorError(
 109                     'Unable to extract Zype access token from Nebula API authentication endpoint. '
 110                     'Open an arbitrary video in a browser with this account to generate a token',
 111                     expected=True)
 112             raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
 113         return access_token
 114
 115     def _build_video_info(self, episode):
 116         zype_id = episode['zype_id']
 117         zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
 118         channel_slug = episode['channel_slug']
 119         return {
 120             'id': episode['zype_id'],
 121             'display_id': episode['slug'],
 122             '_type': 'url_transparent',
 123             'ie_key': 'Zype',
 124             'url': zype_video_url,
 125             'title': episode['title'],
 126             'description': episode['description'],
 127             'timestamp': parse_iso8601(episode['published_at']),
 128             'thumbnails': [{
 129                 # 'id': tn.get('name'),  # this appears to be null
 130                 'url': tn['original'],
 131                 'height': key,
 132             } for key, tn in episode['assets']['thumbnail'].items()],
 133             'duration': episode['duration'],
 134             'channel': episode['channel_title'],
 135             'channel_id': channel_slug,
 136             'channel_url': f'https://nebula.app/{channel_slug}',
 137             'uploader': episode['channel_title'],
 138             'uploader_id': channel_slug,
 139             'uploader_url': f'https://nebula.app/{channel_slug}',
 140             'series': episode['channel_title'],
 141             'creator': episode['channel_title'],
 142         }
 143
 144     def _perform_login(self, username=None, password=None):
 145         self._nebula_api_token = self._retrieve_nebula_api_token(username, password)
 146         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 147         self._zype_access_token = self._fetch_zype_access_token()
 148
 149
 150 class NebulaIE(NebulaBaseIE):
 151     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
 152     _TESTS = [
 153         {
 154             'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
 155             'md5': '14944cfee8c7beeea106320c47560efc',
 156             'info_dict': {
 157                 'id': '5c271b40b13fd613090034fd',
 158                 'ext': 'mp4',
 159                 'title': 'That Time Disney Remade Beauty and the Beast',
 160                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 161                 'upload_date': '20180731',
 162                 'timestamp': 1533009600,
 163                 'channel': 'Lindsay Ellis',
 164                 'channel_id': 'lindsayellis',
 165                 'uploader': 'Lindsay Ellis',
 166                 'uploader_id': 'lindsayellis',
 167                 'timestamp': 1533009600,
 168                 'uploader_url': 'https://nebula.app/lindsayellis',
 169                 'series': 'Lindsay Ellis',
 170                 'average_rating': int,
 171                 'display_id': 'that-time-disney-remade-beauty-and-the-beast',
 172                 'channel_url': 'https://nebula.app/lindsayellis',
 173                 'creator': 'Lindsay Ellis',
 174                 'duration': 2212,
 175                 'view_count': int,
 176                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 177             },
 178         },
 179         {
 180             'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 181             'md5': 'd05739cf6c38c09322422f696b569c23',
 182             'info_dict': {
 183                 'id': '5e7e78171aaf320001fbd6be',
 184                 'ext': 'mp4',
 185                 'title': 'Landing Craft - How The Allies Got Ashore',
 186                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 187                 'upload_date': '20200327',
 188                 'timestamp': 1585348140,
 189                 'channel': 'Real Engineering',
 190                 'channel_id': 'realengineering',
 191                 'uploader': 'Real Engineering',
 192                 'uploader_id': 'realengineering',
 193                 'view_count': int,
 194                 'series': 'Real Engineering',
 195                 'average_rating': int,
 196                 'display_id': 'the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 197                 'creator': 'Real Engineering',
 198                 'duration': 841,
 199                 'channel_url': 'https://nebula.app/realengineering',
 200                 'uploader_url': 'https://nebula.app/realengineering',
 201                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 202             },
 203         },
 204         {
 205             'url': 'https://nebula.app/videos/money-episode-1-the-draw',
 206             'md5': 'ebe28a7ad822b9ee172387d860487868',
 207             'info_dict': {
 208                 'id': '5e779ebdd157bc0001d1c75a',
 209                 'ext': 'mp4',
 210                 'title': 'Episode 1: The Draw',
 211                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 212                 'upload_date': '20200323',
 213                 'timestamp': 1584980400,
 214                 'channel': 'Tom Scott Presents: Money',
 215                 'channel_id': 'tom-scott-presents-money',
 216                 'uploader': 'Tom Scott Presents: Money',
 217                 'uploader_id': 'tom-scott-presents-money',
 218                 'uploader_url': 'https://nebula.app/tom-scott-presents-money',
 219                 'duration': 825,
 220                 'channel_url': 'https://nebula.app/tom-scott-presents-money',
 221                 'view_count': int,
 222                 'series': 'Tom Scott Presents: Money',
 223                 'display_id': 'money-episode-1-the-draw',
 224                 'thumbnail': r're:https://\w+\.cloudfront\.net/[\w-]+\.jpeg?.*',
 225                 'average_rating': int,
 226                 'creator': 'Tom Scott Presents: Money',
 227             },
 228         },
 229         {
 230             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 231             'only_matching': True,
 232         },
 233     ]
 234
 235     def _fetch_video_metadata(self, slug):
 236         return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
 237                                      video_id=slug,
 238                                      auth_type='bearer',
 239                                      note='Fetching video meta data')
 240
 241     def _real_extract(self, url):
 242         slug = self._match_id(url)
 243         video = self._fetch_video_metadata(slug)
 244         return self._build_video_info(video)
 245
 246
 247 class NebulaSubscriptionsIE(NebulaBaseIE):
 248     IE_NAME = 'nebula:subscriptions'
 249     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/myshows'
 250     _TESTS = [
 251         {
 252             'url': 'https://nebula.app/myshows',
 253             'playlist_mincount': 1,
 254             'info_dict': {
 255                 'id': 'myshows',
 256             },
 257         },
 258     ]
 259
 260     def _generate_playlist_entries(self):
 261         next_url = 'https://content.watchnebula.com/library/video/?page_size=100'
 262         page_num = 1
 263         while next_url:
 264             channel = self._call_nebula_api(next_url, 'myshows', auth_type='bearer',
 265                                             note=f'Retrieving subscriptions page {page_num}')
 266             for episode in channel['results']:
 267                 yield self._build_video_info(episode)
 268             next_url = channel['next']
 269             page_num += 1
 270
 271     def _real_extract(self, url):
 272         return self.playlist_result(self._generate_playlist_entries(), 'myshows')
 273
 274
 275 class NebulaChannelIE(NebulaBaseIE):
 276     IE_NAME = 'nebula:channel'
 277     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!myshows|videos/)(?P<id>[-\w]+)'
 278     _TESTS = [
 279         {
 280             'url': 'https://nebula.app/tom-scott-presents-money',
 281             'info_dict': {
 282                 'id': 'tom-scott-presents-money',
 283                 'title': 'Tom Scott Presents: Money',
 284                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 285             },
 286             'playlist_count': 5,
 287         }, {
 288             'url': 'https://nebula.app/lindsayellis',
 289             'info_dict': {
 290                 'id': 'lindsayellis',
 291                 'title': 'Lindsay Ellis',
 292                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 293             },
 294             'playlist_mincount': 100,
 295         },
 296     ]
 297
 298     def _generate_playlist_entries(self, collection_id, channel):
 299         episodes = channel['episodes']['results']
 300         for page_num in itertools.count(2):
 301             for episode in episodes:
 302                 yield self._build_video_info(episode)
 303             next_url = channel['episodes']['next']
 304             if not next_url:
 305                 break
 306             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 307                                             note=f'Retrieving channel page {page_num}')
 308             episodes = channel['episodes']['results']
 309
 310     def _real_extract(self, url):
 311         collection_id = self._match_id(url)
 312         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 313         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 314         channel_details = channel['details']
 315
 316         return self.playlist_result(
 317             entries=self._generate_playlist_entries(collection_id, channel),
 318             playlist_id=collection_id,
 319             playlist_title=channel_details['title'],
 320             playlist_description=channel_details['description']
 321         )