yt_dlp/extractor/nebula.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
   4 import itertools
   5 import json
   6 import time
   7 import urllib
   8
   9 from ..utils import (
  10     ExtractorError,
  11     parse_iso8601,
  12     try_get,
  13 )
  14 from .common import InfoExtractor
  15
  16
  17 class NebulaBaseIE(InfoExtractor):
  18     _NETRC_MACHINE = 'watchnebula'
  19
  20     _nebula_api_token = None
  21     _nebula_bearer_token = None
  22     _zype_access_token = None
  23
  24     def _perform_nebula_auth(self):
  25         username, password = self._get_login_info()
  26         if not (username and password):
  27             self.raise_login_required()
  28
  29         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  30         response = self._download_json(
  31             'https://api.watchnebula.com/api/v1/auth/login/',
  32             data=data, fatal=False, video_id=None,
  33             headers={
  34                 'content-type': 'application/json',
  35                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  36                 'cookie': ''
  37             },
  38             note='Logging in to Nebula with supplied credentials',
  39             errnote='Authentication failed or rejected')
  40         if not response or not response.get('key'):
  41             self.raise_login_required()
  42
  43         # save nebula token as cookie
  44         self._set_cookie(
  45             'nebula.app', 'nebula-auth',
  46             urllib.parse.quote(
  47                 json.dumps({
  48                     "apiToken": response["key"],
  49                     "isLoggingIn": False,
  50                     "isLoggingOut": False,
  51                 }, separators=(",", ":"))),
  52             expire_time=int(time.time()) + 86400 * 365,
  53         )
  54
  55         return response['key']
  56
  57     def _retrieve_nebula_api_token(self):
  58         """
  59         Check cookie jar for valid token. Try to authenticate using credentials if no valid token
  60         can be found in the cookie jar.
  61         """
  62         nebula_cookies = self._get_cookies('https://nebula.app')
  63         nebula_cookie = nebula_cookies.get('nebula-auth')
  64         if nebula_cookie:
  65             self.to_screen('Authenticating to Nebula with token from cookie jar')
  66             nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
  67             nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
  68             if nebula_api_token:
  69                 return nebula_api_token
  70
  71         return self._perform_nebula_auth()
  72
  73     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  74         assert method in ('GET', 'POST',)
  75         assert auth_type in ('api', 'bearer',)
  76
  77         def inner_call():
  78             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  79             return self._download_json(
  80                 url, video_id, note=note, headers={'Authorization': authorization},
  81                 data=b'' if method == 'POST' else None)
  82
  83         try:
  84             return inner_call()
  85         except ExtractorError as exc:
  86             # if 401 or 403, attempt credential re-auth and retry
  87             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
  88                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  89                 self._login()
  90                 return inner_call()
  91             else:
  92                 raise
  93
  94     def _fetch_nebula_bearer_token(self):
  95         """
  96         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  97         """
  98         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  99                                          method='POST',
 100                                          note='Authorizing to Nebula')
 101         return response['token']
 102
 103     def _fetch_zype_access_token(self):
 104         """
 105         Get a Zype access token, which is required to access video streams -- in our case: to
 106         generate video URLs.
 107         """
 108         user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')
 109
 110         access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
 111         if not access_token:
 112             if try_get(user_object, lambda x: x['is_subscribed'], bool):
 113                 # TODO: Reimplement the same Zype token polling the Nebula frontend implements
 114                 # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
 115                 raise ExtractorError(
 116                     'Unable to extract Zype access token from Nebula API authentication endpoint. '
 117                     'Open an arbitrary video in a browser with this account to generate a token',
 118                     expected=True)
 119             raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
 120         return access_token
 121
 122     def _build_video_info(self, episode):
 123         zype_id = episode['zype_id']
 124         zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
 125         channel_slug = episode['channel_slug']
 126         return {
 127             'id': episode['zype_id'],
 128             'display_id': episode['slug'],
 129             '_type': 'url_transparent',
 130             'ie_key': 'Zype',
 131             'url': zype_video_url,
 132             'title': episode['title'],
 133             'description': episode['description'],
 134             'timestamp': parse_iso8601(episode['published_at']),
 135             'thumbnails': [{
 136                 # 'id': tn.get('name'),  # this appears to be null
 137                 'url': tn['original'],
 138                 'height': key,
 139             } for key, tn in episode['assets']['thumbnail'].items()],
 140             'duration': episode['duration'],
 141             'channel': episode['channel_title'],
 142             'channel_id': channel_slug,
 143             'channel_url': f'https://nebula.app/{channel_slug}',
 144             'uploader': episode['channel_title'],
 145             'uploader_id': channel_slug,
 146             'uploader_url': f'https://nebula.app/{channel_slug}',
 147             'series': episode['channel_title'],
 148             'creator': episode['channel_title'],
 149         }
 150
 151     def _perform_login(self, username=None, password=None):
 152         # FIXME: username should be passed from here to inner functions
 153         self._nebula_api_token = self._retrieve_nebula_api_token()
 154         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 155         self._zype_access_token = self._fetch_zype_access_token()
 156
 157
 158 class NebulaIE(NebulaBaseIE):
 159     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
 160     _TESTS = [
 161         {
 162             'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
 163             'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
 164             'info_dict': {
 165                 'id': '5c271b40b13fd613090034fd',
 166                 'ext': 'mp4',
 167                 'title': 'That Time Disney Remade Beauty and the Beast',
 168                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 169                 'upload_date': '20180731',
 170                 'timestamp': 1533009600,
 171                 'channel': 'Lindsay Ellis',
 172                 'channel_id': 'lindsayellis',
 173                 'uploader': 'Lindsay Ellis',
 174                 'uploader_id': 'lindsayellis',
 175             },
 176             'params': {
 177                 'usenetrc': True,
 178             },
 179         },
 180         {
 181             'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 182             'md5': '6d4edd14ce65720fa63aba5c583fb328',
 183             'info_dict': {
 184                 'id': '5e7e78171aaf320001fbd6be',
 185                 'ext': 'mp4',
 186                 'title': 'Landing Craft - How The Allies Got Ashore',
 187                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 188                 'upload_date': '20200327',
 189                 'timestamp': 1585348140,
 190                 'channel': 'Real Engineering',
 191                 'channel_id': 'realengineering',
 192                 'uploader': 'Real Engineering',
 193                 'uploader_id': 'realengineering',
 194             },
 195             'params': {
 196                 'usenetrc': True,
 197             },
 198         },
 199         {
 200             'url': 'https://nebula.app/videos/money-episode-1-the-draw',
 201             'md5': '8c7d272910eea320f6f8e6d3084eecf5',
 202             'info_dict': {
 203                 'id': '5e779ebdd157bc0001d1c75a',
 204                 'ext': 'mp4',
 205                 'title': 'Episode 1: The Draw',
 206                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 207                 'upload_date': '20200323',
 208                 'timestamp': 1584980400,
 209                 'channel': 'Tom Scott Presents: Money',
 210                 'channel_id': 'tom-scott-presents-money',
 211                 'uploader': 'Tom Scott Presents: Money',
 212                 'uploader_id': 'tom-scott-presents-money',
 213             },
 214             'params': {
 215                 'usenetrc': True,
 216             },
 217         },
 218         {
 219             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 220             'only_matching': True,
 221         },
 222     ]
 223
 224     def _fetch_video_metadata(self, slug):
 225         return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
 226                                      video_id=slug,
 227                                      auth_type='bearer',
 228                                      note='Fetching video meta data')
 229
 230     def _real_extract(self, url):
 231         slug = self._match_id(url)
 232         video = self._fetch_video_metadata(slug)
 233         return self._build_video_info(video)
 234
 235
 236 class NebulaCollectionIE(NebulaBaseIE):
 237     IE_NAME = 'nebula:collection'
 238     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)'
 239     _TESTS = [
 240         {
 241             'url': 'https://nebula.app/tom-scott-presents-money',
 242             'info_dict': {
 243                 'id': 'tom-scott-presents-money',
 244                 'title': 'Tom Scott Presents: Money',
 245                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 246             },
 247             'playlist_count': 5,
 248             'params': {
 249                 'usenetrc': True,
 250             },
 251         }, {
 252             'url': 'https://nebula.app/lindsayellis',
 253             'info_dict': {
 254                 'id': 'lindsayellis',
 255                 'title': 'Lindsay Ellis',
 256                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 257             },
 258             'playlist_mincount': 100,
 259             'params': {
 260                 'usenetrc': True,
 261             },
 262         },
 263     ]
 264
 265     def _generate_playlist_entries(self, collection_id, channel):
 266         episodes = channel['episodes']['results']
 267         for page_num in itertools.count(2):
 268             for episode in episodes:
 269                 yield self._build_video_info(episode)
 270             next_url = channel['episodes']['next']
 271             if not next_url:
 272                 break
 273             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 274                                             note=f'Retrieving channel page {page_num}')
 275             episodes = channel['episodes']['results']
 276
 277     def _real_extract(self, url):
 278         collection_id = self._match_id(url)
 279         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 280         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 281         channel_details = channel['details']
 282
 283         return self.playlist_result(
 284             entries=self._generate_playlist_entries(collection_id, channel),
 285             playlist_id=collection_id,
 286             playlist_title=channel_details['title'],
 287             playlist_description=channel_details['description']
 288         )