yt_dlp/extractor/nebula.py

   1 # coding: utf-8
   2 from __future__ import unicode_literals
   3
import itertools
import json
import time
import urllib
import urllib.error
import urllib.parse
   8
   9 from ..utils import (
  10     ExtractorError,
  11     parse_iso8601,
  12     try_get,
  13 )
  14 from .common import InfoExtractor
  15
  16
  17 class NebulaBaseIE(InfoExtractor):
  18     _NETRC_MACHINE = 'watchnebula'
  19
  20     _nebula_api_token = None
  21     _nebula_bearer_token = None
  22     _zype_access_token = None
  23
  24     def _perform_nebula_auth(self):
  25         username, password = self._get_login_info()
  26         if not (username and password):
  27             self.raise_login_required()
  28
  29         data = json.dumps({'email': username, 'password': password}).encode('utf8')
  30         response = self._download_json(
  31             'https://api.watchnebula.com/api/v1/auth/login/',
  32             data=data, fatal=False, video_id=None,
  33             headers={
  34                 'content-type': 'application/json',
  35                 # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
  36                 'cookie': ''
  37             },
  38             note='Logging in to Nebula with supplied credentials',
  39             errnote='Authentication failed or rejected')
  40         if not response or not response.get('key'):
  41             self.raise_login_required()
  42
  43         # save nebula token as cookie
  44         self._set_cookie(
  45             'nebula.app', 'nebula-auth',
  46             urllib.parse.quote(
  47                 json.dumps({
  48                     "apiToken": response["key"],
  49                     "isLoggingIn": False,
  50                     "isLoggingOut": False,
  51                 }, separators=(",", ":"))),
  52             expire_time=int(time.time()) + 86400 * 365,
  53         )
  54
  55         return response['key']
  56
  57     def _retrieve_nebula_api_token(self):
  58         """
  59         Check cookie jar for valid token. Try to authenticate using credentials if no valid token
  60         can be found in the cookie jar.
  61         """
  62         nebula_cookies = self._get_cookies('https://nebula.app')
  63         nebula_cookie = nebula_cookies.get('nebula-auth')
  64         if nebula_cookie:
  65             self.to_screen('Authenticating to Nebula with token from cookie jar')
  66             nebula_cookie_value = urllib.parse.unquote(nebula_cookie.value)
  67             nebula_api_token = self._parse_json(nebula_cookie_value, None).get('apiToken')
  68             if nebula_api_token:
  69                 return nebula_api_token
  70
  71         return self._perform_nebula_auth()
  72
  73     def _call_nebula_api(self, url, video_id=None, method='GET', auth_type='api', note=''):
  74         assert method in ('GET', 'POST',)
  75         assert auth_type in ('api', 'bearer',)
  76
  77         def inner_call():
  78             authorization = f'Token {self._nebula_api_token}' if auth_type == 'api' else f'Bearer {self._nebula_bearer_token}'
  79             return self._download_json(
  80                 url, video_id, note=note, headers={'Authorization': authorization},
  81                 data=b'' if method == 'POST' else None)
  82
  83         try:
  84             return inner_call()
  85         except ExtractorError as exc:
  86             # if 401 or 403, attempt credential re-auth and retry
  87             if exc.cause and isinstance(exc.cause, urllib.error.HTTPError) and exc.cause.code in (401, 403):
  88                 self.to_screen(f'Reauthenticating to Nebula and retrying, because last {auth_type} call resulted in error {exc.cause.code}')
  89                 self._login()
  90                 return inner_call()
  91             else:
  92                 raise
  93
  94     def _fetch_nebula_bearer_token(self):
  95         """
  96         Get a Bearer token for the Nebula API. This will be required to fetch video meta data.
  97         """
  98         response = self._call_nebula_api('https://api.watchnebula.com/api/v1/authorization/',
  99                                          method='POST',
 100                                          note='Authorizing to Nebula')
 101         return response['token']
 102
 103     def _fetch_zype_access_token(self):
 104         """
 105         Get a Zype access token, which is required to access video streams -- in our case: to
 106         generate video URLs.
 107         """
 108         user_object = self._call_nebula_api('https://api.watchnebula.com/api/v1/auth/user/', note='Retrieving Zype access token')
 109
 110         access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], str)
 111         if not access_token:
 112             if try_get(user_object, lambda x: x['is_subscribed'], bool):
 113                 # TODO: Reimplement the same Zype token polling the Nebula frontend implements
 114                 # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
 115                 raise ExtractorError(
 116                     'Unable to extract Zype access token from Nebula API authentication endpoint. '
 117                     'Open an arbitrary video in a browser with this account to generate a token',
 118                     expected=True)
 119             raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')
 120         return access_token
 121
 122     def _build_video_info(self, episode):
 123         zype_id = episode['zype_id']
 124         zype_video_url = f'https://player.zype.com/embed/{zype_id}.html?access_token={self._zype_access_token}'
 125         channel_slug = episode['channel_slug']
 126         return {
 127             'id': episode['zype_id'],
 128             'display_id': episode['slug'],
 129             '_type': 'url_transparent',
 130             'ie_key': 'Zype',
 131             'url': zype_video_url,
 132             'title': episode['title'],
 133             'description': episode['description'],
 134             'timestamp': parse_iso8601(episode['published_at']),
 135             'thumbnails': [{
 136                 # 'id': tn.get('name'),  # this appears to be null
 137                 'url': tn['original'],
 138                 'height': key,
 139             } for key, tn in episode['assets']['thumbnail'].items()],
 140             'duration': episode['duration'],
 141             'channel': episode['channel_title'],
 142             'channel_id': channel_slug,
 143             'channel_url': f'https://nebula.app/{channel_slug}',
 144             'uploader': episode['channel_title'],
 145             'uploader_id': channel_slug,
 146             'uploader_url': f'https://nebula.app/{channel_slug}',
 147             'series': episode['channel_title'],
 148             'creator': episode['channel_title'],
 149         }
 150
 151     def _login(self):
 152         self._nebula_api_token = self._retrieve_nebula_api_token()
 153         self._nebula_bearer_token = self._fetch_nebula_bearer_token()
 154         self._zype_access_token = self._fetch_zype_access_token()
 155
 156     def _real_initialize(self):
 157         self._login()
 158
 159
 160 class NebulaIE(NebulaBaseIE):
 161     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
 162     _TESTS = [
 163         {
 164             'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
 165             'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
 166             'info_dict': {
 167                 'id': '5c271b40b13fd613090034fd',
 168                 'ext': 'mp4',
 169                 'title': 'That Time Disney Remade Beauty and the Beast',
 170                 'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
 171                 'upload_date': '20180731',
 172                 'timestamp': 1533009600,
 173                 'channel': 'Lindsay Ellis',
 174                 'channel_id': 'lindsayellis',
 175                 'uploader': 'Lindsay Ellis',
 176                 'uploader_id': 'lindsayellis',
 177             },
 178             'params': {
 179                 'usenetrc': True,
 180             },
 181         },
 182         {
 183             'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
 184             'md5': '6d4edd14ce65720fa63aba5c583fb328',
 185             'info_dict': {
 186                 'id': '5e7e78171aaf320001fbd6be',
 187                 'ext': 'mp4',
 188                 'title': 'Landing Craft - How The Allies Got Ashore',
 189                 'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
 190                 'upload_date': '20200327',
 191                 'timestamp': 1585348140,
 192                 'channel': 'Real Engineering',
 193                 'channel_id': 'realengineering',
 194                 'uploader': 'Real Engineering',
 195                 'uploader_id': 'realengineering',
 196             },
 197             'params': {
 198                 'usenetrc': True,
 199             },
 200         },
 201         {
 202             'url': 'https://nebula.app/videos/money-episode-1-the-draw',
 203             'md5': '8c7d272910eea320f6f8e6d3084eecf5',
 204             'info_dict': {
 205                 'id': '5e779ebdd157bc0001d1c75a',
 206                 'ext': 'mp4',
 207                 'title': 'Episode 1: The Draw',
 208                 'description': r'contains:There’s free money on offer… if the players can all work together.',
 209                 'upload_date': '20200323',
 210                 'timestamp': 1584980400,
 211                 'channel': 'Tom Scott Presents: Money',
 212                 'channel_id': 'tom-scott-presents-money',
 213                 'uploader': 'Tom Scott Presents: Money',
 214                 'uploader_id': 'tom-scott-presents-money',
 215             },
 216             'params': {
 217                 'usenetrc': True,
 218             },
 219         },
 220         {
 221             'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
 222             'only_matching': True,
 223         },
 224     ]
 225
 226     def _fetch_video_metadata(self, slug):
 227         return self._call_nebula_api(f'https://content.watchnebula.com/video/{slug}/',
 228                                      video_id=slug,
 229                                      auth_type='bearer',
 230                                      note='Fetching video meta data')
 231
 232     def _real_extract(self, url):
 233         slug = self._match_id(url)
 234         video = self._fetch_video_metadata(slug)
 235         return self._build_video_info(video)
 236
 237
 238 class NebulaCollectionIE(NebulaBaseIE):
 239     IE_NAME = 'nebula:collection'
 240     _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/(?!videos/)(?P<id>[-\w]+)'
 241     _TESTS = [
 242         {
 243             'url': 'https://nebula.app/tom-scott-presents-money',
 244             'info_dict': {
 245                 'id': 'tom-scott-presents-money',
 246                 'title': 'Tom Scott Presents: Money',
 247                 'description': 'Tom Scott hosts a series all about trust, negotiation and money.',
 248             },
 249             'playlist_count': 5,
 250             'params': {
 251                 'usenetrc': True,
 252             },
 253         }, {
 254             'url': 'https://nebula.app/lindsayellis',
 255             'info_dict': {
 256                 'id': 'lindsayellis',
 257                 'title': 'Lindsay Ellis',
 258                 'description': 'Enjoy these hottest of takes on Disney, Transformers, and Musicals.',
 259             },
 260             'playlist_mincount': 100,
 261             'params': {
 262                 'usenetrc': True,
 263             },
 264         },
 265     ]
 266
 267     def _generate_playlist_entries(self, collection_id, channel):
 268         episodes = channel['episodes']['results']
 269         for page_num in itertools.count(2):
 270             for episode in episodes:
 271                 yield self._build_video_info(episode)
 272             next_url = channel['episodes']['next']
 273             if not next_url:
 274                 break
 275             channel = self._call_nebula_api(next_url, collection_id, auth_type='bearer',
 276                                             note=f'Retrieving channel page {page_num}')
 277             episodes = channel['episodes']['results']
 278
 279     def _real_extract(self, url):
 280         collection_id = self._match_id(url)
 281         channel_url = f'https://content.watchnebula.com/video/channels/{collection_id}/'
 282         channel = self._call_nebula_api(channel_url, collection_id, auth_type='bearer', note='Retrieving channel')
 283         channel_details = channel['details']
 284
 285         return self.playlist_result(
 286             entries=self._generate_playlist_entries(collection_id, channel),
 287             playlist_id=collection_id,
 288             playlist_title=channel_details['title'],
 289             playlist_description=channel_details['description']
 290         )