]>
Commit | Line | Data |
---|---|---|
f1823403 HH |
1 | # coding: utf-8\r |
2 | from __future__ import unicode_literals\r | |
3 | \r | |
4 | import json\r | |
145bd631 | 5 | import time\r |
f1823403 | 6 | \r |
145bd631 | 7 | from urllib.error import HTTPError\r |
f1823403 | 8 | from .common import InfoExtractor\r |
145bd631 | 9 | from ..compat import compat_str, compat_urllib_parse_unquote, compat_urllib_parse_quote\r |
f1823403 HH |
10 | from ..utils import (\r |
11 | ExtractorError,\r | |
12 | parse_iso8601,\r | |
13 | try_get,\r | |
14 | urljoin,\r | |
15 | )\r | |
16 | \r | |
17 | \r | |
class NebulaIE(InfoExtractor):
    """Extractor for single videos on Nebula (nebula.app / watchnebula.com).

    The extractor resolves the video's metadata through the Zype API, obtains
    a Zype access token from the Nebula API, and then hands off to the Zype
    extractor via a ``url_transparent`` result.
    """

    _VALID_URL = r'https?://(?:www\.)?(?:watchnebula\.com|nebula\.app)/videos/(?P<id>[-\w]+)'
    _TESTS = [
        {
            'url': 'https://nebula.app/videos/that-time-disney-remade-beauty-and-the-beast',
            'md5': 'fe79c4df8b3aa2fea98a93d027465c7e',
            'info_dict': {
                'id': '5c271b40b13fd613090034fd',
                'ext': 'mp4',
                'title': 'That Time Disney Remade Beauty and the Beast',
                'description': 'Note: this video was originally posted on YouTube with the sponsor read included. We weren’t able to remove it without reducing video quality, so it’s presented here in its original context.',
                'upload_date': '20180731',
                'timestamp': 1533009600,
                'channel': 'Lindsay Ellis',
                'uploader': 'Lindsay Ellis',
            },
            'params': {
                'usenetrc': True,
            },
            'skip': 'All Nebula content requires authentication',
        },
        {
            'url': 'https://nebula.app/videos/the-logistics-of-d-day-landing-craft-how-the-allies-got-ashore',
            'md5': '6d4edd14ce65720fa63aba5c583fb328',
            'info_dict': {
                'id': '5e7e78171aaf320001fbd6be',
                'ext': 'mp4',
                'title': 'Landing Craft - How The Allies Got Ashore',
                'description': r're:^In this episode we explore the unsung heroes of D-Day, the landing craft.',
                'upload_date': '20200327',
                'timestamp': 1585348140,
                'channel': 'The Logistics of D-Day',
                'uploader': 'The Logistics of D-Day',
            },
            'params': {
                'usenetrc': True,
            },
            'skip': 'All Nebula content requires authentication',
        },
        {
            'url': 'https://nebula.app/videos/money-episode-1-the-draw',
            'md5': '8c7d272910eea320f6f8e6d3084eecf5',
            'info_dict': {
                'id': '5e779ebdd157bc0001d1c75a',
                'ext': 'mp4',
                'title': 'Episode 1: The Draw',
                'description': r'contains:There’s free money on offer… if the players can all work together.',
                'upload_date': '20200323',
                'timestamp': 1584980400,
                'channel': 'Tom Scott Presents: Money',
                'uploader': 'Tom Scott Presents: Money',
            },
            'params': {
                'usenetrc': True,
            },
            'skip': 'All Nebula content requires authentication',
        },
        {
            'url': 'https://watchnebula.com/videos/money-episode-1-the-draw',
            'only_matching': True,
        },
    ]
    _NETRC_MACHINE = 'watchnebula'

    # Nebula API token; filled in by _real_initialize() from the cookie jar or
    # by a credential login, and refreshed by _fetch_zype_access_token() on 401.
    _nebula_token = None

    def _retrieve_nebula_auth(self):
        """
        Log in to Nebula with the configured credentials and return a Nebula API token.

        The token is also stored in the cookie jar as the 'nebula-auth' cookie
        for 'nebula.app' (expiring one year from now), which is the cookie
        _real_initialize() looks for.

        self.raise_login_required() is called when no username/password pair is
        available, or when the login response does not carry a 'key'.
        """

        username, password = self._get_login_info()
        if not (username and password):
            self.raise_login_required()

        self.report_login()
        data = json.dumps({'email': username, 'password': password}).encode('utf8')
        # fatal=False is passed so that a failed request can be handled by the
        # falsy-response check below instead of propagating out of the download.
        response = self._download_json(
            'https://api.watchnebula.com/api/v1/auth/login/',
            data=data, fatal=False, video_id=None,
            headers={
                'content-type': 'application/json',
                # Submitting the 'sessionid' cookie always causes a 403 on auth endpoint
                'cookie': ''
            },
            note='Authenticating to Nebula with supplied credentials',
            errnote='Authentication failed or rejected')
        if not response or not response.get('key'):
            self.raise_login_required()

        # save nebula token as cookie (URL-quoted, compact JSON)
        self._set_cookie(
            'nebula.app', 'nebula-auth',
            compat_urllib_parse_quote(
                json.dumps({
                    "apiToken": response["key"],
                    "isLoggingIn": False,
                    "isLoggingOut": False,
                }, separators=(",", ":"))),
            expire_time=int(time.time()) + 86400 * 365,
        )

        return response['key']

    def _retrieve_zype_api_key(self, page_url, display_id):
        """
        Retrieve the Zype API key.

        Downloads the video page, locates the 'main.<hex>.chunk.js' script it
        references, downloads that script and reads the value assigned to
        REACT_APP_ZYPE_API_KEY. Both regex searches are fatal.
        """

        # Find the js that has the API key from the webpage and download it
        webpage = self._download_webpage(page_url, video_id=display_id)
        # The dots are escaped so that only a literal 'main.<hex>.chunk.js' matches.
        main_script_relpath = self._search_regex(
            r'<script[^>]*src="(?P<script_relpath>[^"]*main\.[0-9a-f]*\.chunk\.js)"[^>]*>', webpage,
            group='script_relpath', name='script relative path', fatal=True)
        main_script_abspath = urljoin(page_url, main_script_relpath)
        main_script = self._download_webpage(main_script_abspath, video_id=display_id,
                                             note='Retrieving Zype API key')

        return self._search_regex(
            r'REACT_APP_ZYPE_API_KEY\s*:\s*"(?P<api_key>[\w-]*)"', main_script,
            group='api_key', name='API key', fatal=True)

    def _call_zype_api(self, path, params, video_id, api_key, note):
        """
        A helper for making calls to the Zype API.

        `params` is merged over the default query ('api_key' and 'per_page': 1),
        so callers may override either default.
        """
        query = {'api_key': api_key, 'per_page': 1}
        query.update(params)
        return self._download_json('https://api.zype.com' + path, video_id, query=query, note=note)

    def _call_nebula_api(self, path, video_id, access_token, note):
        """
        A helper for making calls to the Nebula API.

        `access_token` is sent as an 'Authorization: Token <access_token>' header.
        """
        return self._download_json('https://api.watchnebula.com/api/v1' + path, video_id, headers={
            'Authorization': 'Token {access_token}'.format(access_token=access_token)
        }, note=note)

    def _fetch_zype_access_token(self, video_id):
        """
        Return the Zype access token of the authenticated Nebula user.

        The token is read from the '/auth/user/' Nebula API response. If that
        call fails with HTTP 401, a credential login is attempted once and the
        call is retried with the new Nebula token; any other error is re-raised.

        Raises ExtractorError when the response carries no Zype access token.
        """

        def fetch_user_object():
            return self._call_nebula_api(
                '/auth/user/', video_id, self._nebula_token,
                note='Retrieving Zype access token')

        try:
            user_object = fetch_user_object()
        except ExtractorError as exc:
            # if 401, attempt credential auth and retry
            if not (isinstance(exc.cause, HTTPError) and exc.cause.code == 401):
                raise
            self._nebula_token = self._retrieve_nebula_auth()
            user_object = fetch_user_object()

        access_token = try_get(user_object, lambda x: x['zype_auth_info']['access_token'], compat_str)
        if access_token:
            return access_token

        if try_get(user_object, lambda x: x['is_subscribed'], bool):
            # TODO: Reimplement the same Zype token polling the Nebula frontend implements
            # see https://github.com/ytdl-org/youtube-dl/pull/24805#issuecomment-749231532
            raise ExtractorError(
                'Unable to extract Zype access token from Nebula API authentication endpoint. '
                'Open an arbitrary video in a browser with this account to generate a token',
                expected=True)
        raise ExtractorError('Unable to extract Zype access token from Nebula API authentication endpoint')

    def _extract_channel_title(self, video_meta):
        """
        Return the channel title taken from the video's Zype categories.

        The result is the first entry of the first category that has a
        non-empty 'value'; None is returned when there is no such category.
        """
        # TODO: Implement the API calls giving us the channel list,
        # so that we can do the title lookup and then figure out the channel URL
        # `or []` also covers an explicit null 'categories' value.
        categories = (video_meta or {}).get('categories') or []
        # the channel name is the value of the first category
        for category in categories:
            if category.get('value'):
                return category['value'][0]
        return None

    def _real_initialize(self):
        """
        Establish the Nebula API token before extraction.

        A token stored in the 'nebula-auth' cookie for https://nebula.app is
        preferred; if the cookie is missing, unparsable or carries no
        'apiToken', a credential login is performed instead.
        """
        # check cookie jar for valid token
        nebula_cookie = self._get_cookies('https://nebula.app').get('nebula-auth')
        if nebula_cookie:
            # A broken cookie must not abort extraction: parse non-fatally and
            # fall through to the credential login below when no token is found.
            cookie_data = self._parse_json(
                compat_urllib_parse_unquote(nebula_cookie.value), None, fatal=False)
            self._nebula_token = try_get(cookie_data, lambda x: x['apiToken'], compat_str)
            if self._nebula_token:
                self.to_screen('Authenticating to Nebula with token from cookie jar')

        # try to authenticate using credentials if no valid token has been found
        if not self._nebula_token:
            self._nebula_token = self._retrieve_nebula_auth()

    def _real_extract(self, url):
        """
        Build a 'url_transparent' result that delegates playback to the Zype extractor.

        Metadata (title, description, timestamp, thumbnails, duration, channel)
        comes from the Zype '/videos' lookup by friendly title; the result URL
        is the Zype embed player URL carrying the user's Zype access token.

        Raises ExtractorError unless the Zype lookup returns exactly one video.
        """
        display_id = self._match_id(url)
        api_key = self._retrieve_zype_api_key(url, display_id)

        response = self._call_zype_api('/videos', {'friendly_title': display_id},
                                       display_id, api_key, note='Retrieving metadata from Zype')
        if len(response.get('response') or []) != 1:
            raise ExtractorError('Unable to find video on Zype API')
        video_meta = response['response'][0]

        video_id = video_meta['_id']
        zype_access_token = self._fetch_zype_access_token(display_id)

        channel_title = self._extract_channel_title(video_meta)

        # Tolerate a null/missing thumbnail list and skip entries without a URL,
        # since a thumbnail without a URL is of no use downstream.
        thumbnails = [{
            'id': tn.get('name'),  # this appears to be null
            'url': tn['url'],
            'width': tn.get('width'),
            'height': tn.get('height'),
        } for tn in (video_meta.get('thumbnails') or []) if tn.get('url')]

        return {
            'id': video_id,
            'display_id': display_id,
            '_type': 'url_transparent',
            'ie_key': 'Zype',
            'url': 'https://player.zype.com/embed/%s.html?access_token=%s' % (video_id, zype_access_token),
            'title': video_meta.get('title'),
            'description': video_meta.get('description'),
            'timestamp': parse_iso8601(video_meta.get('published_at')),
            'thumbnails': thumbnails,
            'duration': video_meta.get('duration'),
            'channel': channel_title,
            'uploader': channel_title,  # we chose uploader = channel name
            # TODO: uploader_url, channel_id, channel_url
        }