]>
Commit | Line | Data |
---|---|---|
1 | import base64 | |
2 | import functools | |
3 | import math | |
4 | import re | |
5 | import time | |
6 | import urllib.parse | |
7 | ||
8 | from .common import InfoExtractor | |
9 | from .slideslive import SlidesLiveIE | |
10 | from ..utils import ( | |
11 | ExtractorError, | |
12 | InAdvancePagedList, | |
13 | int_or_none, | |
14 | traverse_obj, | |
15 | update_url_query, | |
16 | url_or_none, | |
17 | ) | |
18 | ||
19 | ||
class VideoKenBaseIE(InfoExtractor):
    # Maps each supported portal hostname to the organization slug that is
    # interpolated into the VideoKen analytics API URL.
    _ORGANIZATIONS = {
        'videos.icts.res.in': 'icts',
        'videos.cncf.io': 'cncf',
        'videos.neurips.cc': 'neurips',
    }
    _BASE_URL_RE = rf'https?://(?P<host>{"|".join(map(re.escape, _ORGANIZATIONS))})/'

    # Page length sent to the paginated listing endpoints by subclasses.
    _PAGE_SIZE = 12

    def _get_org_id_and_api_key(self, org, video_id):
        """Download the organization details and return ``(id, apikey)``.

        ``org`` is the organization slug placed in the API URL; ``video_id``
        is only passed through to ``_download_json`` as the item identifier.
        Raises ``KeyError`` if the response lacks ``id`` or ``apikey``.
        """
        details = self._download_json(
            f'https://analytics.videoken.com/api/videolake/{org}/details', video_id,
            note='Downloading organization ID and API key', headers={
                'Accept': 'application/json',
            })
        return details['id'], details['apikey']

    def _create_slideslive_url(self, video_url, video_id, referer):
        """Build the SlidesLive embed URL for a video.

        When ``video_url`` is missing or is a sign-in embed, the URL is
        rebuilt from ``video_id`` (with a leading ``slideslive-`` removed).
        If ``referer`` passes ``url_or_none``, the embed-parent query
        parameters are appended.

        Returns the URL, or ``None`` when there is nothing to build it from.
        """
        if not video_url and not video_id:
            return None
        if not video_url or 'embed/sign-in' in video_url:
            if not video_id:
                # The sign-in URL is unusable and there is no ID to rebuild from.
                return None
            # str.lstrip() removes a *set of characters*, not a prefix, and
            # could eat leading characters of the real ID; strip the literal
            # prefix instead.
            prefix = 'slideslive-'
            slideslive_id = video_id[len(prefix):] if video_id.startswith(prefix) else video_id
            video_url = f'https://slideslive.com/embed/{slideslive_id}'
        if url_or_none(referer):
            return update_url_query(video_url, {
                'embed_parent_url': referer,
                'embed_container_origin': f'https://{urllib.parse.urlparse(referer).netloc}',
            })
        return video_url

    def _extract_videos(self, videos, url):
        """Yield a ``url_result`` for each usable entry of an API response.

        Entries are read from the ``videos`` / ``results`` keys of ``videos``.
        Entries without an ID, or without a resolvable URL, are skipped.
        ``url`` is the page URL, passed on as the referer for SlidesLive embeds.
        """
        for video in traverse_obj(videos, (('videos', 'results'), ...)):
            video_id = traverse_obj(video, 'youtube_id', 'videoid')
            if not video_id:
                continue
            ie_key = None
            if traverse_obj(video, 'type', 'source') == 'youtube':
                # YouTube entries are handed over by bare video ID.
                video_url = video_id
                ie_key = 'Youtube'
            else:
                video_url = traverse_obj(video, 'embed_url', 'embeddableurl')
                if urllib.parse.urlparse(video_url).netloc == 'slideslive.com':
                    ie_key = SlidesLiveIE
                    video_url = self._create_slideslive_url(video_url, video_id, url)
            if not video_url:
                continue
            yield self.url_result(video_url, ie_key, video_id)
67 | ||
68 | ||
class VideoKenIE(VideoKenBaseIE):
    # Single-video pages, optionally nested under a topic or category path.
    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:(?:topic|category)/[^/#?]+/)?video/(?P<id>[\w-]+)'
    _TESTS = [{
        # neurips -> videoken -> slideslive
        'url': 'https://videos.neurips.cc/video/slideslive-38922815',
        'info_dict': {
            'id': '38922815',
            'ext': 'mp4',
            'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures',
            'timestamp': 1630939331,
            'upload_date': '20210906',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:330',
            'chapters': 'count:329',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'expected_warnings': ['Failed to download VideoKen API JSON'],
    }, {
        # neurips -> videoken -> slideslive -> youtube
        'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348',
        'info_dict': {
            'id': '2Xa_dt78rJE',
            'ext': 'mp4',
            'display_id': '38923348',
            'title': 'Machine Education',
            'description': 'Watch full version of this video at https://slideslive.com/38923348.',
            'channel': 'SlidesLive Videos - G2',
            'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w',
            'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
            'uploader': 'SlidesLive Videos - G2',
            'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w',
            'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
            'duration': 2504,
            'timestamp': 1618922125,
            'upload_date': '20200131',
            'age_limit': 0,
            'channel_follower_count': int,
            'view_count': int,
            'availability': 'unlisted',
            'live_status': 'not_live',
            'playable_in_embed': True,
            'categories': ['People & Blogs'],
            'tags': [],
            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
            'thumbnails': 'count:78',
            'chapters': 'count:77',
        },
        'params': {
            'skip_download': 'm3u8',
        },
        'expected_warnings': ['Failed to download VideoKen API JSON'],
    }, {
        # icts -> videoken -> youtube
        'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc',
        'info_dict': {
            'id': 'zysIsojYdvc',
            'ext': 'mp4',
            'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad',
            'description': 'md5:87433069d79719eeadc1962cc2ace00b',
            'channel': 'International Centre for Theoretical Sciences',
            'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ',
            'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ',
            'uploader': 'International Centre for Theoretical Sciences',
            'uploader_id': 'ICTStalks',
            'uploader_url': 'http://www.youtube.com/user/ICTStalks',
            'duration': 3372,
            'upload_date': '20191004',
            'age_limit': 0,
            'live_status': 'not_live',
            'availability': 'public',
            'playable_in_embed': True,
            'channel_follower_count': int,
            'like_count': int,
            'view_count': int,
            'categories': ['Science & Technology'],
            'tags': [],
            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
            'thumbnails': 'count:42',
            'chapters': 'count:20',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8',
        'only_matching': True,
    }, {
        'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI',
        'only_matching': True,
    }, {
        'url': 'https://videos.icts.res.in/video/d7HuP_abpKU',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a VideoKen video page to a SlidesLive or YouTube result.

        The video-info API is queried non-fatally. If it yields nothing, the
        target is inferred from the shape of the video ID instead.

        Raises ``ExtractorError`` when the API gave no data and the ID is
        neither ``slideslive-`` prefixed nor 11 characters of ``[\\w-]``.
        """
        hostname, video_id = self._match_valid_url(url).group('host', 'id')
        org_id, _ = self._get_org_id_and_api_key(self._ORGANIZATIONS[hostname], video_id)
        details = self._download_json(
            'https://analytics.videoken.com/api/videoinfo_private', video_id, query={
                'videoid': video_id,
                'org_id': org_id,
            }, headers={'Accept': 'application/json'}, note='Downloading VideoKen API JSON',
            errnote='Failed to download VideoKen API JSON', fatal=False)
        if details:
            return next(self._extract_videos({'videos': [details]}, url))
        # fallback for API error 400 response
        elif video_id.startswith('slideslive-'):
            return self.url_result(
                self._create_slideslive_url(None, video_id, url), SlidesLiveIE, video_id)
        elif re.match(r'^[\w-]{11}$', video_id):
            # The result must be returned; without `return` this branch
            # silently produced None instead of delegating to YouTube.
            return self.url_result(video_id, 'Youtube', video_id)
        else:
            raise ExtractorError('Unable to extract without VideoKen API response')
184 | ||
185 | ||
class VideoKenPlayerIE(VideoKenBaseIE):
    # Matches player.videoken.com embeds of SlidesLive videos; the captured
    # ID is the numeric part after "slideslive-".
    _VALID_URL = r'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://player.videoken.com/embed/slideslive-38968434',
        'info_dict': {
            'id': '38968434',
            'ext': 'mp4',
            'title': 'Deep Learning with Label Differential Privacy',
            'timestamp': 1643377020,
            'upload_date': '20220128',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:30',
            'chapters': 'count:29',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    def _real_extract(self, url):
        """Delegate the embedded video to the SlidesLive extractor."""
        slideslive_id = self._match_id(url)
        # No embed URL is known up front, so it is built from the ID, with
        # the player page passed as referer.
        slideslive_url = self._create_slideslive_url(None, slideslive_id, url)
        return self.url_result(slideslive_url, SlidesLiveIE, slideslive_id)
209 | ||
210 | ||
class VideoKenPlaylistIE(VideoKenBaseIE):
    # Playlist pages, optionally nested under a numeric category path.
    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'(?:category/\d+/)?playlist/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://videos.icts.res.in/category/1822/playlist/381',
        'playlist_mincount': 117,
        'info_dict': {
            'id': '381',
            'title': 'Cosmology - The Next Decade',
        },
    }]

    def _real_extract(self, url):
        """Fetch the playlist items and return them as a playlist result."""
        mobj = self._match_valid_url(url)
        hostname, playlist_id = mobj.group('host', 'id')
        organization = self._ORGANIZATIONS[hostname]
        # Only the organization ID is needed here; the API key is unused.
        org_id, _api_key = self._get_org_id_and_api_key(organization, playlist_id)
        api_url = f'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/'
        videos = self._download_json(
            api_url, playlist_id,
            headers={'Accept': 'application/json'},
            note='Downloading API JSON')
        entries = self._extract_videos(videos, url)
        return self.playlist_result(entries, playlist_id, videos.get('title'))
229 | ||
230 | ||
class VideoKenCategoryIE(VideoKenBaseIE):
    # Category listing pages: a numeric category ID with nothing but an
    # optional slash, query or fragment after it.
    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'category/(?P<id>\d+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://videos.icts.res.in/category/1822/',
        'playlist_mincount': 500,
        'info_dict': {
            'id': '1822',
            'title': 'Programs',
        },
    }, {
        'url': 'https://videos.neurips.cc/category/350/',
        'playlist_mincount': 34,
        'info_dict': {
            'id': '350',
            'title': 'NeurIPS 2018',
        },
    }, {
        'url': 'https://videos.cncf.io/category/479/',
        'playlist_mincount': 328,
        'info_dict': {
            'id': '479',
            'title': 'KubeCon + CloudNativeCon Europe\'19',
        },
    }]

    def _get_category_page(self, category_id, org_id, page=1, note=None):
        """Download one page of category videos.

        The download is non-fatal; an empty dict is returned when it yields
        nothing. ``note`` overrides the default progress message.
        """
        endpoint = f'https://analytics.videoken.com/api/videolake/{org_id}/category_videos'
        query = {
            'category_id': category_id,
            'page_number': page,
            'length': self._PAGE_SIZE,
        }
        page_json = self._download_json(
            endpoint, category_id,
            fatal=False, note=note or f'Downloading category page {page}',
            query=query, headers={'Accept': 'application/json'})
        return page_json or {}

    def _entries(self, category_id, org_id, url, page):
        """Yield the results of one page; ``page`` is zero-based, the API is one-based."""
        page_json = self._get_category_page(category_id, org_id, page + 1)
        yield from self._extract_videos(page_json, url)

    def _real_extract(self, url):
        """Return the whole category as a lazily paged playlist."""
        mobj = self._match_valid_url(url)
        hostname, category_id = mobj.group('host', 'id')
        organization = self._ORGANIZATIONS[hostname]
        org_id, _api_key = self._get_org_id_and_api_key(organization, category_id)
        # The first page carries the category name and the total record count.
        category_info = self._get_category_page(category_id, org_id, note='Downloading category info')
        category = category_info['category_name']
        record_count = int(category_info['recordsTotal'])
        total_pages = math.ceil(record_count / self._PAGE_SIZE)
        fetch_page = functools.partial(self._entries, category_id, org_id, url)
        paged_entries = InAdvancePagedList(fetch_page, total_pages, self._PAGE_SIZE)
        return self.playlist_result(paged_entries, category_id, category)
279 | ||
280 | ||
class VideoKenTopicIE(VideoKenBaseIE):
    # Topic listing pages: a (possibly percent-encoded) topic name with
    # nothing but an optional slash, query or fragment after it.
    _VALID_URL = VideoKenBaseIE._BASE_URL_RE + r'topic/(?P<id>[^/#?]+)/?(?:$|[?#])'
    _TESTS = [{
        'url': 'https://videos.neurips.cc/topic/machine%20learning/',
        'playlist_mincount': 500,
        'info_dict': {
            'id': 'machine_learning',
            'title': 'machine learning',
        },
    }, {
        'url': 'https://videos.icts.res.in/topic/gravitational%20waves/',
        'playlist_mincount': 77,
        'info_dict': {
            'id': 'gravitational_waves',
            'title': 'gravitational waves',
        },
    }, {
        'url': 'https://videos.cncf.io/topic/prometheus/',
        'playlist_mincount': 134,
        'info_dict': {
            'id': 'prometheus',
            'title': 'prometheus',
        },
    }]

    def _get_topic_page(self, topic, org_id, search_id, api_key, page=1, note=None):
        """Download one page of topic search results.

        The download is non-fatal; an empty dict is returned when it yields
        nothing. ``note`` overrides the default progress message.
        """
        query = {
            'orgid': org_id,
            'size': self._PAGE_SIZE,
            'query': topic,
            'page': page,
            'sort': 'upload_desc',
            'filter': 'all',
            'token': api_key,
            'is_topic': 'true',
            'category': '',
            'searchid': search_id,
        }
        page_json = self._download_json(
            'https://es.videoken.com/api/v1.0/get_results', topic, fatal=False,
            query=query, headers={'Accept': 'application/json'},
            note=note or f'Downloading topic page {page}')
        return page_json or {}

    def _entries(self, topic, org_id, search_id, api_key, url, page):
        """Yield the results of one page; ``page`` is zero-based, the API is one-based."""
        page_json = self._get_topic_page(topic, org_id, search_id, api_key, page + 1)
        yield from self._extract_videos(page_json, url)

    def _real_extract(self, url):
        """Return all videos of a topic as a lazily paged playlist."""
        mobj = self._match_valid_url(url)
        hostname, raw_topic = mobj.group('host', 'id')
        topic = urllib.parse.unquote(raw_topic)
        # The playlist ID is the decoded topic with spaces turned into underscores.
        topic_id = topic.replace(' ', '_')
        organization = self._ORGANIZATIONS[hostname]
        org_id, api_key = self._get_org_id_and_api_key(organization, topic)
        # The search ID is the base64 of ":<topic>:<unix time>:transient".
        search_token = f':{topic}:{int(time.time())}:transient'
        search_id = base64.b64encode(search_token.encode()).decode()
        topic_info = self._get_topic_page(
            topic, org_id, search_id, api_key, note='Downloading topic info')
        total_pages = int_or_none(topic_info['total_no_of_pages'])
        fetch_page = functools.partial(self._entries, topic, org_id, search_id, api_key, url)
        paged_entries = InAdvancePagedList(fetch_page, total_pages, self._PAGE_SIZE)
        return self.playlist_result(paged_entries, topic_id, topic)