8 from .common
import InfoExtractor
9 from .slideslive
import SlidesLiveIE
20 class VideoKenBaseIE(InfoExtractor
):
22 'videos.icts.res.in': 'icts',
23 'videos.cncf.io': 'cncf',
24 'videos.neurips.cc': 'neurips',
26 _BASE_URL_RE
= rf
'https?://(?P<host>{"|".join(map(re.escape, _ORGANIZATIONS))})/'
30 def _get_org_id_and_api_key(self
, org
, video_id
):
31 details
= self
._download
_json
(
32 f
'https://analytics.videoken.com/api/videolake/{org}/details', video_id
,
33 note
='Downloading organization ID and API key', headers
={
34 'Accept': 'application/json',
36 return details
['id'], details
['apikey']
38 def _create_slideslive_url(self
, video_url
, video_id
, referer
):
39 if not video_url
and not video_id
:
41 elif not video_url
or 'embed/sign-in' in video_url
:
42 video_url
= f
'https://slideslive.com/embed/{video_id.lstrip("slideslive-")}'
43 if url_or_none(referer
):
44 return update_url_query(video_url
, {
45 'embed_parent_url': referer
,
46 'embed_container_origin': f
'https://{urllib.parse.urlparse(referer).netloc}',
50 def _extract_videos(self
, videos
, url
):
51 for video
in traverse_obj(videos
, (('videos', 'results'), ...)):
52 video_id
= traverse_obj(video
, 'youtube_id', 'videoid')
56 if traverse_obj(video
, 'type', 'source') == 'youtube':
60 video_url
= traverse_obj(video
, 'embed_url', 'embeddableurl')
61 if urllib
.parse
.urlparse(video_url
).netloc
== 'slideslive.com':
63 video_url
= self
._create
_slideslive
_url
(video_url
, video_id
, url
)
66 yield self
.url_result(video_url
, ie_key
, video_id
)
69 class VideoKenIE(VideoKenBaseIE
):
70 _VALID_URL
= VideoKenBaseIE
._BASE
_URL
_RE
+ r
'(?:(?:topic|category)/[^/#?]+/)?video/(?P<id>[\w-]+)'
72 # neurips -> videoken -> slideslive
73 'url': 'https://videos.neurips.cc/video/slideslive-38922815',
77 'title': 'Efficient Processing of Deep Neural Network: from Algorithms to Hardware Architectures',
78 'timestamp': 1630939331,
79 'upload_date': '20210906',
80 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
81 'thumbnails': 'count:330',
82 'chapters': 'count:329',
85 'skip_download': 'm3u8',
87 'expected_warnings': ['Failed to download VideoKen API JSON'],
89 # neurips -> videoken -> slideslive -> youtube
90 'url': 'https://videos.neurips.cc/topic/machine%20learning/video/slideslive-38923348',
94 'display_id': '38923348',
95 'title': 'Machine Education',
96 'description': 'Watch full version of this video at https://slideslive.com/38923348.',
97 'channel': 'SlidesLive Videos - G2',
98 'channel_id': 'UCOExahQQ588Da8Nft_Ltb9w',
99 'channel_url': 'https://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
100 'uploader': 'SlidesLive Videos - G2',
101 'uploader_id': 'UCOExahQQ588Da8Nft_Ltb9w',
102 'uploader_url': 'http://www.youtube.com/channel/UCOExahQQ588Da8Nft_Ltb9w',
104 'timestamp': 1618922125,
105 'upload_date': '20200131',
107 'channel_follower_count': int,
109 'availability': 'unlisted',
110 'live_status': 'not_live',
111 'playable_in_embed': True,
112 'categories': ['People & Blogs'],
114 'thumbnail': r
're:^https?://.*\.(?:jpg|webp)',
115 'thumbnails': 'count:78',
116 'chapters': 'count:77',
119 'skip_download': 'm3u8',
121 'expected_warnings': ['Failed to download VideoKen API JSON'],
123 # icts -> videoken -> youtube
124 'url': 'https://videos.icts.res.in/topic/random%20variable/video/zysIsojYdvc',
128 'title': 'Small-worlds, complex networks and random graphs (Lecture 3) by Remco van der Hofstad',
129 'description': 'md5:87433069d79719eeadc1962cc2ace00b',
130 'channel': 'International Centre for Theoretical Sciences',
131 'channel_id': 'UCO3xnVTHzB7l-nc8mABUJIQ',
132 'channel_url': 'https://www.youtube.com/channel/UCO3xnVTHzB7l-nc8mABUJIQ',
133 'uploader': 'International Centre for Theoretical Sciences',
134 'uploader_id': 'ICTStalks',
135 'uploader_url': 'http://www.youtube.com/user/ICTStalks',
137 'upload_date': '20191004',
139 'live_status': 'not_live',
140 'availability': 'public',
141 'playable_in_embed': True,
142 'channel_follower_count': int,
145 'categories': ['Science & Technology'],
147 'thumbnail': r
're:^https?://.*\.(?:jpg|webp)',
148 'thumbnails': 'count:42',
149 'chapters': 'count:20',
152 'skip_download': 'm3u8',
155 'url': 'https://videos.cncf.io/category/478/video/IL4nxbmUIX8',
156 'only_matching': True,
158 'url': 'https://videos.cncf.io/topic/kubernetes/video/YAM2d7yTrrI',
159 'only_matching': True,
161 'url': 'https://videos.icts.res.in/video/d7HuP_abpKU',
162 'only_matching': True,
165 def _real_extract(self
, url
):
166 hostname
, video_id
= self
._match
_valid
_url
(url
).group('host', 'id')
167 org_id
, _
= self
._get
_org
_id
_and
_api
_key
(self
._ORGANIZATIONS
[hostname
], video_id
)
168 details
= self
._download
_json
(
169 'https://analytics.videoken.com/api/videoinfo_private', video_id
, query
={
172 }, headers
={'Accept': 'application/json'}
, note
='Downloading VideoKen API JSON',
173 errnote
='Failed to download VideoKen API JSON', fatal
=False)
175 return next(self
._extract
_videos
({'videos': [details]}
, url
))
176 # fallback for API error 400 response
177 elif video_id
.startswith('slideslive-'):
178 return self
.url_result(
179 self
._create
_slideslive
_url
(None, video_id
, url
), SlidesLiveIE
, video_id
)
180 elif re
.match(r
'^[\w-]{11}$', video_id
):
181 self
.url_result(video_id
, 'Youtube', video_id
)
183 raise ExtractorError('Unable to extract without VideoKen API response')
186 class VideoKenPlayerIE(VideoKenBaseIE
):
187 _VALID_URL
= r
'https?://player\.videoken\.com/embed/slideslive-(?P<id>\d+)'
189 'url': 'https://player.videoken.com/embed/slideslive-38968434',
193 'title': 'Deep Learning with Label Differential Privacy',
194 'timestamp': 1643377020,
195 'upload_date': '20220128',
196 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
197 'thumbnails': 'count:30',
198 'chapters': 'count:29',
201 'skip_download': 'm3u8',
def _real_extract(self, url):
    """Delegate a VideoKen player embed to the SlidesLive extractor.

    The ID matched from *url* is turned into a SlidesLive embed URL (with
    *url* passed along as the referer) and returned as a URL result.
    """
    slideslive_id = self._match_id(url)
    embed_url = self._create_slideslive_url(None, slideslive_id, url)
    return self.url_result(embed_url, SlidesLiveIE, slideslive_id)
211 class VideoKenPlaylistIE(VideoKenBaseIE
):
212 _VALID_URL
= VideoKenBaseIE
._BASE
_URL
_RE
+ r
'(?:category/\d+/)?playlist/(?P<id>\d+)'
214 'url': 'https://videos.icts.res.in/category/1822/playlist/381',
215 'playlist_mincount': 117,
218 'title': 'Cosmology - The Next Decade',
222 def _real_extract(self
, url
):
223 hostname
, playlist_id
= self
._match
_valid
_url
(url
).group('host', 'id')
224 org_id
, _
= self
._get
_org
_id
_and
_api
_key
(self
._ORGANIZATIONS
[hostname
], playlist_id
)
225 videos
= self
._download
_json
(
226 f
'https://analytics.videoken.com/api/{org_id}/playlistitems/{playlist_id}/',
227 playlist_id
, headers
={'Accept': 'application/json'}
, note
='Downloading API JSON')
228 return self
.playlist_result(self
._extract
_videos
(videos
, url
), playlist_id
, videos
.get('title'))
231 class VideoKenCategoryIE(VideoKenBaseIE
):
232 _VALID_URL
= VideoKenBaseIE
._BASE
_URL
_RE
+ r
'category/(?P<id>\d+)/?(?:$|[?#])'
234 'url': 'https://videos.icts.res.in/category/1822/',
235 'playlist_mincount': 500,
241 'url': 'https://videos.neurips.cc/category/350/',
242 'playlist_mincount': 34,
245 'title': 'NeurIPS 2018',
248 'url': 'https://videos.cncf.io/category/479/',
249 'playlist_mincount': 328,
252 'title': 'KubeCon + CloudNativeCon Europe\'19',
256 def _get_category_page(self
, category_id
, org_id
, page
=1, note
=None):
257 return self
._download
_json
(
258 f
'https://analytics.videoken.com/api/videolake/{org_id}/category_videos', category_id
,
259 fatal
=False, note
=note
if note
else f
'Downloading category page {page}',
261 'category_id': category_id
,
263 'length': self
._PAGE
_SIZE
,
264 }, headers
={'Accept': 'application/json'}
) or {}
266 def _entries(self
, category_id
, org_id
, url
, page
):
267 videos
= self
._get
_category
_page
(category_id
, org_id
, page
+ 1)
268 yield from self
._extract
_videos
(videos
, url
)
def _real_extract(self, url):
    """Return a paged playlist for a VideoKen category page.

    One category page is downloaded up front to read ``category_name``
    (used as the playlist title) and ``recordsTotal`` (used to compute the
    page count); the remaining pages are fetched through ``_entries`` by
    the ``InAdvancePagedList``.
    """
    hostname, category_id = self._match_valid_url(url).group('host', 'id')
    organization = self._ORGANIZATIONS[hostname]
    org_id, _unused_api_key = self._get_org_id_and_api_key(organization, category_id)

    info_page = self._get_category_page(category_id, org_id, note='Downloading category info')
    title = info_page['category_name']
    # Round up so that a partially filled last page is still requested
    page_count = math.ceil(int(info_page['recordsTotal']) / self._PAGE_SIZE)

    fetch_page = functools.partial(self._entries, category_id, org_id, url)
    paged_entries = InAdvancePagedList(fetch_page, page_count, self._PAGE_SIZE)
    return self.playlist_result(paged_entries, category_id, title)
281 class VideoKenTopicIE(VideoKenBaseIE
):
282 _VALID_URL
= VideoKenBaseIE
._BASE
_URL
_RE
+ r
'topic/(?P<id>[^/#?]+)/?(?:$|[?#])'
284 'url': 'https://videos.neurips.cc/topic/machine%20learning/',
285 'playlist_mincount': 500,
287 'id': 'machine_learning',
288 'title': 'machine learning',
291 'url': 'https://videos.icts.res.in/topic/gravitational%20waves/',
292 'playlist_mincount': 77,
294 'id': 'gravitational_waves',
295 'title': 'gravitational waves'
298 'url': 'https://videos.cncf.io/topic/prometheus/',
299 'playlist_mincount': 134,
302 'title': 'prometheus',
306 def _get_topic_page(self
, topic
, org_id
, search_id
, api_key
, page
=1, note
=None):
307 return self
._download
_json
(
308 'https://es.videoken.com/api/v1.0/get_results', topic
, fatal
=False, query
={
310 'size': self
._PAGE
_SIZE
,
313 'sort': 'upload_desc',
318 'searchid': search_id
,
319 }, headers
={'Accept': 'application/json'}
,
320 note
=note
if note
else f
'Downloading topic page {page}') or {}
322 def _entries(self
, topic
, org_id
, search_id
, api_key
, url
, page
):
323 videos
= self
._get
_topic
_page
(topic
, org_id
, search_id
, api_key
, page
+ 1)
324 yield from self
._extract
_videos
(videos
, url
)
def _real_extract(self, url):
    """Return a paged playlist for a VideoKen topic page.

    The URL-quoted topic from *url* is unquoted and used both as the search
    query and as the playlist title; the playlist ID is the same text with
    spaces replaced by underscores. One topic page is downloaded up front to
    read ``total_no_of_pages``; the pages themselves are fetched through
    ``_entries`` by the ``InAdvancePagedList``.
    """
    hostname, quoted_topic = self._match_valid_url(url).group('host', 'id')
    topic = urllib.parse.unquote(quoted_topic)
    playlist_id = topic.replace(' ', '_')

    organization = self._ORGANIZATIONS[hostname]
    org_id, api_key = self._get_org_id_and_api_key(organization, topic)

    # The search ID is base64 of ":<topic>:<unix time>:transient"
    raw_search_id = f':{topic}:{int(time.time())}:transient'
    search_id = base64.b64encode(raw_search_id.encode()).decode()

    topic_info = self._get_topic_page(
        topic, org_id, search_id, api_key, note='Downloading topic info')
    total_pages = int_or_none(topic_info['total_no_of_pages'])

    fetch_page = functools.partial(self._entries, topic, org_id, search_id, api_key, url)
    paged_entries = InAdvancePagedList(fetch_page, total_pages, self._PAGE_SIZE)
    return self.playlist_result(paged_entries, playlist_id, topic)