3 import xml
.etree
.ElementTree
5 from .common
import InfoExtractor
19 class SlidesLiveIE(InfoExtractor
):
# Accepts plain presentation pages as well as the /embed/ and
# /embed/presentation/ player URLs; the numeric presentation id is
# captured in the named group "id".
_VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P<id>[0-9]+)'
22 # service_name = yoda, only XML slides info
23 'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
27 'title': 'GCC IA16 backend',
28 'timestamp': 1648189972,
29 'upload_date': '20220325',
30 'thumbnail': r
're:^https?://.*\.jpg',
31 'thumbnails': 'count:42',
32 'chapters': 'count:41',
36 'skip_download': 'm3u8',
39 # service_name = yoda, /v7/ slides
40 'url': 'https://slideslive.com/38935785',
44 'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
45 'upload_date': '20211115',
46 'timestamp': 1636996003,
47 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
48 'thumbnails': 'count:640',
49 'chapters': 'count:639',
53 'skip_download': 'm3u8',
56 # service_name = yoda, /v1/ slides
57 'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
61 'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
62 'upload_date': '20220201',
63 'thumbnail': r
're:^https?://.*\.jpg',
64 'timestamp': 1643728135,
65 'thumbnails': 'count:3',
66 'chapters': 'count:2',
70 'skip_download': 'm3u8',
73 # service_name = youtube, only XML slides info
74 'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
75 'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
78 'display_id': '38897546',
80 'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
81 'description': 'Watch full version of this video at https://slideslive.com/38897546.',
82 'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
83 'channel': 'SlidesLive Videos - G1',
84 'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
85 'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
86 'uploader': 'SlidesLive Videos - G1',
87 'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
88 'live_status': 'not_live',
89 'upload_date': '20160710',
90 'timestamp': 1618786715,
95 'channel_follower_count': int,
97 'thumbnail': r
're:^https?://.*\.(?:jpg|webp)',
98 'thumbnails': 'count:169',
99 'playable_in_embed': True,
100 'availability': 'unlisted',
102 'categories': ['People & Blogs'],
103 'chapters': 'count:168',
106 # embed-only presentation, only XML slides info
107 'url': 'https://slideslive.com/embed/presentation/38925850',
111 'title': 'Towards a Deep Network Architecture for Structured Smoothness',
112 'thumbnail': r
're:^https?://.*\.jpg',
113 'thumbnails': 'count:8',
114 'timestamp': 1629671508,
115 'upload_date': '20210822',
116 'chapters': 'count:7',
120 'skip_download': 'm3u8',
123 # embed-only presentation, only JSON slides info, /v5/ slides (.png)
124 'url': 'https://slideslive.com/38979920/',
128 'title': 'MoReL: Multi-omics Relational Learning',
129 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
130 'thumbnails': 'count:7',
131 'timestamp': 1654714970,
132 'upload_date': '20220608',
133 'chapters': 'count:6',
137 'skip_download': 'm3u8',
141 'url': 'https://slideslive.com/38954074',
145 'title': 'Decentralized Attribution of Generative Models',
146 'thumbnail': r
're:^https?://.*\.jpg',
147 'thumbnails': 'count:16',
148 'timestamp': 1622806321,
149 'upload_date': '20210604',
150 'chapters': 'count:15',
154 'skip_download': 'm3u8',
158 'url': 'https://slideslive.com/38979570/',
162 'title': 'Efficient Active Search for Combinatorial Optimization Problems',
163 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
164 'thumbnails': 'count:9',
165 'timestamp': 1654714896,
166 'upload_date': '20220608',
167 'chapters': 'count:8',
171 'skip_download': 'm3u8',
175 'url': 'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F',
179 'title': 'The Representation Power of Neural Networks',
180 'timestamp': 1654714962,
181 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
182 'thumbnails': 'count:22',
183 'upload_date': '20220608',
184 'chapters': 'count:21',
188 'skip_download': 'm3u8',
191 # /v7/ slides, 2 video slides
192 'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com',
195 'id': '38979682-playlist',
196 'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
202 'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
203 'timestamp': 1654714920,
204 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
205 'thumbnails': 'count:30',
206 'upload_date': '20220608',
207 'chapters': 'count:31',
212 'id': '38979682-021',
214 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
216 'timestamp': 1654714920,
217 'upload_date': '20220608',
221 'id': '38979682-024',
223 'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
225 'timestamp': 1654714920,
226 'upload_date': '20220608',
230 'skip_download': 'm3u8',
233 # /v6/ slides, 1 video slide, edit.videoken.com embed
234 'url': 'https://slideslive.com/38979481/',
237 'id': '38979481-playlist',
238 'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
244 'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
245 'timestamp': 1654714877,
246 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
247 'thumbnails': 'count:43',
248 'upload_date': '20220608',
249 'chapters': 'count:43',
254 'id': '38979481-013',
256 'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
258 'timestamp': 1654714877,
259 'upload_date': '20220608',
263 'skip_download': 'm3u8',
266 # /v3/ slides, .jpg and .png, service_name = youtube
267 'url': 'https://slideslive.com/embed/38932460/',
270 'display_id': '38932460',
272 'title': 'Active Learning for Hierarchical Multi-Label Classification',
273 'description': 'Watch full version of this video at https://slideslive.com/38932460.',
274 'channel': 'SlidesLive Videos - A',
275 'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
276 'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
277 'uploader': 'SlidesLive Videos - A',
278 'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
279 'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
280 'upload_date': '20200903',
281 'timestamp': 1602599092,
284 'live_status': 'not_live',
285 'playable_in_embed': True,
286 'availability': 'unlisted',
287 'categories': ['People & Blogs'],
289 'channel_follower_count': int,
292 'thumbnail': r
're:^https?://.*\.(?:jpg|png|webp)',
293 'thumbnails': 'count:21',
294 'chapters': 'count:20',
297 'skip_download': 'm3u8',
300 # /v3/ slides, .png only, service_name = yoda
301 'url': 'https://slideslive.com/38983994',
305 'title': 'Zero-Shot AutoML with Pretrained Models',
306 'timestamp': 1662384834,
307 'upload_date': '20220905',
308 'thumbnail': r
're:^https?://.*\.(?:jpg|png)',
309 'thumbnails': 'count:23',
310 'chapters': 'count:22',
314 'skip_download': 'm3u8',
317 # service_name = yoda
318 'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
319 'only_matching': True,
321 # dead link, service_name = url
322 'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
323 'only_matching': True,
325 # dead link, service_name = vimeo
326 'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
327 'only_matching': True,
331 # only XML slides info
332 'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html',
336 'title': 'Towards a Deep Network Architecture for Structured Smoothness',
337 'thumbnail': r
're:^https?://.*\.jpg',
338 'thumbnails': 'count:8',
339 'timestamp': 1629671508,
340 'upload_date': '20210822',
341 'chapters': 'count:7',
345 'skip_download': 'm3u8',
def _extract_embed_urls(cls, url, webpage):
    """Yield a SlidesLive embed URL for each ``SlidesLiveEmbed`` player in *webpage*.

    Args:
        url: URL of the page that hosts the embed. It is passed on as the
            ``embed_parent_url`` query parameter, and its scheme and host
            form the ``embed_container_origin`` parameter.
        webpage: HTML/JS source of the hosting page.

    Yields:
        ``https://slideslive.com/embed/presentation/<id>`` URLs with both
        query parameters attached, one per ``presentationId`` found.
    """
    # Reference: https://slideslive.com/embed_presentation.js
    embed_ids = re.findall(
        r'(?s)new\s+SlidesLiveEmbed\s*\([^)]+\bpresentationId:\s*["\'](\d+)["\']', webpage)
    if not embed_ids:
        # Nothing embedded: return before touching *url* at all.
        return

    # The hosting page's origin is the same for every embed, so compute it once
    # instead of re-parsing *url* on every match.
    url_parsed = urllib.parse.urlparse(url)
    origin = f'{url_parsed.scheme}://{url_parsed.netloc}'

    for embed_id in embed_ids:
        yield update_url_query(
            f'https://slideslive.com/embed/presentation/{embed_id}', {
                'embed_parent_url': url,
                'embed_container_origin': origin,
            })
def _download_embed_webpage_handle(self, video_id, headers):
    """Download the embed player page for *video_id*.

    The request is sent with *headers*. The ``Referer`` and ``Origin``
    entries of *headers* are also mapped (through ``traverse_obj``) to the
    ``embed_parent_url`` and ``embed_container_origin`` query parameters.

    Returns:
        The result of ``self._download_webpage_handle``; callers in this
        class unpack it as ``(webpage, url_handle)``.
    """
    embed_page_url = f'https://slideslive.com/embed/presentation/{video_id}'
    embed_query = traverse_obj(headers, {
        'embed_parent_url': 'Referer',
        'embed_container_origin': 'Origin',
    })
    return self._download_webpage_handle(
        embed_page_url, video_id, headers=headers, query=embed_query)
def _extract_custom_m3u8_info(self, m3u8_data):
    """Parse the custom ``#EXT-SL-*`` tags of a player playlist into a dict.

    Args:
        m3u8_data: Playlist text. Only lines starting with ``#EXT-SL-``
            are considered; each is split at the first ``:`` into a tag
            and a value.

    Returns:
        A dict keyed by the short names in ``lookup`` below. Tags not
        listed there are ignored. The ``video_servers`` and ``subtitles``
        values, when present, are decoded with ``self._parse_json`` and
        fall back to ``[]`` if decoding yields nothing.
    """
    prefix = '#EXT-SL-'
    lookup = {
        'PRESENTATION-TITLE': 'title',
        'PRESENTATION-UPDATED-AT': 'timestamp',
        'PRESENTATION-THUMBNAIL': 'thumbnail',
        'PLAYLIST-TYPE': 'playlist_type',
        'VOD-VIDEO-SERVICE-NAME': 'service_name',
        'VOD-VIDEO-ID': 'service_id',
        'VOD-VIDEO-SERVERS': 'video_servers',
        'VOD-SUBTITLES': 'subtitles',
        'VOD-SLIDES-JSON-URL': 'slides_json_url',
        'VOD-SLIDES-XML-URL': 'slides_xml_url',
    }

    m3u8_dict = {}
    for line in m3u8_data.splitlines():
        if not line.startswith(prefix):
            continue
        tag, _, value = line.partition(':')
        # Slice the prefix off. str.lstrip() treats its argument as a set of
        # characters, so it would also eat leading '#', 'E', 'X', 'T', 'S',
        # 'L' and '-' characters belonging to the tag name itself.
        key = lookup.get(tag[len(prefix):])
        if not key:
            continue
        m3u8_dict[key] = value

    # Some values are stringified JSON arrays
    for key in ('video_servers', 'subtitles'):
        if key in m3u8_dict:
            m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or []

    return m3u8_dict
def _extract_formats_and_duration(self, cdn_hostname, path, video_id, skip_duration=False):
    """Collect HLS and DASH formats for one video and, optionally, its duration.

    Args:
        cdn_hostname: Host serving the manifests.
        path: Path component placed between the host and ``master.m3u8`` /
            ``master.mpd``.
        video_id: Id passed to the download helpers.
        skip_duration: When true, no duration lookup is performed and the
            returned duration is ``None``.

    Returns:
        A ``(formats, duration)`` tuple. ``formats`` is the HLS formats
        followed by the DASH formats. ``duration`` is taken from the HLS
        manifest first, and from the DASH manifest only if that gave
        nothing.
    """
    formats, duration = [], None

    hls_formats = self._extract_m3u8_formats(
        f'https://{cdn_hostname}/{path}/master.m3u8',
        video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)
    # The manifest is fetched with fatal=False, so the result may be empty;
    # only index into it when there is at least one format.
    if hls_formats:
        if not skip_duration:
            duration = self._extract_m3u8_vod_duration(
                hls_formats[0]['url'], video_id, note='Extracting duration from HLS manifest')
        formats.extend(hls_formats)

    dash_formats = self._extract_mpd_formats(
        f'https://{cdn_hostname}/{path}/master.mpd', video_id, mpd_id='dash', fatal=False)
    # Same reasoning: skip the extra duration request when DASH yielded nothing.
    if dash_formats:
        if not duration and not skip_duration:
            duration = self._extract_mpd_vod_duration(
                f'https://{cdn_hostname}/{path}/master.mpd', video_id,
                note='Extracting duration from DASH manifest')
        formats.extend(dash_formats)

    return formats, duration
def _real_extract(self, url):
    """Extract a SlidesLive presentation, including slide thumbnails and chapters.

    Steps:
      1. Download the embed page, retrying with an allowed domain as
         Referer/Origin when the first request is redirected to a
         ``domain_not_allowed`` URL.
      2. Fetch and parse the player info playlist.
      3. Build thumbnails and chapters from the slides JSON, or from the
         slides XML when the JSON gave nothing.
      4. Resolve the main video according to ``service_name``.
      5. If any slide carries its own video, return a playlist of the main
         video followed by those slide videos.

    Raises:
        ExtractorError: for embed-only presentations with no allowed domain
            advertised, and for an unrecognised ``service_name``.
    """
    video_id = self._match_id(url)
    webpage, urlh = self._download_embed_webpage_handle(
        video_id, headers=traverse_obj(parse_qs(url), {
            'Referer': ('embed_parent_url', -1),
            'Origin': ('embed_container_origin', -1)}))
    redirect_url = urlh.url
    if 'domain_not_allowed' in redirect_url:
        domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False)
        if not domain:
            raise ExtractorError(
                'This is an embed-only presentation. Try passing --referer', expected=True)
        # Retry pretending to be embedded on the first allowed domain.
        webpage, _ = self._download_embed_webpage_handle(video_id, headers={
            'Referer': f'https://{domain}/',
            'Origin': f'https://{domain}',
        })

    player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
    player_data = self._download_webpage(
        f'https://ben.slideslive.com/player/{video_id}', video_id,
        note='Downloading player info', query={'player_token': player_token})
    player_info = self._extract_custom_m3u8_info(player_data)

    service_name = player_info['service_name'].lower()
    # Validate explicitly: an ``assert`` is removed under ``python -O`` and
    # would let an unknown service fall through to the generic branch below.
    if service_name not in ('url', 'yoda', 'vimeo', 'youtube'):
        raise ExtractorError(f'Unsupported service name: {service_name!r}')
    service_id = player_info['service_id']

    slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s%s'
    slides, slides_info = {}, []

    if player_info.get('slides_json_url'):
        slides = self._download_json(
            player_info['slides_json_url'], video_id, fatal=False,
            note='Downloading slides JSON', errnote=False) or {}
        slide_ext_default = '.png'
        slide_quality = traverse_obj(slides, ('slide_qualities', 0))
        if slide_quality:
            # When a quality is listed, slides come from the CDN as .jpg by default.
            slide_ext_default = '.jpg'
            slide_url_template = f'https://cdn.slideslive.com/data/presentations/%s/slides/{slide_quality}/%s%s'
        for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...), expected_type=dict), 1):
            slides_info.append((
                slide_id, traverse_obj(slide, ('image', 'name')),
                traverse_obj(slide, ('image', 'extname'), default=slide_ext_default),
                int_or_none(slide.get('time'), scale=1000)))

    if not slides and player_info.get('slides_xml_url'):
        slides = self._download_xml(
            player_info['slides_xml_url'], video_id, fatal=False,
            note='Downloading slides XML', errnote='Failed to download slides info')
        if isinstance(slides, xml.etree.ElementTree.Element):
            slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
            # NOTE(review): unlike the JSON branch, numbering here starts at 0 - confirm intended.
            for slide_id, slide in enumerate(slides.findall('./slide')):
                slides_info.append((
                    slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
                    int_or_none(xpath_text(slide, './timeSec', 'time'))))

    chapters, thumbnails = [], []
    if url_or_none(player_info.get('thumbnail')):
        thumbnails.append({'id': 'cover', 'url': player_info['thumbnail']})
    for slide_id, slide_path, slide_ext, start_time in slides_info:
        # A slide without an image still marks a chapter boundary.
        if slide_path:
            thumbnails.append({
                'id': f'{slide_id:03d}',
                'url': slide_url_template % (video_id, slide_path, slide_ext),
            })
        chapters.append({
            'title': f'Slide {slide_id:03d}',
            'start_time': start_time,
        })

    subtitles = {}
    for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
        webvtt_url = url_or_none(sub.get('webvtt_url'))
        if not webvtt_url:
            continue
        # NOTE(review): entry fields reconstructed as url + 'vtt' ext - confirm.
        subtitles.setdefault(sub.get('language') or 'en', []).append({
            'url': webvtt_url,
            'ext': 'vtt',
        })

    info = {
        'id': video_id,
        'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
        'timestamp': unified_timestamp(player_info.get('timestamp')),
        'is_live': player_info.get('playlist_type') != 'vod',
        'thumbnails': thumbnails,
        'chapters': chapters,
        'subtitles': subtitles,
    }

    if service_name == 'url':
        info['url'] = service_id
    elif service_name == 'yoda':
        formats, duration = self._extract_formats_and_duration(
            player_info['video_servers'][0], service_id, video_id)
        info.update({
            'duration': duration,
            'formats': formats,
        })
    else:
        # vimeo / youtube: hand over to the extractor named after the service.
        info.update({
            '_type': 'url_transparent',
            'url': service_id,
            'ie_key': service_name.capitalize(),
            'display_id': video_id,
        })
        if service_name == 'vimeo':
            # NOTE(review): smuggled payload assumed to forward this page as Referer - confirm.
            info['url'] = smuggle_url(
                f'https://player.vimeo.com/video/{service_id}',
                {'http_headers': {'Referer': url}})

    video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
    if not video_slides:
        return info

    def entries():
        # The presentation itself is always the first playlist entry.
        yield info

        service_data = self._download_json(
            f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data',
            video_id, fatal=False, query={
                'player_token': player_token,
                'videos': ','.join(video_slides),
            }, note='Downloading video slides info', errnote='Failed to download video slides info') or {}

        for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
            if not traverse_obj(slide, ('video', 'service')) == 'yoda':
                continue
            video_path = traverse_obj(slide, ('video', 'id'))
            cdn_hostname = traverse_obj(service_data, (
                video_path, 'video_servers', ...), get_all=False)
            if not cdn_hostname or not video_path:
                continue
            formats, _ = self._extract_formats_and_duration(
                cdn_hostname, video_path, video_id, skip_duration=True)
            if not formats:
                continue
            yield {
                'id': f'{video_id}-{slide_id:03d}',
                'title': f'{info["title"]} - Slide {slide_id:03d}',
                'timestamp': info['timestamp'],
                'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000),
                'formats': formats,
            }

    return self.playlist_result(entries(), f'{video_id}-playlist', info['title'])