[yt-dlp.git] / yt_dlp / extractor / slideslive.py

import re
import urllib.parse
import xml.etree.ElementTree

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_qs,
    smuggle_url,
    traverse_obj,
    unified_timestamp,
    update_url_query,
    url_or_none,
    xpath_text,
)


class SlidesLiveIE(InfoExtractor):
    _VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P<id>[0-9]+)'
    _TESTS = [{
        # service_name = yoda, only XML slides info
        'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
        'info_dict': {
            'id': '38902413',
            'ext': 'mp4',
            'title': 'GCC IA16 backend',
            'timestamp': 1697793372,
            'upload_date': '20231020',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:42',
            'chapters': 'count:41',
            'duration': 1638,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # service_name = yoda, /v7/ slides
        'url': 'https://slideslive.com/38935785',
        'info_dict': {
            'id': '38935785',
            'ext': 'mp4',
            'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
            'upload_date': '20231020',
            'timestamp': 1697807002,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:640',
            'chapters': 'count:639',
            'duration': 9832,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # service_name = yoda, /v1/ slides
        'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
        'info_dict': {
            'id': '38973182',
            'ext': 'mp4',
            'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
            'upload_date': '20231020',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1697822521,
            'thumbnails': 'count:3',
            'chapters': 'count:2',
            'duration': 5889,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # formerly youtube, converted to native
        'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
        'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
        'info_dict': {
            'id': '38897546',
            'ext': 'mp4',
            'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
            'thumbnail': r're:^https?://.*\.jpg',
            'upload_date': '20231029',
            'timestamp': 1698588144,
            'thumbnails': 'count:169',
            'chapters': 'count:168',
            'duration': 6827,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # embed-only presentation, only XML slides info
        'url': 'https://slideslive.com/embed/presentation/38925850',
        'info_dict': {
            'id': '38925850',
            'ext': 'mp4',
            'title': 'Towards a Deep Network Architecture for Structured Smoothness',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:8',
            'timestamp': 1697803109,
            'upload_date': '20231020',
            'chapters': 'count:7',
            'duration': 326,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # embed-only presentation, only JSON slides info, /v5/ slides (.png)
        'url': 'https://slideslive.com/38979920/',
        'info_dict': {
            'id': '38979920',
            'ext': 'mp4',
            'title': 'MoReL: Multi-omics Relational Learning',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:7',
            'timestamp': 1697824939,
            'upload_date': '20231020',
            'chapters': 'count:6',
            'duration': 171,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v2/ slides (.jpg)
        'url': 'https://slideslive.com/38954074',
        'info_dict': {
            'id': '38954074',
            'ext': 'mp4',
            'title': 'Decentralized Attribution of Generative Models',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:16',
            'timestamp': 1697814901,
            'upload_date': '20231020',
            'chapters': 'count:15',
            'duration': 306,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v4/ slides (.png)
        'url': 'https://slideslive.com/38979570/',
        'info_dict': {
            'id': '38979570',
            'ext': 'mp4',
            'title': 'Efficient Active Search for Combinatorial Optimization Problems',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:9',
            'timestamp': 1697824757,
            'upload_date': '20231020',
            'chapters': 'count:8',
            'duration': 295,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v10/ slides
        'url': 'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F',
        'info_dict': {
            'id': '38979880',
            'ext': 'mp4',
            'title': 'The Representation Power of Neural Networks',
            'timestamp': 1697824919,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:22',
            'upload_date': '20231020',
            'chapters': 'count:21',
            'duration': 294,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v7/ slides, 2 video slides
        'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com',
        'playlist_count': 3,
        'info_dict': {
            'id': '38979682-playlist',
            'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
        },
        'playlist': [{
            'info_dict': {
                'id': '38979682',
                'ext': 'mp4',
                'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
                'timestamp': 1697824815,
                'thumbnail': r're:^https?://.*\.(?:jpg|png)',
                'thumbnails': 'count:30',
                'upload_date': '20231020',
                'chapters': 'count:31',
                'duration': 272,
            },
        }, {
            'info_dict': {
                'id': '38979682-021',
                'ext': 'mp4',
                'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
                'duration': 3,
                'timestamp': 1697824815,
                'upload_date': '20231020',
            },
        }, {
            'info_dict': {
                'id': '38979682-024',
                'ext': 'mp4',
                'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
                'duration': 4,
                'timestamp': 1697824815,
                'upload_date': '20231020',
            },
        }],
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v6/ slides, 1 video slide, edit.videoken.com embed
        'url': 'https://slideslive.com/38979481/',
        'playlist_count': 2,
        'info_dict': {
            'id': '38979481-playlist',
            'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
        },
        'playlist': [{
            'info_dict': {
                'id': '38979481',
                'ext': 'mp4',
                'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
                'timestamp': 1697824716,
                'thumbnail': r're:^https?://.*\.(?:jpg|png)',
                'thumbnails': 'count:43',
                'upload_date': '20231020',
                'chapters': 'count:43',
                'duration': 315,
            },
        }, {
            'info_dict': {
                'id': '38979481-013',
                'ext': 'mp4',
                'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
                'duration': 3,
                'timestamp': 1697824716,
                'upload_date': '20231020',
            },
        }],
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v3/ slides, .jpg and .png, service_name = youtube
        'url': 'https://slideslive.com/embed/38932460/',
        'info_dict': {
            'id': 'RTPdrgkyTiE',
            'display_id': '38932460',
            'ext': 'mp4',
            'title': 'Active Learning for Hierarchical Multi-Label Classification',
            'description': 'Watch full version of this video at https://slideslive.com/38932460.',
            'channel': 'SlidesLive Videos - A',
            'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
            'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
            'uploader': 'SlidesLive Videos - A',
            'uploader_id': '@slideslivevideos-a6075',
            'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
            'upload_date': '20200903',
            'timestamp': 1697805922,
            'duration': 942,
            'age_limit': 0,
            'live_status': 'not_live',
            'playable_in_embed': True,
            'availability': 'unlisted',
            'categories': ['People & Blogs'],
            'tags': [],
            'channel_follower_count': int,
            'like_count': int,
            'view_count': int,
            'thumbnail': r're:^https?://.*\.(?:jpg|png|webp)',
            'thumbnails': 'count:21',
            'chapters': 'count:20',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v3/ slides, .png only, service_name = yoda
        'url': 'https://slideslive.com/38983994',
        'info_dict': {
            'id': '38983994',
            'ext': 'mp4',
            'title': 'Zero-Shot AutoML with Pretrained Models',
            'timestamp': 1697826708,
            'upload_date': '20231020',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:23',
            'chapters': 'count:22',
            'duration': 295,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # service_name = yoda
        'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
        'only_matching': True,
    }, {
        # dead link, service_name = url
        'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
        'only_matching': True,
    }, {
        # dead link, service_name = vimeo
        'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
        'only_matching': True,
    }]

    _WEBPAGE_TESTS = [{
        # only XML slides info
        'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html',
        'info_dict': {
            'id': '38925850',
            'ext': 'mp4',
            'title': 'Towards a Deep Network Architecture for Structured Smoothness',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:8',
            'timestamp': 1697803109,
            'upload_date': '20231020',
            'chapters': 'count:7',
            'duration': 326,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        # Reference: https://slideslive.com/embed_presentation.js
        for embed_id in re.findall(r'(?s)new\s+SlidesLiveEmbed\s*\([^)]+\bpresentationId:\s*["\'](\d+)["\']', webpage):
            url_parsed = urllib.parse.urlparse(url)
            origin = f'{url_parsed.scheme}://{url_parsed.netloc}'
            yield update_url_query(
                f'https://slideslive.com/embed/presentation/{embed_id}', {
                    'embed_parent_url': url,
                    'embed_container_origin': origin,
                })

    def _download_embed_webpage_handle(self, video_id, headers):
        """Download the embed page of presentation *video_id*.

        *headers* is sent with the request as is; its 'Referer' and 'Origin'
        entries are additionally passed as the ``embed_parent_url`` and
        ``embed_container_origin`` query parameters.

        Returns the result of ``_download_webpage_handle`` (unpacked by the
        caller in this file as ``(webpage, url_handle)``).
        """
        embed_page_url = f'https://slideslive.com/embed/presentation/{video_id}'
        embed_query = traverse_obj(headers, {
            'embed_parent_url': 'Referer',
            'embed_container_origin': 'Origin',
        })
        return self._download_webpage_handle(
            embed_page_url, video_id, headers=headers, query=embed_query)

    def _extract_custom_m3u8_info(self, m3u8_data):
        m3u8_dict = {}

        lookup = {
            'PRESENTATION-TITLE': 'title',
            'PRESENTATION-UPDATED-AT': 'timestamp',
            'PRESENTATION-THUMBNAIL': 'thumbnail',
            'PLAYLIST-TYPE': 'playlist_type',
            'VOD-VIDEO-SERVICE-NAME': 'service_name',
            'VOD-VIDEO-ID': 'service_id',
            'VOD-VIDEO-SERVERS': 'video_servers',
            'VOD-SUBTITLES': 'subtitles',
            'VOD-SLIDES-JSON-URL': 'slides_json_url',
            'VOD-SLIDES-XML-URL': 'slides_xml_url',
        }

        for line in m3u8_data.splitlines():
            if not line.startswith('#EXT-SL-'):
                continue
            tag, _, value = line.partition(':')
            key = lookup.get(tag[8:])
            if not key:
                continue
            m3u8_dict[key] = value

        # Some values are stringified JSON arrays
        for key in ('video_servers', 'subtitles'):
            if key in m3u8_dict:
                m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or []

        return m3u8_dict

    def _extract_formats_and_duration(self, cdn_hostname, path, video_id, skip_duration=False):
        formats, duration = [], None

        hls_formats = self._extract_m3u8_formats(
            f'https://{cdn_hostname}/{path}/master.m3u8',
            video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)
        if hls_formats:
            if not skip_duration:
                duration = self._extract_m3u8_vod_duration(
                    hls_formats[0]['url'], video_id, note='Extracting duration from HLS manifest')
            formats.extend(hls_formats)

        dash_formats = self._extract_mpd_formats(
            f'https://{cdn_hostname}/{path}/master.mpd', video_id, mpd_id='dash', fatal=False)
        if dash_formats:
            if not duration and not skip_duration:
                duration = self._extract_mpd_vod_duration(
                    f'https://{cdn_hostname}/{path}/master.mpd', video_id,
                    note='Extracting duration from DASH manifest')
            formats.extend(dash_formats)

        return formats, duration

    def _real_extract(self, url):
        """Extract a SlidesLive presentation together with its slides.

        The embed page is downloaded to obtain a player token, which is used to
        fetch the player info (a document with custom ``#EXT-SL-`` tags).
        Slide images are exposed as thumbnails and slide timings as chapters.

        Returns the info dict of the presentation, or - when the slides JSON
        lists video slides - a playlist whose first entry is the presentation,
        followed by one entry per 'yoda'-hosted video slide for which formats
        could be extracted.

        Raises ExtractorError if the presentation is embed-only and the
        redirect does not name an allowed domain, or if the player info names
        a video service this extractor does not handle.
        """
        video_id = self._match_id(url)
        # Pass embed_parent_url / embed_container_origin from the input URL's
        # query (last value of each) on as the Referer / Origin headers
        webpage, urlh = self._download_embed_webpage_handle(
            video_id, headers=traverse_obj(parse_qs(url), {
                'Referer': ('embed_parent_url', -1),
                'Origin': ('embed_container_origin', -1)}))
        redirect_url = urlh.url
        if 'domain_not_allowed' in redirect_url:
            # Retry as if embedded on the first allowed domain named in the redirect URL
            domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False)
            if not domain:
                raise ExtractorError(
                    'This is an embed-only presentation. Try passing --referer', expected=True)
            webpage, _ = self._download_embed_webpage_handle(video_id, headers={
                'Referer': f'https://{domain}/',
                'Origin': f'https://{domain}',
            })

        player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
        player_data = self._download_webpage(
            f'https://ben.slideslive.com/player/{video_id}', video_id,
            note='Downloading player info', query={'player_token': player_token})
        player_info = self._extract_custom_m3u8_info(player_data)

        service_name = player_info['service_name'].lower()
        # Validate with an explicit raise: an assert is stripped when Python runs
        # with -O, which would let an unknown service reach the url_transparent branch
        if service_name not in ('url', 'yoda', 'vimeo', 'youtube'):
            raise ExtractorError(f'Unsupported video service: {service_name!r}')
        service_id = player_info['service_id']

        # Default slide image location; overridden below depending on the slides source
        slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s%s'
        # slides_info holds (slide number, image name, image extension, start time) tuples
        slides, slides_info = {}, []

        if player_info.get('slides_json_url'):
            slides = self._download_json(
                player_info['slides_json_url'], video_id, fatal=False,
                note='Downloading slides JSON', errnote=False) or {}
            slide_ext_default = '.png'
            slide_quality = traverse_obj(slides, ('slide_qualities', 0))
            if slide_quality:
                # When a slide quality is listed, images are taken from the CDN
                # path of that quality and default to .jpg
                slide_ext_default = '.jpg'
                slide_url_template = f'https://cdn.slideslive.com/data/presentations/%s/slides/{slide_quality}/%s%s'
            for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...), expected_type=dict), 1):
                slides_info.append((
                    slide_id, traverse_obj(slide, ('image', 'name')),
                    traverse_obj(slide, ('image', 'extname'), default=slide_ext_default),
                    # scaled down by 1000 - presumably milliseconds to seconds
                    int_or_none(slide.get('time'), scale=1000)))

        # Fall back to the XML slides info only if the JSON one gave nothing
        if not slides and player_info.get('slides_xml_url'):
            slides = self._download_xml(
                player_info['slides_xml_url'], video_id, fatal=False,
                note='Downloading slides XML', errnote='Failed to download slides info')
            if isinstance(slides, xml.etree.ElementTree.Element):
                slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
                # NOTE(review): XML slides are numbered from 0 while JSON slides are
                # numbered from 1 - confirm whether this difference is intended
                for slide_id, slide in enumerate(slides.findall('./slide')):
                    slides_info.append((
                        slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
                        int_or_none(xpath_text(slide, './timeSec', 'time'))))

        chapters, thumbnails = [], []
        if url_or_none(player_info.get('thumbnail')):
            thumbnails.append({'id': 'cover', 'url': player_info['thumbnail']})
        for slide_id, slide_path, slide_ext, start_time in slides_info:
            # A slide without an image name still gets a chapter, just no thumbnail
            if slide_path:
                thumbnails.append({
                    'id': f'{slide_id:03d}',
                    'url': slide_url_template % (video_id, slide_path, slide_ext),
                })
            chapters.append({
                'title': f'Slide {slide_id:03d}',
                'start_time': start_time,
            })

        subtitles = {}
        for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
            webvtt_url = url_or_none(sub.get('webvtt_url'))
            if not webvtt_url:
                continue
            # Subtitles without a language are filed under 'en'
            subtitles.setdefault(sub.get('language') or 'en', []).append({
                'url': webvtt_url,
                'ext': 'vtt',
            })

        info = {
            'id': video_id,
            'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
            'timestamp': unified_timestamp(player_info.get('timestamp')),
            'is_live': player_info.get('playlist_type') != 'vod',
            'thumbnails': thumbnails,
            'chapters': chapters,
            'subtitles': subtitles,
        }

        if service_name == 'url':
            # service_id is used directly as the media URL
            info['url'] = service_id
        elif service_name == 'yoda':
            # Natively hosted: formats come from the first listed video server
            formats, duration = self._extract_formats_and_duration(
                player_info['video_servers'][0], service_id, video_id)
            info.update({
                'duration': duration,
                'formats': formats,
            })
        else:
            # vimeo / youtube: delegate to the extractor named after the service
            info.update({
                '_type': 'url_transparent',
                'url': service_id,
                'ie_key': service_name.capitalize(),
                'display_id': video_id,
            })
            if service_name == 'vimeo':
                info['url'] = smuggle_url(
                    f'https://player.vimeo.com/video/{service_id}',
                    {'referer': url})

        video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
        if not video_slides:
            return info

        def entries():
            # The presentation itself is always the first playlist entry
            yield info

            service_data = self._download_json(
                f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data',
                video_id, fatal=False, query={
                    'player_token': player_token,
                    'videos': ','.join(video_slides),
                }, note='Downloading video slides info', errnote='Failed to download video slides info') or {}

            for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
                # Only video slides hosted on the 'yoda' service are extracted
                if traverse_obj(slide, ('video', 'service')) != 'yoda':
                    continue
                video_path = traverse_obj(slide, ('video', 'id'))
                cdn_hostname = traverse_obj(service_data, (
                    video_path, 'video_servers', ...), get_all=False)
                if not cdn_hostname or not video_path:
                    continue
                # Duration is taken from the slide's own metadata below, so skip probing for it
                formats, _ = self._extract_formats_and_duration(
                    cdn_hostname, video_path, video_id, skip_duration=True)
                if not formats:
                    continue
                yield {
                    'id': f'{video_id}-{slide_id:03d}',
                    'title': f'{info["title"]} - Slide {slide_id:03d}',
                    'timestamp': info['timestamp'],
                    'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000),
                    'formats': formats,
                }

        return self.playlist_result(entries(), f'{video_id}-playlist', info['title'])
Commit	Line	Data
3d667e00	1	import re
3d667e00	2	import urllib.parse
d4f14a72	3	import xml.etree.ElementTree
3d667e00	4
d0f2d641	5	from .common import InfoExtractor
29f7c58a	6	from ..utils import (
3d667e00	7	ExtractorError,
	8	int_or_none,
	9	parse_qs,
29f7c58a	10	smuggle_url,
f69b0554	11	traverse_obj,
f69b0554	12	unified_timestamp,
3d667e00	13	update_url_query,
29f7c58a	14	url_or_none,
3d667e00	15	xpath_text,
29f7c58a	16	)
d0f2d641 JW	17
	18
	19	class SlidesLiveIE(InfoExtractor):
3d667e00	20	_VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P<id>[0-9]+)'
d0f2d641	21	_TESTS = [{
3d667e00	22	# service_name = yoda, only XML slides info
d0f2d641	23	'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
d0f2d641	24	'info_dict': {
f69b0554	25	'id': '38902413',
d0f2d641	26	'ext': 'mp4',
b33a05d2	27	'title': 'GCC IA16 backend',
615a8444	28	'timestamp': 1697793372,
615a8444	29	'upload_date': '20231020',
f69b0554	30	'thumbnail': r're:^https?://.*\.jpg',
3d667e00	31	'thumbnails': 'count:42',
3d667e00	32	'chapters': 'count:41',
5ab3534d	33	'duration': 1638,
f69b0554	34	},
	35	'params': {
	36	'skip_download': 'm3u8',
	37	},
29f7c58a	38	}, {
3d667e00	39	# service_name = yoda, /v7/ slides
29f7c58a	40	'url': 'https://slideslive.com/38935785',
29f7c58a	41	'info_dict': {
f69b0554	42	'id': '38935785',
29f7c58a	43	'ext': 'mp4',
29f7c58a	44	'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
615a8444	45	'upload_date': '20231020',
615a8444	46	'timestamp': 1697807002,
3d667e00	47	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	48	'thumbnails': 'count:640',
	49	'chapters': 'count:639',
5ab3534d	50	'duration': 9832,
f69b0554	51	},
	52	'params': {
	53	'skip_download': 'm3u8',
	54	},
	55	}, {
3d667e00	56	# service_name = yoda, /v1/ slides
f69b0554	57	'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
	58	'info_dict': {
	59	'id': '38973182',
	60	'ext': 'mp4',
	61	'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
615a8444	62	'upload_date': '20231020',
f69b0554	63	'thumbnail': r're:^https?://.*\.jpg',
615a8444	64	'timestamp': 1697822521,
3d667e00	65	'thumbnails': 'count:3',
3d667e00	66	'chapters': 'count:2',
5ab3534d	67	'duration': 5889,
f69b0554	68	},
	69	'params': {
	70	'skip_download': 'm3u8',
29f7c58a	71	},
aa1d5eb9	72	}, {
615a8444	73	# formerly youtube, converted to native
f69b0554	74	'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
	75	'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
	76	'info_dict': {
615a8444	77	'id': '38897546',
f69b0554	78	'ext': 'mp4',
f69b0554	79	'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
615a8444	80	'thumbnail': r're:^https?://.*\.jpg',
	81	'upload_date': '20231029',
	82	'timestamp': 1698588144,
3d667e00	83	'thumbnails': 'count:169',
3d667e00	84	'chapters': 'count:168',
615a8444	85	'duration': 6827,
	86	},
	87	'params': {
	88	'skip_download': 'm3u8',
3d667e00	89	},
	90	}, {
	91	# embed-only presentation, only XML slides info
	92	'url': 'https://slideslive.com/embed/presentation/38925850',
	93	'info_dict': {
	94	'id': '38925850',
	95	'ext': 'mp4',
	96	'title': 'Towards a Deep Network Architecture for Structured Smoothness',
	97	'thumbnail': r're:^https?://.*\.jpg',
	98	'thumbnails': 'count:8',
615a8444	99	'timestamp': 1697803109,
615a8444	100	'upload_date': '20231020',
3d667e00	101	'chapters': 'count:7',
5ab3534d	102	'duration': 326,
3d667e00	103	},
	104	'params': {
	105	'skip_download': 'm3u8',
f69b0554	106	},
f69b0554	107	}, {
3d667e00	108	# embed-only presentation, only JSON slides info, /v5/ slides (.png)
	109	'url': 'https://slideslive.com/38979920/',
	110	'info_dict': {
	111	'id': '38979920',
	112	'ext': 'mp4',
	113	'title': 'MoReL: Multi-omics Relational Learning',
	114	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	115	'thumbnails': 'count:7',
615a8444	116	'timestamp': 1697824939,
615a8444	117	'upload_date': '20231020',
3d667e00	118	'chapters': 'count:6',
5ab3534d	119	'duration': 171,
3d667e00	120	},
	121	'params': {
	122	'skip_download': 'm3u8',
	123	},
	124	}, {
	125	# /v2/ slides (.jpg)
	126	'url': 'https://slideslive.com/38954074',
	127	'info_dict': {
	128	'id': '38954074',
	129	'ext': 'mp4',
	130	'title': 'Decentralized Attribution of Generative Models',
	131	'thumbnail': r're:^https?://.*\.jpg',
	132	'thumbnails': 'count:16',
615a8444	133	'timestamp': 1697814901,
615a8444	134	'upload_date': '20231020',
3d667e00	135	'chapters': 'count:15',
5ab3534d	136	'duration': 306,
3d667e00	137	},
	138	'params': {
	139	'skip_download': 'm3u8',
	140	},
	141	}, {
	142	# /v4/ slides (.png)
	143	'url': 'https://slideslive.com/38979570/',
	144	'info_dict': {
	145	'id': '38979570',
	146	'ext': 'mp4',
	147	'title': 'Efficient Active Search for Combinatorial Optimization Problems',
	148	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	149	'thumbnails': 'count:9',
615a8444	150	'timestamp': 1697824757,
615a8444	151	'upload_date': '20231020',
3d667e00	152	'chapters': 'count:8',
5ab3534d	153	'duration': 295,
3d667e00	154	},
	155	'params': {
	156	'skip_download': 'm3u8',
	157	},
	158	}, {
	159	# /v10/ slides
	160	'url': 'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F',
	161	'info_dict': {
	162	'id': '38979880',
	163	'ext': 'mp4',
	164	'title': 'The Representation Power of Neural Networks',
615a8444	165	'timestamp': 1697824919,
3d667e00	166	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
3d667e00	167	'thumbnails': 'count:22',
615a8444	168	'upload_date': '20231020',
3d667e00	169	'chapters': 'count:21',
5ab3534d	170	'duration': 294,
3d667e00	171	},
	172	'params': {
	173	'skip_download': 'm3u8',
	174	},
	175	}, {
	176	# /v7/ slides, 2 video slides
	177	'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com',
	178	'playlist_count': 3,
	179	'info_dict': {
	180	'id': '38979682-playlist',
	181	'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
	182	},
	183	'playlist': [{
	184	'info_dict': {
	185	'id': '38979682',
	186	'ext': 'mp4',
	187	'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
615a8444	188	'timestamp': 1697824815,
3d667e00	189	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
3d667e00	190	'thumbnails': 'count:30',
615a8444	191	'upload_date': '20231020',
3d667e00	192	'chapters': 'count:31',
5ab3534d	193	'duration': 272,
3d667e00	194	},
	195	}, {
	196	'info_dict': {
	197	'id': '38979682-021',
	198	'ext': 'mp4',
	199	'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
	200	'duration': 3,
615a8444	201	'timestamp': 1697824815,
615a8444	202	'upload_date': '20231020',
3d667e00	203	},
	204	}, {
	205	'info_dict': {
	206	'id': '38979682-024',
	207	'ext': 'mp4',
	208	'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
	209	'duration': 4,
615a8444	210	'timestamp': 1697824815,
615a8444	211	'upload_date': '20231020',
3d667e00	212	},
	213	}],
	214	'params': {
	215	'skip_download': 'm3u8',
	216	},
	217	}, {
	218	# /v6/ slides, 1 video slide, edit.videoken.com embed
	219	'url': 'https://slideslive.com/38979481/',
	220	'playlist_count': 2,
	221	'info_dict': {
	222	'id': '38979481-playlist',
	223	'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
	224	},
	225	'playlist': [{
	226	'info_dict': {
	227	'id': '38979481',
	228	'ext': 'mp4',
	229	'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
615a8444	230	'timestamp': 1697824716,
3d667e00	231	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
3d667e00	232	'thumbnails': 'count:43',
615a8444	233	'upload_date': '20231020',
3d667e00	234	'chapters': 'count:43',
5ab3534d	235	'duration': 315,
3d667e00	236	},
	237	}, {
	238	'info_dict': {
	239	'id': '38979481-013',
	240	'ext': 'mp4',
	241	'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
	242	'duration': 3,
615a8444	243	'timestamp': 1697824716,
615a8444	244	'upload_date': '20231020',
3d667e00	245	},
	246	}],
	247	'params': {
	248	'skip_download': 'm3u8',
	249	},
	250	}, {
	251	# /v3/ slides, .jpg and .png, service_name = youtube
	252	'url': 'https://slideslive.com/embed/38932460/',
	253	'info_dict': {
	254	'id': 'RTPdrgkyTiE',
	255	'display_id': '38932460',
	256	'ext': 'mp4',
	257	'title': 'Active Learning for Hierarchical Multi-Label Classification',
	258	'description': 'Watch full version of this video at https://slideslive.com/38932460.',
	259	'channel': 'SlidesLive Videos - A',
	260	'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
	261	'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
	262	'uploader': 'SlidesLive Videos - A',
615a8444	263	'uploader_id': '@slideslivevideos-a6075',
615a8444	264	'uploader_url': 'https://www.youtube.com/@slideslivevideos-a6075',
3d667e00	265	'upload_date': '20200903',
615a8444	266	'timestamp': 1697805922,
3d667e00	267	'duration': 942,
	268	'age_limit': 0,
	269	'live_status': 'not_live',
	270	'playable_in_embed': True,
	271	'availability': 'unlisted',
	272	'categories': ['People & Blogs'],
	273	'tags': [],
	274	'channel_follower_count': int,
	275	'like_count': int,
	276	'view_count': int,
	277	'thumbnail': r're:^https?://.*\.(?:jpg\|png\|webp)',
	278	'thumbnails': 'count:21',
	279	'chapters': 'count:20',
	280	},
	281	'params': {
	282	'skip_download': 'm3u8',
	283	},
5ab3534d	284	}, {
	285	# /v3/ slides, .png only, service_name = yoda
	286	'url': 'https://slideslive.com/38983994',
	287	'info_dict': {
	288	'id': '38983994',
	289	'ext': 'mp4',
	290	'title': 'Zero-Shot AutoML with Pretrained Models',
615a8444	291	'timestamp': 1697826708,
615a8444	292	'upload_date': '20231020',
5ab3534d	293	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	294	'thumbnails': 'count:23',
	295	'chapters': 'count:22',
	296	'duration': 295,
	297	},
	298	'params': {
	299	'skip_download': 'm3u8',
	300	},
3d667e00	301	}, {
3d667e00	302	# service_name = yoda
aa1d5eb9 RA	303	'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
aa1d5eb9 RA	304	'only_matching': True,
73d8f3a6	305	}, {
3d667e00	306	# dead link, service_name = url
73d8f3a6 RA	307	'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
	308	'only_matching': True,
	309	}, {
3d667e00	310	# dead link, service_name = vimeo
73d8f3a6 RA	311	'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
73d8f3a6 RA	312	'only_matching': True,
d0f2d641 JW	313	}]
d0f2d641 JW	314
3d667e00	315	_WEBPAGE_TESTS = [{
	316	# only XML slides info
	317	'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html',
	318	'info_dict': {
	319	'id': '38925850',
	320	'ext': 'mp4',
	321	'title': 'Towards a Deep Network Architecture for Structured Smoothness',
	322	'thumbnail': r're:^https?://.*\.jpg',
	323	'thumbnails': 'count:8',
615a8444	324	'timestamp': 1697803109,
615a8444	325	'upload_date': '20231020',
3d667e00	326	'chapters': 'count:7',
5ab3534d	327	'duration': 326,
3d667e00	328	},
	329	'params': {
	330	'skip_download': 'm3u8',
	331	},
	332	}]
	333
	334	@classmethod
	335	def _extract_embed_urls(cls, url, webpage):
	336	# Reference: https://slideslive.com/embed_presentation.js
	337	for embed_id in re.findall(r'(?s)new\s+SlidesLiveEmbed\s\([^)]+\bpresentationId:\s["\'](\d+)["\']', webpage):
	338	url_parsed = urllib.parse.urlparse(url)
	339	origin = f'{url_parsed.scheme}://{url_parsed.netloc}'
	340	yield update_url_query(
	341	f'https://slideslive.com/embed/presentation/{embed_id}', {
	342	'embed_parent_url': url,
	343	'embed_container_origin': origin,
	344	})
	345
	346	def _download_embed_webpage_handle(self, video_id, headers):
	347	return self._download_webpage_handle(
	348	f'https://slideslive.com/embed/presentation/{video_id}', video_id,
	349	headers=headers, query=traverse_obj(headers, {
	350	'embed_parent_url': 'Referer',
	351	'embed_container_origin': 'Origin',
	352	}))
	353
f69b0554	354	def _extract_custom_m3u8_info(self, m3u8_data):
	355	m3u8_dict = {}
	356
	357	lookup = {
	358	'PRESENTATION-TITLE': 'title',
	359	'PRESENTATION-UPDATED-AT': 'timestamp',
	360	'PRESENTATION-THUMBNAIL': 'thumbnail',
	361	'PLAYLIST-TYPE': 'playlist_type',
	362	'VOD-VIDEO-SERVICE-NAME': 'service_name',
	363	'VOD-VIDEO-ID': 'service_id',
	364	'VOD-VIDEO-SERVERS': 'video_servers',
	365	'VOD-SUBTITLES': 'subtitles',
3d667e00	366	'VOD-SLIDES-JSON-URL': 'slides_json_url',
3d667e00	367	'VOD-SLIDES-XML-URL': 'slides_xml_url',
f69b0554	368	}
	369
	370	for line in m3u8_data.splitlines():
	371	if not line.startswith('#EXT-SL-'):
	372	continue
	373	tag, _, value = line.partition(':')
93240fc1	374	key = lookup.get(tag[8:])
f69b0554	375	if not key:
	376	continue
	377	m3u8_dict[key] = value
	378
	379	# Some values are stringified JSON arrays
	380	for key in ('video_servers', 'subtitles'):
	381	if key in m3u8_dict:
	382	m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or []
	383
	384	return m3u8_dict
	385
5ab3534d	386	def _extract_formats_and_duration(self, cdn_hostname, path, video_id, skip_duration=False):
	387	formats, duration = [], None
	388
	389	hls_formats = self._extract_m3u8_formats(
3d667e00	390	f'https://{cdn_hostname}/{path}/master.m3u8',
5ab3534d	391	video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)
	392	if hls_formats:
	393	if not skip_duration:
	394	duration = self._extract_m3u8_vod_duration(
	395	hls_formats[0]['url'], video_id, note='Extracting duration from HLS manifest')
	396	formats.extend(hls_formats)
	397
	398	dash_formats = self._extract_mpd_formats(
	399	f'https://{cdn_hostname}/{path}/master.mpd', video_id, mpd_id='dash', fatal=False)
	400	if dash_formats:
	401	if not duration and not skip_duration:
	402	duration = self._extract_mpd_vod_duration(
	403	f'https://{cdn_hostname}/{path}/master.mpd', video_id,
	404	note='Extracting duration from DASH manifest')
	405	formats.extend(dash_formats)
	406
	407	return formats, duration
3d667e00	408
d0f2d641 JW	409	def _real_extract(self, url):
d0f2d641 JW	410	video_id = self._match_id(url)
3d667e00	411	webpage, urlh = self._download_embed_webpage_handle(
	412	video_id, headers=traverse_obj(parse_qs(url), {
	413	'Referer': ('embed_parent_url', -1),
	414	'Origin': ('embed_container_origin', -1)}))
3d2623a8	415	redirect_url = urlh.url
3d667e00	416	if 'domain_not_allowed' in redirect_url:
	417	domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False)
	418	if not domain:
	419	raise ExtractorError(
	420	'This is an embed-only presentation. Try passing --referer', expected=True)
	421	webpage, _ = self._download_embed_webpage_handle(video_id, headers={
	422	'Referer': f'https://{domain}/',
	423	'Origin': f'https://{domain}',
	424	})
	425
f69b0554	426	player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
	427	player_data = self._download_webpage(
	428	f'https://ben.slideslive.com/player/{video_id}', video_id,
	429	note='Downloading player info', query={'player_token': player_token})
	430	player_info = self._extract_custom_m3u8_info(player_data)
	431
	432	service_name = player_info['service_name'].lower()
29f7c58a	433	assert service_name in ('url', 'yoda', 'vimeo', 'youtube')
f69b0554	434	service_id = player_info['service_id']
f69b0554	435
5ab3534d	436	slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s%s'
	437	slides, slides_info = {}, []
	438
3d667e00	439	if player_info.get('slides_json_url'):
5ab3534d	440	slides = self._download_json(
	441	player_info['slides_json_url'], video_id, fatal=False,
	442	note='Downloading slides JSON', errnote=False) or {}
	443	slide_ext_default = '.png'
	444	slide_quality = traverse_obj(slides, ('slide_qualities', 0))
	445	if slide_quality:
	446	slide_ext_default = '.jpg'
	447	slide_url_template = f'https://cdn.slideslive.com/data/presentations/%s/slides/{slide_quality}/%s%s'
	448	for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...), expected_type=dict), 1):
3d667e00	449	slides_info.append((
3d667e00	450	slide_id, traverse_obj(slide, ('image', 'name')),
5ab3534d	451	traverse_obj(slide, ('image', 'extname'), default=slide_ext_default),
3d667e00	452	int_or_none(slide.get('time'), scale=1000)))
	453
	454	if not slides and player_info.get('slides_xml_url'):
3d667e00	455	slides = self._download_xml(
5ab3534d	456	player_info['slides_xml_url'], video_id, fatal=False,
3d667e00	457	note='Downloading slides XML', errnote='Failed to download slides info')
d4f14a72	458	if isinstance(slides, xml.etree.ElementTree.Element):
	459	slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
	460	for slide_id, slide in enumerate(slides.findall('./slide')):
	461	slides_info.append((
	462	slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
	463	int_or_none(xpath_text(slide, './timeSec', 'time'))))
3d667e00	464
3d667e00	465	chapters, thumbnails = [], []
	466	if url_or_none(player_info.get('thumbnail')):
	467	thumbnails.append({'id': 'cover', 'url': player_info['thumbnail']})
5ab3534d	468	for slide_id, slide_path, slide_ext, start_time in slides_info:
3d667e00	469	if slide_path:
	470	thumbnails.append({
	471	'id': f'{slide_id:03d}',
5ab3534d	472	'url': slide_url_template % (video_id, slide_path, slide_ext),
3d667e00	473	})
	474	chapters.append({
	475	'title': f'Slide {slide_id:03d}',
	476	'start_time': start_time,
	477	})
	478
29f7c58a	479	subtitles = {}
f69b0554	480	for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
29f7c58a	481	webvtt_url = url_or_none(sub.get('webvtt_url'))
	482	if not webvtt_url:
	483	continue
f69b0554	484	subtitles.setdefault(sub.get('language') or 'en', []).append({
29f7c58a	485	'url': webvtt_url,
f69b0554	486	'ext': 'vtt',
29f7c58a	487	})
f69b0554	488
73d8f3a6 RA	489	info = {
73d8f3a6 RA	490	'id': video_id,
f69b0554	491	'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
	492	'timestamp': unified_timestamp(player_info.get('timestamp')),
	493	'is_live': player_info.get('playlist_type') != 'vod',
3d667e00	494	'thumbnails': thumbnails,
3d667e00	495	'chapters': chapters,
29f7c58a	496	'subtitles': subtitles,
73d8f3a6	497	}
f69b0554	498
3d667e00	499	if service_name == 'url':
	500	info['url'] = service_id
	501	elif service_name == 'yoda':
5ab3534d	502	formats, duration = self._extract_formats_and_duration(
	503	player_info['video_servers'][0], service_id, video_id)
	504	info.update({
	505	'duration': duration,
	506	'formats': formats,
	507	})
73d8f3a6 RA	508	else:
73d8f3a6 RA	509	info.update({
b33a05d2	510	'_type': 'url_transparent',
29f7c58a	511	'url': service_id,
73d8f3a6	512	'ie_key': service_name.capitalize(),
f69b0554	513	'display_id': video_id,
73d8f3a6 RA	514	})
	515	if service_name == 'vimeo':
	516	info['url'] = smuggle_url(
f69b0554	517	f'https://player.vimeo.com/video/{service_id}',
f04b5bed	518	{'referer': url})
f69b0554	519
5ab3534d	520	video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
3d667e00	521	if not video_slides:
	522	return info
	523
	524	def entries():
	525	yield info
	526
	527	service_data = self._download_json(
	528	f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data',
	529	video_id, fatal=False, query={
	530	'player_token': player_token,
	531	'videos': ','.join(video_slides),
	532	}, note='Downloading video slides info', errnote='Failed to download video slides info') or {}
	533
5ab3534d	534	for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
add96eb9	535	if traverse_obj(slide, ('video', 'service')) != 'yoda':
3d667e00	536	continue
	537	video_path = traverse_obj(slide, ('video', 'id'))
	538	cdn_hostname = traverse_obj(service_data, (
	539	video_path, 'video_servers', ...), get_all=False)
	540	if not cdn_hostname or not video_path:
	541	continue
5ab3534d	542	formats, _ = self._extract_formats_and_duration(
5ab3534d	543	cdn_hostname, video_path, video_id, skip_duration=True)
3d667e00	544	if not formats:
	545	continue
	546	yield {
	547	'id': f'{video_id}-{slide_id:03d}',
	548	'title': f'{info["title"]} - Slide {slide_id:03d}',
	549	'timestamp': info['timestamp'],
	550	'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000),
	551	'formats': formats,
	552	}
	553
	554	return self.playlist_result(entries(), f'{video_id}-playlist', info['title'])