[yt-dlp.git] / yt_dlp / extractor / slideslive.py

import re
import urllib.parse

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    int_or_none,
    parse_qs,
    smuggle_url,
    traverse_obj,
    unified_timestamp,
    update_url_query,
    url_or_none,
    xpath_text,
)


class SlidesLiveIE(InfoExtractor):
    """Extractor for presentations hosted on slideslive.com.

    Matches plain presentation URLs (``slideslive.com/<id>[/slug]``) as well as
    the ``/embed/<id>`` and ``/embed/presentation/<id>`` player URLs; the
    numeric presentation id is captured as ``id``.
    """
    _VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P<id>[0-9]+)'
    # Extraction test cases.  The comment at the top of each entry names the
    # video backend (`service_name`) and/or the slide format variant that the
    # URL covers.  Entries with `playlist` describe presentations that also
    # contain video slides; entries flagged `only_matching` carry no expected
    # metadata.
    _TESTS = [{
        # service_name = yoda, only XML slides info
        'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
        'info_dict': {
            'id': '38902413',
            'ext': 'mp4',
            'title': 'GCC IA16 backend',
            'timestamp': 1648189972,
            'upload_date': '20220325',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:42',
            'chapters': 'count:41',
            'duration': 1638,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # service_name = yoda, /v7/ slides
        'url': 'https://slideslive.com/38935785',
        'info_dict': {
            'id': '38935785',
            'ext': 'mp4',
            'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
            'upload_date': '20211115',
            'timestamp': 1636996003,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:640',
            'chapters': 'count:639',
            'duration': 9832,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # service_name = yoda, /v1/ slides
        'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
        'info_dict': {
            'id': '38973182',
            'ext': 'mp4',
            'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
            'upload_date': '20220201',
            'thumbnail': r're:^https?://.*\.jpg',
            'timestamp': 1643728135,
            'thumbnails': 'count:3',
            'chapters': 'count:2',
            'duration': 5889,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # service_name = youtube, only XML slides info
        'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
        'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
        'info_dict': {
            'id': 'jmg02wCJD5M',
            'display_id': '38897546',
            'ext': 'mp4',
            'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
            'description': 'Watch full version of this video at https://slideslive.com/38897546.',
            'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
            'channel': 'SlidesLive Videos - G1',
            'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
            'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
            'uploader': 'SlidesLive Videos - G1',
            'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
            'live_status': 'not_live',
            'upload_date': '20160710',
            'timestamp': 1618786715,
            'duration': 6827,
            'like_count': int,
            'view_count': int,
            'comment_count': int,
            'channel_follower_count': int,
            'age_limit': 0,
            'thumbnail': r're:^https?://.*\.(?:jpg|webp)',
            'thumbnails': 'count:169',
            'playable_in_embed': True,
            'availability': 'unlisted',
            'tags': [],
            'categories': ['People & Blogs'],
            'chapters': 'count:168',
        },
    }, {
        # embed-only presentation, only XML slides info
        'url': 'https://slideslive.com/embed/presentation/38925850',
        'info_dict': {
            'id': '38925850',
            'ext': 'mp4',
            'title': 'Towards a Deep Network Architecture for Structured Smoothness',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:8',
            'timestamp': 1629671508,
            'upload_date': '20210822',
            'chapters': 'count:7',
            'duration': 326,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # embed-only presentation, only JSON slides info, /v5/ slides (.png)
        'url': 'https://slideslive.com/38979920/',
        'info_dict': {
            'id': '38979920',
            'ext': 'mp4',
            'title': 'MoReL: Multi-omics Relational Learning',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:7',
            'timestamp': 1654714970,
            'upload_date': '20220608',
            'chapters': 'count:6',
            'duration': 171,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v2/ slides (.jpg)
        'url': 'https://slideslive.com/38954074',
        'info_dict': {
            'id': '38954074',
            'ext': 'mp4',
            'title': 'Decentralized Attribution of Generative Models',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:16',
            'timestamp': 1622806321,
            'upload_date': '20210604',
            'chapters': 'count:15',
            'duration': 306,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v4/ slides (.png)
        'url': 'https://slideslive.com/38979570/',
        'info_dict': {
            'id': '38979570',
            'ext': 'mp4',
            'title': 'Efficient Active Search for Combinatorial Optimization Problems',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:9',
            'timestamp': 1654714896,
            'upload_date': '20220608',
            'chapters': 'count:8',
            'duration': 295,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v10/ slides
        'url': 'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F',
        'info_dict': {
            'id': '38979880',
            'ext': 'mp4',
            'title': 'The Representation Power of Neural Networks',
            'timestamp': 1654714962,
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:22',
            'upload_date': '20220608',
            'chapters': 'count:21',
            'duration': 294,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v7/ slides, 2 video slides
        'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com',
        'playlist_count': 3,
        'info_dict': {
            'id': '38979682-playlist',
            'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
        },
        'playlist': [{
            'info_dict': {
                'id': '38979682',
                'ext': 'mp4',
                'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
                'timestamp': 1654714920,
                'thumbnail': r're:^https?://.*\.(?:jpg|png)',
                'thumbnails': 'count:30',
                'upload_date': '20220608',
                'chapters': 'count:31',
                'duration': 272,
            },
        }, {
            'info_dict': {
                'id': '38979682-021',
                'ext': 'mp4',
                'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
                'duration': 3,
                'timestamp': 1654714920,
                'upload_date': '20220608',
            },
        }, {
            'info_dict': {
                'id': '38979682-024',
                'ext': 'mp4',
                'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
                'duration': 4,
                'timestamp': 1654714920,
                'upload_date': '20220608',
            },
        }],
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v6/ slides, 1 video slide, edit.videoken.com embed
        'url': 'https://slideslive.com/38979481/',
        'playlist_count': 2,
        'info_dict': {
            'id': '38979481-playlist',
            'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
        },
        'playlist': [{
            'info_dict': {
                'id': '38979481',
                'ext': 'mp4',
                'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
                'timestamp': 1654714877,
                'thumbnail': r're:^https?://.*\.(?:jpg|png)',
                'thumbnails': 'count:43',
                'upload_date': '20220608',
                'chapters': 'count:43',
                'duration': 315,
            },
        }, {
            'info_dict': {
                'id': '38979481-013',
                'ext': 'mp4',
                'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
                'duration': 3,
                'timestamp': 1654714877,
                'upload_date': '20220608',
            },
        }],
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v3/ slides, .jpg and .png, service_name = youtube
        'url': 'https://slideslive.com/embed/38932460/',
        'info_dict': {
            'id': 'RTPdrgkyTiE',
            'display_id': '38932460',
            'ext': 'mp4',
            'title': 'Active Learning for Hierarchical Multi-Label Classification',
            'description': 'Watch full version of this video at https://slideslive.com/38932460.',
            'channel': 'SlidesLive Videos - A',
            'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
            'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
            'uploader': 'SlidesLive Videos - A',
            'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
            'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
            'upload_date': '20200903',
            'timestamp': 1602599092,
            'duration': 942,
            'age_limit': 0,
            'live_status': 'not_live',
            'playable_in_embed': True,
            'availability': 'unlisted',
            'categories': ['People & Blogs'],
            'tags': [],
            'channel_follower_count': int,
            'like_count': int,
            'view_count': int,
            'thumbnail': r're:^https?://.*\.(?:jpg|png|webp)',
            'thumbnails': 'count:21',
            'chapters': 'count:20',
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # /v3/ slides, .png only, service_name = yoda
        'url': 'https://slideslive.com/38983994',
        'info_dict': {
            'id': '38983994',
            'ext': 'mp4',
            'title': 'Zero-Shot AutoML with Pretrained Models',
            'timestamp': 1662384834,
            'upload_date': '20220905',
            'thumbnail': r're:^https?://.*\.(?:jpg|png)',
            'thumbnails': 'count:23',
            'chapters': 'count:22',
            'duration': 295,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }, {
        # service_name = yoda
        'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
        'only_matching': True,
    }, {
        # dead link, service_name = url
        'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
        'only_matching': True,
    }, {
        # dead link, service_name = vimeo
        'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
        'only_matching': True,
    }]

    # Test case whose URL is a third-party page (iclr.cc) rather than a
    # slideslive.com URL; the expected result is presentation 38925850.
    # NOTE(review): presumably exercises embed discovery through
    # _extract_embed_urls - confirm against the test harness.
    _WEBPAGE_TESTS = [{
        # only XML slides info
        'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html',
        'info_dict': {
            'id': '38925850',
            'ext': 'mp4',
            'title': 'Towards a Deep Network Architecture for Structured Smoothness',
            'thumbnail': r're:^https?://.*\.jpg',
            'thumbnails': 'count:8',
            'timestamp': 1629671508,
            'upload_date': '20210822',
            'chapters': 'count:7',
            'duration': 326,
        },
        'params': {
            'skip_download': 'm3u8',
        },
    }]

    @classmethod
    def _extract_embed_urls(cls, url, webpage):
        """Yield a SlidesLive embed URL for each player instantiated in *webpage*.

        Every ``new SlidesLiveEmbed(... presentationId: "<digits>" ...)`` call
        found in the page source produces one
        ``https://slideslive.com/embed/presentation/<id>`` URL.  The embedding
        page's full URL and its ``scheme://netloc`` origin are attached as the
        ``embed_parent_url`` and ``embed_container_origin`` query parameters.
        """
        # Reference: https://slideslive.com/embed_presentation.js
        embed_call_re = r'(?s)new\s+SlidesLiveEmbed\s*\([^)]+\bpresentationId:\s*["\'](\d+)["\']'
        for mobj in re.finditer(embed_call_re, webpage):
            parent = urllib.parse.urlparse(url)
            embed_query = {
                'embed_parent_url': url,
                'embed_container_origin': f'{parent.scheme}://{parent.netloc}',
            }
            yield update_url_query(
                f'https://slideslive.com/embed/presentation/{mobj.group(1)}', embed_query)

    def _download_embed_webpage_handle(self, video_id, headers):
        """Download the embed player page for *video_id*.

        *headers* is sent with the request, and its ``Referer`` / ``Origin``
        entries are additionally mapped onto the ``embed_parent_url`` /
        ``embed_container_origin`` query parameters.  Returns the result of
        ``_download_webpage_handle`` unchanged (the caller unpacks it as a
        ``(webpage, url_handle)`` pair).
        """
        embed_query = traverse_obj(headers, {
            'embed_parent_url': 'Referer',
            'embed_container_origin': 'Origin',
        })
        embed_url = f'https://slideslive.com/embed/presentation/{video_id}'
        return self._download_webpage_handle(
            embed_url, video_id, headers=headers, query=embed_query)

    def _extract_custom_m3u8_info(self, m3u8_data):
        m3u8_dict = {}

        lookup = {
            'PRESENTATION-TITLE': 'title',
            'PRESENTATION-UPDATED-AT': 'timestamp',
            'PRESENTATION-THUMBNAIL': 'thumbnail',
            'PLAYLIST-TYPE': 'playlist_type',
            'VOD-VIDEO-SERVICE-NAME': 'service_name',
            'VOD-VIDEO-ID': 'service_id',
            'VOD-VIDEO-SERVERS': 'video_servers',
            'VOD-SUBTITLES': 'subtitles',
            'VOD-SLIDES-JSON-URL': 'slides_json_url',
            'VOD-SLIDES-XML-URL': 'slides_xml_url',
        }

        for line in m3u8_data.splitlines():
            if not line.startswith('#EXT-SL-'):
                continue
            tag, _, value = line.partition(':')
            key = lookup.get(tag.lstrip('#EXT-SL-'))
            if not key:
                continue
            m3u8_dict[key] = value

        # Some values are stringified JSON arrays
        for key in ('video_servers', 'subtitles'):
            if key in m3u8_dict:
                m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or []

        return m3u8_dict

    def _extract_formats_and_duration(self, cdn_hostname, path, video_id, skip_duration=False):
        """Gather HLS and DASH formats for a video hosted on *cdn_hostname*.

        The manifests are looked up at ``https://<cdn_hostname>/<path>/master.m3u8``
        and ``.../master.mpd``; both extractions are non-fatal.  Unless
        *skip_duration* is set, the duration is taken from the first HLS
        format's manifest, falling back to the DASH manifest when that yields
        nothing.

        @returns    ``(formats, duration)``; HLS formats precede DASH formats
                    and ``duration`` is None when skipped or unavailable
        """
        base_url = f'https://{cdn_hostname}/{path}'
        want_duration = not skip_duration
        duration = None

        hls_formats = self._extract_m3u8_formats(
            f'{base_url}/master.m3u8',
            video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)
        if hls_formats and want_duration:
            duration = self._extract_m3u8_vod_duration(
                hls_formats[0]['url'], video_id, note='Extracting duration from HLS manifest')

        dash_url = f'{base_url}/master.mpd'
        dash_formats = self._extract_mpd_formats(
            dash_url, video_id, mpd_id='dash', fatal=False)
        if dash_formats and want_duration and not duration:
            duration = self._extract_mpd_vod_duration(
                dash_url, video_id, note='Extracting duration from DASH manifest')

        return [*(hls_formats or []), *(dash_formats or [])], duration

    def _real_extract(self, url):
        """Extract a SlidesLive presentation.

        Steps:
          1. Download the embed page (retrying with an allowed domain as
             Referer/Origin when redirected to a ``domain_not_allowed`` URL)
             and read the player token from it.
          2. Download the player info manifest and parse its custom tags.
          3. Build thumbnails and chapters from the slides JSON, or from the
             slides XML when no JSON slides data was obtained.
          4. Resolve the video itself depending on the reported service
             (``url``, ``yoda``, ``vimeo`` or ``youtube``).
          5. If the slides JSON lists video slides, return a playlist with the
             main video followed by one entry per ``yoda``-hosted video slide.

        @returns    An info dict, or a playlist result when video slides exist
        @raises     ExtractorError if the presentation is embed-only and the
                    redirect names no allowed domain, or if the player reports
                    a video service this extractor does not handle
        """
        video_id = self._match_id(url)
        # embed_parent_url / embed_container_origin from the URL query are
        # replayed as Referer / Origin request headers
        webpage, urlh = self._download_embed_webpage_handle(
            video_id, headers=traverse_obj(parse_qs(url), {
                'Referer': ('embed_parent_url', -1),
                'Origin': ('embed_container_origin', -1)}))
        redirect_url = urlh.geturl()
        if 'domain_not_allowed' in redirect_url:
            # Retry pretending to be embedded on the first allowed domain
            domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False)
            if not domain:
                raise ExtractorError(
                    'This is an embed-only presentation. Try passing --referer', expected=True)
            webpage, _ = self._download_embed_webpage_handle(video_id, headers={
                'Referer': f'https://{domain}/',
                'Origin': f'https://{domain}',
            })

        player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
        player_data = self._download_webpage(
            f'https://ben.slideslive.com/player/{video_id}', video_id,
            note='Downloading player info', query={'player_token': player_token})
        player_info = self._extract_custom_m3u8_info(player_data)

        service_name = player_info['service_name'].lower()
        # Validate explicitly instead of with `assert`: assertions are removed
        # when Python runs with -O, which would let an unknown service fall
        # through to the url_transparent branch with a bogus ie_key
        if service_name not in ('url', 'yoda', 'vimeo', 'youtube'):
            raise ExtractorError(f'Unsupported service name: {service_name!r}')
        service_id = player_info['service_id']

        slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s%s'
        # slides_info holds (slide number, image name, image extension, start time) tuples
        slides, slides_info = {}, []

        if player_info.get('slides_json_url'):
            slides = self._download_json(
                player_info['slides_json_url'], video_id, fatal=False,
                note='Downloading slides JSON', errnote=False) or {}
            slide_ext_default = '.png'
            slide_quality = traverse_obj(slides, ('slide_qualities', 0))
            if slide_quality:
                slide_ext_default = '.jpg'
                slide_url_template = f'https://cdn.slideslive.com/data/presentations/%s/slides/{slide_quality}/%s%s'
            for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...), expected_type=dict), 1):
                slides_info.append((
                    slide_id, traverse_obj(slide, ('image', 'name')),
                    traverse_obj(slide, ('image', 'extname'), default=slide_ext_default),
                    int_or_none(slide.get('time'), scale=1000)))

        if not slides and player_info.get('slides_xml_url'):
            slides = self._download_xml(
                player_info['slides_xml_url'], video_id, fatal=False,
                note='Downloading slides XML', errnote='Failed to download slides info')
            slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
            # Do not truth-test the result: an Element's truth value reflects
            # whether it has children and testing it is deprecated.
            # NOTE(review): with fatal=False a failed download presumably
            # yields a non-Element value (None/False) - confirm
            slide_nodes = slides.findall('./slide') if hasattr(slides, 'findall') else []
            for slide_id, slide in enumerate(slide_nodes, 1):
                slides_info.append((
                    slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
                    int_or_none(xpath_text(slide, './timeSec', 'time'))))

        chapters, thumbnails = [], []
        if url_or_none(player_info.get('thumbnail')):
            thumbnails.append({'id': 'cover', 'url': player_info['thumbnail']})
        for slide_id, slide_path, slide_ext, start_time in slides_info:
            # A slide without an image still contributes a chapter
            if slide_path:
                thumbnails.append({
                    'id': f'{slide_id:03d}',
                    'url': slide_url_template % (video_id, slide_path, slide_ext),
                })
            chapters.append({
                'title': f'Slide {slide_id:03d}',
                'start_time': start_time,
            })

        subtitles = {}
        for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
            webvtt_url = url_or_none(sub.get('webvtt_url'))
            if not webvtt_url:
                continue
            subtitles.setdefault(sub.get('language') or 'en', []).append({
                'url': webvtt_url,
                'ext': 'vtt',
            })

        info = {
            'id': video_id,
            'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
            'timestamp': unified_timestamp(player_info.get('timestamp')),
            'is_live': player_info.get('playlist_type') != 'vod',
            'thumbnails': thumbnails,
            'chapters': chapters,
            'subtitles': subtitles,
        }

        if service_name == 'url':
            info['url'] = service_id
        elif service_name == 'yoda':
            formats, duration = self._extract_formats_and_duration(
                player_info['video_servers'][0], service_id, video_id)
            info.update({
                'duration': duration,
                'formats': formats,
            })
        else:
            # vimeo / youtube: delegate to the matching extractor while
            # keeping the metadata gathered here
            info.update({
                '_type': 'url_transparent',
                'url': service_id,
                'ie_key': service_name.capitalize(),
                'display_id': video_id,
            })
            if service_name == 'vimeo':
                info['url'] = smuggle_url(
                    f'https://player.vimeo.com/video/{service_id}',
                    {'http_headers': {'Referer': url}})

        video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
        if not video_slides:
            return info

        def entries():
            # The main presentation video always comes first
            yield info

            service_data = self._download_json(
                f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data',
                video_id, fatal=False, query={
                    'player_token': player_token,
                    'videos': ','.join(video_slides),
                }, note='Downloading video slides info', errnote='Failed to download video slides info') or {}

            for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
                if traverse_obj(slide, ('video', 'service')) != 'yoda':
                    continue
                video_path = traverse_obj(slide, ('video', 'id'))
                cdn_hostname = traverse_obj(service_data, (
                    video_path, 'video_servers', ...), get_all=False)
                if not cdn_hostname or not video_path:
                    continue
                # Duration comes from the slide metadata, so skip the manifest lookup
                formats, _ = self._extract_formats_and_duration(
                    cdn_hostname, video_path, video_id, skip_duration=True)
                if not formats:
                    continue
                yield {
                    'id': f'{video_id}-{slide_id:03d}',
                    'title': f'{info["title"]} - Slide {slide_id:03d}',
                    'timestamp': info['timestamp'],
                    'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000),
                    'formats': formats,
                }

        return self.playlist_result(entries(), f'{video_id}-playlist', info['title'])
Commit	Line	Data
3d667e00	1	import re
	2	import urllib.parse
	3
d0f2d641	4	from .common import InfoExtractor
29f7c58a	5	from ..utils import (
3d667e00	6	ExtractorError,
	7	int_or_none,
	8	parse_qs,
29f7c58a	9	smuggle_url,
f69b0554	10	traverse_obj,
f69b0554	11	unified_timestamp,
3d667e00	12	update_url_query,
29f7c58a	13	url_or_none,
3d667e00	14	xpath_text,
29f7c58a	15	)
d0f2d641 JW	16
	17
	18	class SlidesLiveIE(InfoExtractor):
3d667e00	19	_VALID_URL = r'https?://slideslive\.com/(?:embed/(?:presentation/)?)?(?P<id>[0-9]+)'
d0f2d641	20	_TESTS = [{
3d667e00	21	# service_name = yoda, only XML slides info
d0f2d641	22	'url': 'https://slideslive.com/38902413/gcc-ia16-backend',
d0f2d641	23	'info_dict': {
f69b0554	24	'id': '38902413',
d0f2d641	25	'ext': 'mp4',
b33a05d2	26	'title': 'GCC IA16 backend',
f69b0554	27	'timestamp': 1648189972,
	28	'upload_date': '20220325',
	29	'thumbnail': r're:^https?://.*\.jpg',
3d667e00	30	'thumbnails': 'count:42',
3d667e00	31	'chapters': 'count:41',
5ab3534d	32	'duration': 1638,
f69b0554	33	},
	34	'params': {
	35	'skip_download': 'm3u8',
	36	},
29f7c58a	37	}, {
3d667e00	38	# service_name = yoda, /v7/ slides
29f7c58a	39	'url': 'https://slideslive.com/38935785',
29f7c58a	40	'info_dict': {
f69b0554	41	'id': '38935785',
29f7c58a	42	'ext': 'mp4',
29f7c58a	43	'title': 'Offline Reinforcement Learning: From Algorithms to Practical Challenges',
f69b0554	44	'upload_date': '20211115',
f69b0554	45	'timestamp': 1636996003,
3d667e00	46	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	47	'thumbnails': 'count:640',
	48	'chapters': 'count:639',
5ab3534d	49	'duration': 9832,
f69b0554	50	},
	51	'params': {
	52	'skip_download': 'm3u8',
	53	},
	54	}, {
3d667e00	55	# service_name = yoda, /v1/ slides
f69b0554	56	'url': 'https://slideslive.com/38973182/how-should-a-machine-learning-researcher-think-about-ai-ethics',
	57	'info_dict': {
	58	'id': '38973182',
	59	'ext': 'mp4',
	60	'title': 'How Should a Machine Learning Researcher Think About AI Ethics?',
	61	'upload_date': '20220201',
	62	'thumbnail': r're:^https?://.*\.jpg',
	63	'timestamp': 1643728135,
3d667e00	64	'thumbnails': 'count:3',
3d667e00	65	'chapters': 'count:2',
5ab3534d	66	'duration': 5889,
f69b0554	67	},
	68	'params': {
	69	'skip_download': 'm3u8',
29f7c58a	70	},
aa1d5eb9	71	}, {
3d667e00	72	# service_name = youtube, only XML slides info
f69b0554	73	'url': 'https://slideslive.com/38897546/special-metaprednaska-petra-ludwiga-hodnoty-pro-lepsi-spolecnost',
	74	'md5': '8a79b5e3d700837f40bd2afca3c8fa01',
	75	'info_dict': {
	76	'id': 'jmg02wCJD5M',
	77	'display_id': '38897546',
	78	'ext': 'mp4',
	79	'title': 'SPECIÁL: Meta-přednáška Petra Ludwiga - Hodnoty pro lepší společnost',
	80	'description': 'Watch full version of this video at https://slideslive.com/38897546.',
	81	'channel_url': 'https://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
	82	'channel': 'SlidesLive Videos - G1',
	83	'channel_id': 'UCZWdAkNYFncuX0khyvhqnxw',
	84	'uploader_id': 'UCZWdAkNYFncuX0khyvhqnxw',
	85	'uploader': 'SlidesLive Videos - G1',
	86	'uploader_url': 'http://www.youtube.com/channel/UCZWdAkNYFncuX0khyvhqnxw',
	87	'live_status': 'not_live',
	88	'upload_date': '20160710',
	89	'timestamp': 1618786715,
	90	'duration': 6827,
	91	'like_count': int,
	92	'view_count': int,
	93	'comment_count': int,
	94	'channel_follower_count': int,
	95	'age_limit': 0,
3d667e00	96	'thumbnail': r're:^https?://.*\.(?:jpg\|webp)',
3d667e00	97	'thumbnails': 'count:169',
f69b0554	98	'playable_in_embed': True,
	99	'availability': 'unlisted',
	100	'tags': [],
	101	'categories': ['People & Blogs'],
3d667e00	102	'chapters': 'count:168',
	103	},
	104	}, {
	105	# embed-only presentation, only XML slides info
	106	'url': 'https://slideslive.com/embed/presentation/38925850',
	107	'info_dict': {
	108	'id': '38925850',
	109	'ext': 'mp4',
	110	'title': 'Towards a Deep Network Architecture for Structured Smoothness',
	111	'thumbnail': r're:^https?://.*\.jpg',
	112	'thumbnails': 'count:8',
	113	'timestamp': 1629671508,
	114	'upload_date': '20210822',
	115	'chapters': 'count:7',
5ab3534d	116	'duration': 326,
3d667e00	117	},
	118	'params': {
	119	'skip_download': 'm3u8',
f69b0554	120	},
f69b0554	121	}, {
3d667e00	122	# embed-only presentation, only JSON slides info, /v5/ slides (.png)
	123	'url': 'https://slideslive.com/38979920/',
	124	'info_dict': {
	125	'id': '38979920',
	126	'ext': 'mp4',
	127	'title': 'MoReL: Multi-omics Relational Learning',
	128	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	129	'thumbnails': 'count:7',
	130	'timestamp': 1654714970,
	131	'upload_date': '20220608',
	132	'chapters': 'count:6',
5ab3534d	133	'duration': 171,
3d667e00	134	},
	135	'params': {
	136	'skip_download': 'm3u8',
	137	},
	138	}, {
	139	# /v2/ slides (.jpg)
	140	'url': 'https://slideslive.com/38954074',
	141	'info_dict': {
	142	'id': '38954074',
	143	'ext': 'mp4',
	144	'title': 'Decentralized Attribution of Generative Models',
	145	'thumbnail': r're:^https?://.*\.jpg',
	146	'thumbnails': 'count:16',
	147	'timestamp': 1622806321,
	148	'upload_date': '20210604',
	149	'chapters': 'count:15',
5ab3534d	150	'duration': 306,
3d667e00	151	},
	152	'params': {
	153	'skip_download': 'm3u8',
	154	},
	155	}, {
	156	# /v4/ slides (.png)
	157	'url': 'https://slideslive.com/38979570/',
	158	'info_dict': {
	159	'id': '38979570',
	160	'ext': 'mp4',
	161	'title': 'Efficient Active Search for Combinatorial Optimization Problems',
	162	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	163	'thumbnails': 'count:9',
	164	'timestamp': 1654714896,
	165	'upload_date': '20220608',
	166	'chapters': 'count:8',
5ab3534d	167	'duration': 295,
3d667e00	168	},
	169	'params': {
	170	'skip_download': 'm3u8',
	171	},
	172	}, {
	173	# /v10/ slides
	174	'url': 'https://slideslive.com/embed/presentation/38979880?embed_parent_url=https%3A%2F%2Fedit.videoken.com%2F',
	175	'info_dict': {
	176	'id': '38979880',
	177	'ext': 'mp4',
	178	'title': 'The Representation Power of Neural Networks',
	179	'timestamp': 1654714962,
	180	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	181	'thumbnails': 'count:22',
	182	'upload_date': '20220608',
	183	'chapters': 'count:21',
5ab3534d	184	'duration': 294,
3d667e00	185	},
	186	'params': {
	187	'skip_download': 'm3u8',
	188	},
	189	}, {
	190	# /v7/ slides, 2 video slides
	191	'url': 'https://slideslive.com/embed/presentation/38979682?embed_container_origin=https%3A%2F%2Fedit.videoken.com',
	192	'playlist_count': 3,
	193	'info_dict': {
	194	'id': '38979682-playlist',
	195	'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
	196	},
	197	'playlist': [{
	198	'info_dict': {
	199	'id': '38979682',
	200	'ext': 'mp4',
	201	'title': 'LoRA: Low-Rank Adaptation of Large Language Models',
	202	'timestamp': 1654714920,
	203	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	204	'thumbnails': 'count:30',
	205	'upload_date': '20220608',
	206	'chapters': 'count:31',
5ab3534d	207	'duration': 272,
3d667e00	208	},
	209	}, {
	210	'info_dict': {
	211	'id': '38979682-021',
	212	'ext': 'mp4',
	213	'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 021',
	214	'duration': 3,
	215	'timestamp': 1654714920,
	216	'upload_date': '20220608',
	217	},
	218	}, {
	219	'info_dict': {
	220	'id': '38979682-024',
	221	'ext': 'mp4',
	222	'title': 'LoRA: Low-Rank Adaptation of Large Language Models - Slide 024',
	223	'duration': 4,
	224	'timestamp': 1654714920,
	225	'upload_date': '20220608',
	226	},
	227	}],
	228	'params': {
	229	'skip_download': 'm3u8',
	230	},
	231	}, {
	232	# /v6/ slides, 1 video slide, edit.videoken.com embed
	233	'url': 'https://slideslive.com/38979481/',
	234	'playlist_count': 2,
	235	'info_dict': {
	236	'id': '38979481-playlist',
	237	'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
	238	},
	239	'playlist': [{
	240	'info_dict': {
	241	'id': '38979481',
	242	'ext': 'mp4',
	243	'title': 'How to Train Your MAML to Excel in Few-Shot Classification',
	244	'timestamp': 1654714877,
	245	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	246	'thumbnails': 'count:43',
	247	'upload_date': '20220608',
	248	'chapters': 'count:43',
5ab3534d	249	'duration': 315,
3d667e00	250	},
	251	}, {
	252	'info_dict': {
	253	'id': '38979481-013',
	254	'ext': 'mp4',
	255	'title': 'How to Train Your MAML to Excel in Few-Shot Classification - Slide 013',
	256	'duration': 3,
	257	'timestamp': 1654714877,
	258	'upload_date': '20220608',
	259	},
	260	}],
	261	'params': {
	262	'skip_download': 'm3u8',
	263	},
	264	}, {
	265	# /v3/ slides, .jpg and .png, service_name = youtube
	266	'url': 'https://slideslive.com/embed/38932460/',
	267	'info_dict': {
	268	'id': 'RTPdrgkyTiE',
	269	'display_id': '38932460',
	270	'ext': 'mp4',
	271	'title': 'Active Learning for Hierarchical Multi-Label Classification',
	272	'description': 'Watch full version of this video at https://slideslive.com/38932460.',
	273	'channel': 'SlidesLive Videos - A',
	274	'channel_id': 'UC62SdArr41t_-_fX40QCLRw',
	275	'channel_url': 'https://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
	276	'uploader': 'SlidesLive Videos - A',
	277	'uploader_id': 'UC62SdArr41t_-_fX40QCLRw',
	278	'uploader_url': 'http://www.youtube.com/channel/UC62SdArr41t_-_fX40QCLRw',
	279	'upload_date': '20200903',
	280	'timestamp': 1602599092,
	281	'duration': 942,
	282	'age_limit': 0,
	283	'live_status': 'not_live',
	284	'playable_in_embed': True,
	285	'availability': 'unlisted',
	286	'categories': ['People & Blogs'],
	287	'tags': [],
	288	'channel_follower_count': int,
	289	'like_count': int,
	290	'view_count': int,
	291	'thumbnail': r're:^https?://.*\.(?:jpg\|png\|webp)',
	292	'thumbnails': 'count:21',
	293	'chapters': 'count:20',
	294	},
	295	'params': {
	296	'skip_download': 'm3u8',
	297	},
5ab3534d	298	}, {
	299	# /v3/ slides, .png only, service_name = yoda
	300	'url': 'https://slideslive.com/38983994',
	301	'info_dict': {
	302	'id': '38983994',
	303	'ext': 'mp4',
	304	'title': 'Zero-Shot AutoML with Pretrained Models',
	305	'timestamp': 1662384834,
	306	'upload_date': '20220905',
	307	'thumbnail': r're:^https?://.*\.(?:jpg\|png)',
	308	'thumbnails': 'count:23',
	309	'chapters': 'count:22',
	310	'duration': 295,
	311	},
	312	'params': {
	313	'skip_download': 'm3u8',
	314	},
3d667e00	315	}, {
3d667e00	316	# service_name = yoda
aa1d5eb9 RA	317	'url': 'https://slideslive.com/38903721/magic-a-scientific-resurrection-of-an-esoteric-legend',
aa1d5eb9 RA	318	'only_matching': True,
73d8f3a6	319	}, {
3d667e00	320	# dead link, service_name = url
73d8f3a6 RA	321	'url': 'https://slideslive.com/38922070/learning-transferable-skills-1',
	322	'only_matching': True,
	323	}, {
3d667e00	324	# dead link, service_name = vimeo
73d8f3a6 RA	325	'url': 'https://slideslive.com/38921896/retrospectives-a-venue-for-selfreflection-in-ml-research-3',
73d8f3a6 RA	326	'only_matching': True,
d0f2d641 JW	327	}]
d0f2d641 JW	328
3d667e00	329	_WEBPAGE_TESTS = [{
	330	# only XML slides info
	331	'url': 'https://iclr.cc/virtual_2020/poster_Hklr204Fvr.html',
	332	'info_dict': {
	333	'id': '38925850',
	334	'ext': 'mp4',
	335	'title': 'Towards a Deep Network Architecture for Structured Smoothness',
	336	'thumbnail': r're:^https?://.*\.jpg',
	337	'thumbnails': 'count:8',
	338	'timestamp': 1629671508,
	339	'upload_date': '20210822',
	340	'chapters': 'count:7',
5ab3534d	341	'duration': 326,
3d667e00	342	},
	343	'params': {
	344	'skip_download': 'm3u8',
	345	},
	346	}]
	347
	348	@classmethod
	349	def _extract_embed_urls(cls, url, webpage):
	350	# Reference: https://slideslive.com/embed_presentation.js
	351	for embed_id in re.findall(r'(?s)new\s+SlidesLiveEmbed\s\([^)]+\bpresentationId:\s["\'](\d+)["\']', webpage):
	352	url_parsed = urllib.parse.urlparse(url)
	353	origin = f'{url_parsed.scheme}://{url_parsed.netloc}'
	354	yield update_url_query(
	355	f'https://slideslive.com/embed/presentation/{embed_id}', {
	356	'embed_parent_url': url,
	357	'embed_container_origin': origin,
	358	})
	359
	360	def _download_embed_webpage_handle(self, video_id, headers):
	361	return self._download_webpage_handle(
	362	f'https://slideslive.com/embed/presentation/{video_id}', video_id,
	363	headers=headers, query=traverse_obj(headers, {
	364	'embed_parent_url': 'Referer',
	365	'embed_container_origin': 'Origin',
	366	}))
	367
f69b0554	368	def _extract_custom_m3u8_info(self, m3u8_data):
	369	m3u8_dict = {}
	370
	371	lookup = {
	372	'PRESENTATION-TITLE': 'title',
	373	'PRESENTATION-UPDATED-AT': 'timestamp',
	374	'PRESENTATION-THUMBNAIL': 'thumbnail',
	375	'PLAYLIST-TYPE': 'playlist_type',
	376	'VOD-VIDEO-SERVICE-NAME': 'service_name',
	377	'VOD-VIDEO-ID': 'service_id',
	378	'VOD-VIDEO-SERVERS': 'video_servers',
	379	'VOD-SUBTITLES': 'subtitles',
3d667e00	380	'VOD-SLIDES-JSON-URL': 'slides_json_url',
3d667e00	381	'VOD-SLIDES-XML-URL': 'slides_xml_url',
f69b0554	382	}
	383
	384	for line in m3u8_data.splitlines():
	385	if not line.startswith('#EXT-SL-'):
	386	continue
	387	tag, _, value = line.partition(':')
	388	key = lookup.get(tag.lstrip('#EXT-SL-'))
	389	if not key:
	390	continue
	391	m3u8_dict[key] = value
	392
	393	# Some values are stringified JSON arrays
	394	for key in ('video_servers', 'subtitles'):
	395	if key in m3u8_dict:
	396	m3u8_dict[key] = self._parse_json(m3u8_dict[key], None, fatal=False) or []
	397
	398	return m3u8_dict
	399
def _extract_formats_and_duration(self, cdn_hostname, path, video_id, skip_duration=False):
    """Extract the HLS and DASH formats found under ``https://<cdn_hostname>/<path>/``.

    Returns a ``(formats, duration)`` tuple. The duration is read from the HLS
    manifest when HLS formats were found, otherwise from the DASH manifest;
    it stays ``None`` when *skip_duration* is set or neither lookup gives a value.
    """
    mpd_url = f'https://{cdn_hostname}/{path}/master.mpd'
    duration = None

    hls_formats = self._extract_m3u8_formats(
        f'https://{cdn_hostname}/{path}/master.m3u8',
        video_id, 'mp4', m3u8_id='hls', fatal=False, live=True)
    if hls_formats and not skip_duration:
        duration = self._extract_m3u8_vod_duration(
            hls_formats[0]['url'], video_id, note='Extracting duration from HLS manifest')

    dash_formats = self._extract_mpd_formats(
        mpd_url, video_id, mpd_id='dash', fatal=False)
    # Only consult the DASH manifest if HLS did not already provide a duration
    if dash_formats and not (duration or skip_duration):
        duration = self._extract_mpd_vod_duration(
            mpd_url, video_id, note='Extracting duration from DASH manifest')

    # HLS formats first, then DASH; a falsy result contributes nothing
    return [*(hls_formats or []), *(dash_formats or [])], duration
3d667e00	422
def _real_extract(self, url):
    """Extract a SlidesLive presentation and any slides that are videos themselves.

    Returns the info dict of the main video when no slide references a video.
    Otherwise returns a playlist whose first entry is the main video, followed
    by one entry per slide video hosted on the 'yoda' service.

    Raises ExtractorError when the presentation is embed-only and the redirect
    names no allowed domain, or when the player reports an unknown service.
    """
    video_id = self._match_id(url)
    # Referer/Origin come from the embed_* query parameters of the input URL, if any
    webpage, urlh = self._download_embed_webpage_handle(
        video_id, headers=traverse_obj(parse_qs(url), {
            'Referer': ('embed_parent_url', -1),
            'Origin': ('embed_container_origin', -1)}))
    redirect_url = urlh.geturl()
    if 'domain_not_allowed' in redirect_url:
        # Retry as if embedded on the first domain listed in the redirect URL
        domain = traverse_obj(parse_qs(redirect_url), ('allowed_domains[]', ...), get_all=False)
        if not domain:
            raise ExtractorError(
                'This is an embed-only presentation. Try passing --referer', expected=True)
        webpage, _ = self._download_embed_webpage_handle(video_id, headers={
            'Referer': f'https://{domain}/',
            'Origin': f'https://{domain}',
        })

    player_token = self._search_regex(r'data-player-token="([^"]+)"', webpage, 'player token')
    player_data = self._download_webpage(
        f'https://ben.slideslive.com/player/{video_id}', video_id,
        note='Downloading player info', query={'player_token': player_token})
    player_info = self._extract_custom_m3u8_info(player_data)

    service_name = player_info['service_name'].lower()
    # Validate explicitly: an assert would be stripped when running with -O
    if service_name not in ('url', 'yoda', 'vimeo', 'youtube'):
        raise ExtractorError(f'Unsupported video service: {service_name!r}')
    service_id = player_info['service_id']

    # Template placeholders: presentation id, slide image name, file extension
    slide_url_template = 'https://slides.slideslive.com/%s/slides/original/%s%s'
    slides, slides_info = {}, []

    if player_info.get('slides_json_url'):
        slides = self._download_json(
            player_info['slides_json_url'], video_id, fatal=False,
            note='Downloading slides JSON', errnote=False) or {}
        slide_ext_default = '.png'
        slide_quality = traverse_obj(slides, ('slide_qualities', 0))
        if slide_quality:
            # When qualities are listed, slides are served from the CDN path of
            # the first one and default to .jpg
            slide_ext_default = '.jpg'
            slide_url_template = f'https://cdn.slideslive.com/data/presentations/%s/slides/{slide_quality}/%s%s'
        for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...), expected_type=dict), 1):
            slides_info.append((
                slide_id, traverse_obj(slide, ('image', 'name')),
                traverse_obj(slide, ('image', 'extname'), default=slide_ext_default),
                int_or_none(slide.get('time'), scale=1000)))

    # Fall back to the XML slides info when the JSON one is missing or empty
    if not slides and player_info.get('slides_xml_url'):
        slides = self._download_xml(
            player_info['slides_xml_url'], video_id, fatal=False,
            note='Downloading slides XML', errnote='Failed to download slides info')
        slide_url_template = 'https://cdn.slideslive.com/data/presentations/%s/slides/big/%s%s'
        # Do not truth-test the Element: that is deprecated and a childless
        # Element is falsy. Only skip when the non-fatal download gave nothing.
        if slides is None or slides is False:
            xml_slides = []
        else:
            xml_slides = slides.findall('./slide')
        for slide_id, slide in enumerate(xml_slides, 1):
            slides_info.append((
                slide_id, xpath_text(slide, './slideName', 'name'), '.jpg',
                int_or_none(xpath_text(slide, './timeSec', 'time'))))

    chapters, thumbnails = [], []
    if url_or_none(player_info.get('thumbnail')):
        thumbnails.append({'id': 'cover', 'url': player_info['thumbnail']})
    for slide_id, slide_path, slide_ext, start_time in slides_info:
        # A slide without an image still gets a chapter, just no thumbnail
        if slide_path:
            thumbnails.append({
                'id': f'{slide_id:03d}',
                'url': slide_url_template % (video_id, slide_path, slide_ext),
            })
        chapters.append({
            'title': f'Slide {slide_id:03d}',
            'start_time': start_time,
        })

    subtitles = {}
    for sub in traverse_obj(player_info, ('subtitles', ...), expected_type=dict):
        webvtt_url = url_or_none(sub.get('webvtt_url'))
        if not webvtt_url:
            continue
        subtitles.setdefault(sub.get('language') or 'en', []).append({
            'url': webvtt_url,
            'ext': 'vtt',
        })

    info = {
        'id': video_id,
        'title': player_info.get('title') or self._html_search_meta('title', webpage, default=''),
        'timestamp': unified_timestamp(player_info.get('timestamp')),
        'is_live': player_info.get('playlist_type') != 'vod',
        'thumbnails': thumbnails,
        'chapters': chapters,
        'subtitles': subtitles,
    }

    if service_name == 'url':
        info['url'] = service_id
    elif service_name == 'yoda':
        formats, duration = self._extract_formats_and_duration(
            player_info['video_servers'][0], service_id, video_id)
        info.update({
            'duration': duration,
            'formats': formats,
        })
    else:
        # vimeo / youtube: hand over to the extractor named after the service
        info.update({
            '_type': 'url_transparent',
            'url': service_id,
            'ie_key': service_name.capitalize(),
            'display_id': video_id,
        })
        if service_name == 'vimeo':
            info['url'] = smuggle_url(
                f'https://player.vimeo.com/video/{service_id}',
                {'http_headers': {'Referer': url}})

    video_slides = traverse_obj(slides, ('slides', ..., 'video', 'id'))
    if not video_slides:
        return info

    def entries():
        # The main video always comes first
        yield info

        service_data = self._download_json(
            f'https://ben.slideslive.com/player/{video_id}/slides_video_service_data',
            video_id, fatal=False, query={
                'player_token': player_token,
                'videos': ','.join(video_slides),
            }, note='Downloading video slides info', errnote='Failed to download video slides info') or {}

        for slide_id, slide in enumerate(traverse_obj(slides, ('slides', ...)), 1):
            if traverse_obj(slide, ('video', 'service')) != 'yoda':
                continue
            video_path = traverse_obj(slide, ('video', 'id'))
            # Use the first server listed for this slide video
            cdn_hostname = traverse_obj(service_data, (
                video_path, 'video_servers', ...), get_all=False)
            if not cdn_hostname or not video_path:
                continue
            formats, _ = self._extract_formats_and_duration(
                cdn_hostname, video_path, video_id, skip_duration=True)
            if not formats:
                continue
            yield {
                'id': f'{video_id}-{slide_id:03d}',
                'title': f'{info["title"]} - Slide {slide_id:03d}',
                'timestamp': info['timestamp'],
                'duration': int_or_none(traverse_obj(slide, ('video', 'duration_ms')), scale=1000),
                'formats': formats,
            }

    return self.playlist_result(entries(), f'{video_id}-playlist', info['title'])