[yt-dlp.git] / yt_dlp / extractor / clipchamp.py

from .common import InfoExtractor
from ..utils import (
    ExtractorError,
    traverse_obj,
    unified_timestamp,
    url_or_none,
)


class ClipchampIE(InfoExtractor):
    """Extractor for Clipchamp watch pages (``clipchamp.com/watch/<id>``).

    Clip metadata is read from the Next.js data embedded in the watch page;
    the media is requested from ``cloudflarestream.com`` as DASH and HLS
    manifests.
    """

    _VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
    _TESTS = [{
        'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
        'info_dict': {
            'id': 'gRXZ4ZhdDaU',
            'ext': 'mp4',
            'title': 'Untitled video',
            'uploader': 'Alexander Schwartz',
            'timestamp': 1680805580,
            'upload_date': '20230406',
            'thumbnail': r're:^https?://.+\.jpg',
        },
        'params': {'skip_download': 'm3u8'},
    }]

    # Placeholders, in order: customer subdomain, stream path, manifest extension
    _STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
    # Query string sent along with every manifest request
    _STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}

    def _real_extract(self, url):
        """Build the info dict for a single Clipchamp clip.

        Raises ExtractorError when the clip's ``storage_location`` is anything
        other than ``cf_stream``, the only location handled here.
        """
        video_id = self._match_id(url)
        nextjs_data = self._search_nextjs_data(
            self._download_webpage(url, video_id), video_id)
        video = nextjs_data['props']['pageProps']['video']

        storage_location = video.get('storage_location')
        if storage_location != 'cf_stream':
            raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')

        path = video['download_url']
        player_page = self._download_webpage(
            f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
        # The regex lookup is non-fatal: when the iframe does not expose the
        # attribute, the hard-coded customer subdomain below is used instead.
        subdomain = self._search_regex(
            r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', player_page,
            'subdomain', fatal=False)
        if not subdomain:
            subdomain = 'customer-2ut9yn3y6fta1yxe'

        def manifest_url(ext):
            return self._STREAM_URL_TMPL % (subdomain, path, ext)

        # Both manifests are optional (fatal=False); DASH formats are listed
        # first, followed by the HLS ones.
        formats = [
            *self._extract_mpd_formats(
                manifest_url('mpd'), video_id,
                query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash'),
            *self._extract_m3u8_formats(
                manifest_url('m3u8'), video_id, 'mp4',
                query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'),
        ]

        # Join whichever of first/last name are present; an empty result becomes None
        creator_names = traverse_obj(
            video, ('creator', ('first_name', 'last_name'), {str}))
        metadata = traverse_obj(video, {
            'title': ('project', 'project_name', {str}),
            'timestamp': ('created_at', {unified_timestamp}),
            'thumbnail': ('thumbnail_url', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            'uploader': ' '.join(creator_names) or None,
            **metadata,
        }
Commit	Line	Data
2f07c4c1	1	from .common import InfoExtractor
	2	from ..utils import (
	3	ExtractorError,
	4	traverse_obj,
	5	unified_timestamp,
	6	url_or_none,
	7	)
	8
	9
	10	class ClipchampIE(InfoExtractor):
	11	_VALID_URL = r'https?://(?:www\.)?clipchamp\.com/watch/(?P<id>[\w-]+)'
	12	_TESTS = [{
	13	'url': 'https://clipchamp.com/watch/gRXZ4ZhdDaU',
	14	'info_dict': {
	15	'id': 'gRXZ4ZhdDaU',
	16	'ext': 'mp4',
	17	'title': 'Untitled video',
	18	'uploader': 'Alexander Schwartz',
	19	'timestamp': 1680805580,
	20	'upload_date': '20230406',
	21	'thumbnail': r're:^https?://.+\.jpg',
	22	},
	23	'params': {'skip_download': 'm3u8'},
	24	}]
	25
	26	_STREAM_URL_TMPL = 'https://%s.cloudflarestream.com/%s/manifest/video.%s'
	27	_STREAM_URL_QUERY = {'parentOrigin': 'https://clipchamp.com'}
	28
	29	def _real_extract(self, url):
	30	video_id = self._match_id(url)
	31	webpage = self._download_webpage(url, video_id)
	32	data = self._search_nextjs_data(webpage, video_id)['props']['pageProps']['video']
	33
	34	storage_location = data.get('storage_location')
	35	if storage_location != 'cf_stream':
	36	raise ExtractorError(f'Unsupported clip storage location "{storage_location}"')
	37
	38	path = data['download_url']
	39	iframe = self._download_webpage(
	40	f'https://iframe.cloudflarestream.com/{path}', video_id, 'Downloading player iframe')
	41	subdomain = self._search_regex(
	42	r'\bcustomer-domain-prefix=["\']([\w-]+)["\']', iframe,
	43	'subdomain', fatal=False) or 'customer-2ut9yn3y6fta1yxe'
	44
	45	formats = self._extract_mpd_formats(
	46	self._STREAM_URL_TMPL % (subdomain, path, 'mpd'), video_id,
	47	query=self._STREAM_URL_QUERY, fatal=False, mpd_id='dash')
	48	formats.extend(self._extract_m3u8_formats(
	49	self._STREAM_URL_TMPL % (subdomain, path, 'm3u8'), video_id, 'mp4',
	50	query=self._STREAM_URL_QUERY, fatal=False, m3u8_id='hls'))
	51
	52	return {
	53	'id': video_id,
	54	'formats': formats,
	55	'uploader': ' '.join(traverse_obj(data, ('creator', ('first_name', 'last_name'), {str}))) or None,
	56	**traverse_obj(data, {
	57	'title': ('project', 'project_name', {str}),
	58	'timestamp': ('created_at', {unified_timestamp}),
	59	'thumbnail': ('thumbnail_url', {url_or_none}),
	60	}),
	61	}