[yt-dlp.git] / yt_dlp / extractor / veo.py

from .common import InfoExtractor

from ..utils import (
    int_or_none,
    mimetype2ext,
    str_or_none,
    unified_timestamp,
    url_or_none,
)


class VeoIE(InfoExtractor):
    # Extractor for match pages on app.veo.co. The path component following
    # /matches/ is captured as the video id and reused for the API requests below.
    _VALID_URL = r'https?://app\.veo\.co/matches/(?P<id>[0-9A-Za-z-_]+)'

    _TESTS = [{
        'url': 'https://app.veo.co/matches/20201027-last-period/',
        'info_dict': {
            'id': '20201027-last-period',
            'ext': 'mp4',
            'title': 'Akidemy u11s v Bradford Boys u11s (Game 3)',
            'thumbnail': 're:https://c.veocdn.com/.+/thumbnail.jpg',
            'upload_date': '20201028',
            'timestamp': 1603847208,
            'duration': 1916,
            'view_count': int,
        },
    }, {
        'url': 'https://app.veo.co/matches/20220313-2022-03-13_u15m-plsjq-vs-csl/',
        'only_matching': True,
    }]

    def _veo_formats(self, video_data, video_id):
        """Yield a format dict for every usable entry in ``video_data``.

        ``video_data`` is the decoded response of the ``/videos`` endpoint and is
        iterated entry by entry; ``video_id`` is only passed through to the URL
        check performed for panorama entries.
        """
        for entry in video_data:
            mime = str_or_none(entry.get('mime_type'))
            media_url = url_or_none(entry.get('url'))
            # Drop entries without a usable URL, as well as the 'video/mp2t'
            # entry, which is the configuration file for the panoramic video.
            if mime == 'video/mp2t' or not media_url:
                continue

            height = int_or_none(entry.get('height'))
            render_type = str_or_none(entry.get('render_type'))
            # A format id is only built when both of its parts are present.
            if render_type and height:
                format_id = f'{render_type}-{height}p'
            else:
                format_id = None

            # Veo lists panoramic video information even when no panoramic video
            # is available (e.g. https://app.veo.co/matches/20201027-last-period/),
            # so panorama URLs are checked before being offered as a format.
            if render_type == 'panorama' and not self._is_valid_url(media_url, video_id, format_id):
                continue

            yield {
                'url': media_url,
                'format_id': format_id,
                'ext': mimetype2ext(mime),
                'width': int_or_none(entry.get('width')),
                'height': height,
                'vbr': int_or_none(entry.get('bit_rate'), scale=1000),
            }

    def _real_extract(self, url):
        """Build the info dict for a match from its metadata and video list.

        Two JSON documents are requested: the match metadata and the list of
        videos belonging to the match. The former supplies the descriptive
        fields, the latter the formats.
        """
        video_id = self._match_id(url)

        metadata = self._download_json(
            'https://app.veo.co/api/app/matches/%s' % video_id, video_id)
        video_data = self._download_json(
            'https://app.veo.co/api/app/matches/%s/videos' % video_id,
            video_id, 'Downloading video data')

        # Materialise the formats first so that any URL checks happen before
        # the metadata fields are converted.
        formats = list(self._veo_formats(video_data, video_id))

        info = {
            'id': video_id,
            'title': str_or_none(metadata.get('title')),
            'formats': formats,
            'thumbnail': url_or_none(metadata.get('thumbnail')),
            'timestamp': unified_timestamp(metadata.get('created')),
            'view_count': int_or_none(metadata.get('view_count')),
            'duration': int_or_none(metadata.get('duration')),
        }
        return info
Commit	Line	Data
2333ea10	1	from .common import InfoExtractor
	2
	3	from ..utils import (
	4	int_or_none,
	5	mimetype2ext,
f4ad9192	6	str_or_none,
2333ea10	7	unified_timestamp,
	8	url_or_none,
	9	)
	10
	11
	12	class VeoIE(InfoExtractor):
5a373d97	13	_VALID_URL = r'https?://app\.veo\.co/matches/(?P<id>[0-9A-Za-z-_]+)'
2333ea10	14
	15	_TESTS = [{
	16	'url': 'https://app.veo.co/matches/20201027-last-period/',
	17	'info_dict': {
	18	'id': '20201027-last-period',
	19	'ext': 'mp4',
	20	'title': 'Akidemy u11s v Bradford Boys u11s (Game 3)',
	21	'thumbnail': 're:https://c.veocdn.com/.+/thumbnail.jpg',
	22	'upload_date': '20201028',
	23	'timestamp': 1603847208,
	24	'duration': 1916,
f4ad9192	25	'view_count': int,
2333ea10	26	}
5a373d97	27	}, {
	28	'url': 'https://app.veo.co/matches/20220313-2022-03-13_u15m-plsjq-vs-csl/',
	29	'only_matching': True,
2333ea10	30	}]
	31
	32	def _real_extract(self, url):
	33	video_id = self._match_id(url)
	34
	35	metadata = self._download_json(
	36	'https://app.veo.co/api/app/matches/%s' % video_id, video_id)
	37
	38	video_data = self._download_json(
	39	'https://app.veo.co/api/app/matches/%s/videos' % video_id, video_id, 'Downloading video data')
	40
2333ea10	41	formats = []
2333ea10	42	for fmt in video_data:
f4ad9192	43	mimetype = str_or_none(fmt.get('mime_type'))
f4ad9192	44	format_url = url_or_none(fmt.get('url'))
2333ea10	45	# skip configuration file for panoramic video
f4ad9192	46	if not format_url or mimetype == 'video/mp2t':
2333ea10	47	continue
f4ad9192	48
2333ea10	49	height = int_or_none(fmt.get('height'))
f4ad9192	50	render_type = str_or_none(fmt.get('render_type'))
	51	format_id = f'{render_type}-{height}p' if render_type and height else None
	52
	53	# Veo returns panoramic video information even if panoramic video is not available.
	54	# e.g. https://app.veo.co/matches/20201027-last-period/
	55	if render_type == 'panorama':
	56	if not self._is_valid_url(format_url, video_id, format_id):
	57	continue
	58
2333ea10	59	formats.append({
f4ad9192	60	'url': format_url,
f4ad9192	61	'format_id': format_id,
2333ea10	62	'ext': mimetype2ext(mimetype),
	63	'width': int_or_none(fmt.get('width')),
	64	'height': height,
f4ad9192	65	'vbr': int_or_none(fmt.get('bit_rate'), scale=1000),
2333ea10	66	})
2333ea10	67
2333ea10	68	return {
2333ea10	69	'id': video_id,
f4ad9192	70	'title': str_or_none(metadata.get('title')),
2333ea10	71	'formats': formats,
f4ad9192	72	'thumbnail': url_or_none(metadata.get('thumbnail')),
	73	'timestamp': unified_timestamp(metadata.get('created')),
	74	'view_count': int_or_none(metadata.get('view_count')),
	75	'duration': int_or_none(metadata.get('duration')),
2333ea10	76	}