[yt-dlp.git] / yt_dlp / extractor / sportdeutschland.py

from .common import InfoExtractor
from ..utils import (
    clean_html,
    float_or_none,
    int_or_none,
    parse_iso8601,
    parse_qs,
    strip_or_none,
    try_get,
)


class SportDeutschlandIE(InfoExtractor):
    """Extractor for sportdeutschland.tv pages.

    The page's permalink is resolved through the backend API. An asset that
    carries more than one video is returned as a ``multi_video`` result
    (optionally narrowed to one part through the ``playlistId`` query
    parameter); any other asset is returned as a single video.
    """

    _VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
    _TESTS = [{
        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
        'info_dict': {
            'id': '5318cac0275701382770543d7edaf0a0',
            'ext': 'mp4',
            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
            'duration': 16106.36,
        },
        'params': {
            'noplaylist': True,
            # m3u8 download
            'skip_download': True,
        },
    }, {
        'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
        'info_dict': {
            'id': 'c6e2fdd01f63013854c47054d2ab776f',
            'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
            'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
            'duration': 31397,
        },
        'playlist_count': 2,
    }, {
        'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the asset behind *url*.

        Returns a ``multi_video`` result when the asset has several videos,
        otherwise a single-video result with HLS formats.
        """
        display_id = self._match_id(url)
        data = self._download_json(
            'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
            display_id, query={'access_token': 'true'})
        asset = data['asset']
        title = (asset.get('title') or asset['label']).strip()
        asset_id = asset.get('id') or asset.get('uuid')
        info = {
            'id': asset_id,
            'title': title,
            'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
            'duration': int_or_none(asset.get('seconds')),
        }
        videos = asset.get('videos') or []
        if len(videos) > 1:
            # Remember each part's 1-based position up front so the fallback
            # title ("Teil N") stays correct after narrowing to one part.
            parts = list(enumerate(videos, 1))
            playlist_id = parse_qs(url).get('playlistId', [None])[0]
            # Without a playlistId there is no part to narrow down to, so the
            # whole asset is returned and the playlist prompt is skipped.
            if playlist_id is not None and not self._yes_playlist(playlist_id, asset_id):
                part_index = int_or_none(playlist_id)
                if part_index is not None and 0 <= part_index < len(parts):
                    parts = [parts[part_index]]
                else:
                    # A malformed or out-of-range value must not crash the
                    # extraction; fall back to returning every part.
                    self.report_warning(
                        'Ignoring invalid playlistId %r; extracting all parts' % playlist_id)

            def entries():
                for part_number, video in parts:
                    video_id = video.get('uuid')
                    video_url = video.get('url')
                    if not (video_id and video_url):
                        continue
                    formats = self._extract_m3u8_formats(
                        video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
                    # Skip parts without formats unless the user asked to
                    # keep format-less entries.
                    if not formats and not self.get_param('ignore_no_formats'):
                        continue
                    yield {
                        'id': video_id,
                        'formats': formats,
                        'title': title + ' - ' + (video.get('label') or 'Teil %d' % part_number),
                        'duration': float_or_none(video.get('duration')),
                    }
            info.update({
                '_type': 'multi_video',
                'entries': entries(),
            })
        else:
            # The asset may carry no video at all (or one without a URL);
            # report that explicitly instead of failing with an IndexError
            # or KeyError.
            video_url = try_get(videos, lambda x: x[0]['url'], str)
            formats = []
            if video_url:
                formats = self._extract_m3u8_formats(
                    video_url.replace('.smil', '.m3u8'), asset_id, 'mp4')
            else:
                self.raise_no_formats(
                    'No playable video found', expected=True, video_id=asset_id)
            section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
            info.update({
                'formats': formats,
                'display_id': asset.get('permalink'),
                'thumbnail': try_get(asset, lambda x: x['images'][0]),
                'categories': [section_title] if section_title else None,
                'view_count': int_or_none(asset.get('views')),
                'is_live': asset.get('is_live') is True,
                'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
            })
        return info
Commit	Line	Data
704df56d	1	from .common import InfoExtractor
1cc79574	2	from ..utils import (
10db0d2f	3	clean_html,
	4	float_or_none,
	5	int_or_none,
704df56d	6	parse_iso8601,
4dfbf869	7	parse_qs,
10db0d2f	8	strip_or_none,
10db0d2f	9	try_get,
704df56d PH	10	)
	11
	12
	13	class SportDeutschlandIE(InfoExtractor):
10db0d2f	14	_VALID_URL = r'https?://sportdeutschland\.tv/(?P<id>(?:[^/]+/)?[^?#/&]+)'
3524cc25	15	_TESTS = [{
0d006fac	16	'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
704df56d	17	'info_dict': {
10db0d2f	18	'id': '5318cac0275701382770543d7edaf0a0',
704df56d	19	'ext': 'mp4',
10db0d2f	20	'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals - Teil 1',
10db0d2f	21	'duration': 16106.36,
704df56d	22	},
10db0d2f	23	'params': {
	24	'noplaylist': True,
	25	# m3u8 download
	26	'skip_download': True,
	27	},
	28	}, {
	29	'url': 'https://sportdeutschland.tv/badminton/re-live-deutsche-meisterschaften-2020-halbfinals?playlistId=0',
	30	'info_dict': {
	31	'id': 'c6e2fdd01f63013854c47054d2ab776f',
	32	'title': 'Re-live: Deutsche Meisterschaften 2020 - Halbfinals',
	33	'description': 'md5:5263ff4c31c04bb780c9f91130b48530',
	34	'duration': 31397,
	35	},
	36	'playlist_count': 2,
	37	}, {
	38	'url': 'https://sportdeutschland.tv/freeride-world-tour-2021-fieberbrunn-oesterreich',
	39	'only_matching': True,
3524cc25	40	}]
704df56d PH	41
704df56d PH	42	def _real_extract(self, url):
10db0d2f	43	display_id = self._match_id(url)
	44	data = self._download_json(
	45	'https://backend.sportdeutschland.tv/api/permalinks/' + display_id,
	46	display_id, query={'access_token': 'true'})
704df56d	47	asset = data['asset']
10db0d2f	48	title = (asset.get('title') or asset['label']).strip()
	49	asset_id = asset.get('id') or asset.get('uuid')
	50	info = {
	51	'id': asset_id,
	52	'title': title,
	53	'description': clean_html(asset.get('body') or asset.get('description')) or asset.get('teaser'),
	54	'duration': int_or_none(asset.get('seconds')),
	55	}
	56	videos = asset.get('videos') or []
	57	if len(videos) > 1:
4dfbf869	58	playlist_id = parse_qs(url).get('playlistId', [None])[0]
f40ee5e9	59	if not self._yes_playlist(playlist_id, asset_id):
f40ee5e9	60	videos = [videos[int(playlist_id)]]
3524cc25	61
10db0d2f	62	def entries():
	63	for i, video in enumerate(videos, 1):
	64	video_id = video.get('uuid')
	65	video_url = video.get('url')
	66	if not (video_id and video_url):
	67	continue
	68	formats = self._extract_m3u8_formats(
	69	video_url.replace('.smil', '.m3u8'), video_id, 'mp4', fatal=False)
a06916d9	70	if not formats and not self.get_param('ignore_no_formats'):
10db0d2f	71	continue
	72	yield {
	73	'id': video_id,
	74	'formats': formats,
	75	'title': title + ' - ' + (video.get('label') or 'Teil %d' % i),
	76	'duration': float_or_none(video.get('duration')),
	77	}
	78	info.update({
	79	'_type': 'multi_video',
	80	'entries': entries(),
	81	})
3524cc25	82	else:
10db0d2f	83	formats = self._extract_m3u8_formats(
	84	videos[0]['url'].replace('.smil', '.m3u8'), asset_id, 'mp4')
	85	section_title = strip_or_none(try_get(data, lambda x: x['section']['title']))
	86	info.update({
	87	'formats': formats,
	88	'display_id': asset.get('permalink'),
	89	'thumbnail': try_get(asset, lambda x: x['images'][0]),
	90	'categories': [section_title] if section_title else None,
	91	'view_count': int_or_none(asset.get('views')),
	92	'is_live': asset.get('is_live') is True,
	93	'timestamp': parse_iso8601(asset.get('date') or asset.get('published_at')),
	94	})
	95	return info