]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/magentamusik.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / magentamusik.py
1 from .common import InfoExtractor
2 from ..utils import ExtractorError, int_or_none, join_nonempty, url_or_none
3 from ..utils.traversal import traverse_obj
4
5
class MagentaMusikIE(InfoExtractor):
    """Extractor for video pages on magentamusik.de."""

    # The whole first path segment of the URL is captured as the display id.
    _VALID_URL = r'https?://(?:www\.)?magentamusik\.de/(?P<id>[^/?#]+)'

    _TESTS = [{
        'url': 'https://www.magentamusik.de/marty-friedman-woa-2023-9208205928595409235',
        'md5': 'd82dd4748f55fc91957094546aaf8584',
        'info_dict': {
            'id': '9208205928595409235',
            'display_id': 'marty-friedman-woa-2023-9208205928595409235',
            'ext': 'mp4',
            'title': 'Marty Friedman: W:O:A 2023',
            'alt_title': 'Konzert vom: 05.08.2023 13:00',
            'duration': 2760,
            'categories': ['Musikkonzert'],
            'release_year': 2023,
            'location': 'Deutschland',
        },
    }]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at ``url``.

        The page's embedded player config yields an asset id, the asset
        details yield the video id, and the player endpoint for that video id
        yields both the SMIL manifest URL and the metadata.

        Raises:
            ExtractorError: if the page has no player config (reported as an
                expected error), or if the video id or the SMIL manifest URL
                cannot be found in the API responses.
        """
        display_id = self._match_id(url)
        webpage = self._download_webpage(url, display_id)

        # Looked up non-fatally so a page without a player produces the
        # friendlier "expected" error below.
        player_config = self._search_json(
            r'data-js-element="o-video-player__config">', webpage, 'player config', display_id, fatal=False)
        if not player_config:
            raise ExtractorError('No video found', expected=True)

        asset_id = player_config['assetId']
        asset_details = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/assetdetails/58938/{asset_id}',
            display_id, note='Downloading asset details')

        # First string-valued "reference" among the partnerInformation entries.
        video_id = traverse_obj(
            asset_details, ('content', 'partnerInformation', ..., 'reference', {str}), get_all=False)
        if not video_id:
            raise ExtractorError('Unable to extract video id')

        vod_data = self._download_json(
            f'https://wcps.t-online.de/cvss/magentamusic/vodclient/v2/player/58935/{video_id}/Main%20Movie', video_id)

        # First href that passes url_or_none across all representations and
        # content packages.
        smil_url = traverse_obj(
            vod_data, ('content', 'feature', 'representations', ...,
                       'contentPackages', ..., 'media', 'href', {url_or_none}), get_all=False)
        if not smil_url:
            # Fail with a clear message instead of handing None to the SMIL
            # downloader, mirroring the video id check above.
            raise ExtractorError('Unable to extract SMIL URL')

        return {
            'id': video_id,
            'display_id': display_id,
            'formats': self._extract_smil_formats(smil_url, video_id),
            **traverse_obj(vod_data, ('content', 'feature', 'metadata', {
                'title': 'title',
                'alt_title': 'originalTitle',
                'description': 'longDescription',
                'duration': ('runtimeInSeconds', {int_or_none}),
                # Joined into one comma-separated string.
                'location': ('countriesOfProduction', {list}, {lambda x: join_nonempty(*x, delim=', ')}),
                'release_year': ('yearOfProduction', {int_or_none}),
                # The single genre string is wrapped in a list.
                'categories': ('mainGenre', {str}, {lambda x: x and [x]}),
            })),
        }