]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/servus.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / servus.py
1 from .common import InfoExtractor
2 from ..utils import (
3 ExtractorError,
4 float_or_none,
5 format_field,
6 int_or_none,
7 join_nonempty,
8 traverse_obj,
9 unescapeHTML,
10 unified_timestamp,
11 )
12
13
class ServusIE(InfoExtractor):
    """Extractor for video pages on servustv.com, servus.com and pm-wissen.com."""

    # Accepted ids are either an "AA"-prefixed token (any letter case, dash
    # optional) or two digit groups joined by a dash.
    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
                            (?:servustv|pm-wissen)\.com/(?:[^/]+/)?v(?:ideos)?
                        )
                        /(?P<id>[aA]{2}-?\w+|\d+-\d+)
                    '''
    _TESTS = [{
        # URL schema v3
        'url': 'https://www.servustv.com/natur/v/aa-28bycqnh92111/',
        'info_dict': {
            'id': 'AA-28BYCQNH92111',
            'ext': 'mp4',
            'title': 'Klettersteige in den Alpen',
            'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2823,
            'timestamp': 1655752333,
            'upload_date': '20220620',
            'series': 'Bergwelten',
            'season': 'Season 11',
            'season_number': 11,
            'episode': 'Episode 8 - Vie Ferrate – Klettersteige in den Alpen',
            'episode_number': 8,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.servustv.com/natur/v/aa-1xg5xwmgw2112/',
        'only_matching': True,
    }, {
        'url': 'https://www.servustv.com/natur/v/aansszcx3yi9jmlmhdc1/',
        'only_matching': True,
    }, {
        # URL schema v2
        'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
        'only_matching': True,
    }, {
        # URL schema v1
        'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
        'only_matching': True,
    }, {
        'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
        'only_matching': True,
    }, {
        'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
        'only_matching': True,
    }, {
        'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
        'only_matching': True,
    }, {
        'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video referenced by *url*.

        The id is taken from the URL and upper-cased, the player JSON is
        fetched for it, and the HLS manifest named by its ``videoUrl`` is
        expanded into formats and subtitles. When the JSON carries no
        ``videoUrl`` the error reporter is invoked, which always raises.
        """
        video_id = self._match_id(url).upper()

        video = self._download_json(
            'https://api-player.redbull.com/stv/servus-tv?timeZone=Europe/Berlin',
            video_id, 'Downloading video JSON', query={'videoId': video_id})
        if not video.get('videoUrl'):
            self._report_errors(video)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            video['videoUrl'], video_id, 'mp4', m3u8_id='hls')

        def leading_number(pattern, label, note):
            # A missing label is searched as an empty string, so the result
            # is simply None instead of an error.
            return int_or_none(self._search_regex(
                pattern, label or '', note, default=None))

        season_label = video.get('season')
        episode_label = video.get('chapter')
        season_no = leading_number(r'Season (\d+)', season_label, 'season number')
        episode_no = leading_number(r'Episode (\d+)', episode_label, 'episode number')

        # The backend description is preferred; the player JSON one is the fallback.
        description = self._get_description(video_id) or video.get('description')

        return {
            'id': video_id,
            'title': video.get('title'),
            'description': description,
            'thumbnail': video.get('poster'),
            'duration': float_or_none(video.get('duration')),
            'timestamp': unified_timestamp(video.get('currentSunrise')),
            'series': video.get('label'),
            'season': season_label,
            'season_number': season_no,
            'episode': episode_label,
            'episode_number': episode_no,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_description(self, video_id):
        """Return the short and long backend descriptions joined by a blank line.

        The backend request is non-fatal. Each description that is present
        has its double newlines collapsed to single ones and its HTML
        entities unescaped before joining.
        """
        asset = self._download_json(
            f'https://backend.servustv.com/wp-json/rbmh/v2/media_asset/aa_id/{video_id}?fieldset=page',
            video_id, fatal=False)

        def tidy(text):
            return unescapeHTML(text.replace('\n\n', '\n'))

        description_fields = ('stv_short_description', 'stv_long_description')
        pieces = traverse_obj(asset, (description_fields, {tidy}))
        return join_nonempty(*pieces, delim='\n\n')

    def _report_errors(self, video):
        """Raise an ExtractorError explaining why *video* has no ``videoUrl``.

        This method never returns. With no playability errors listed, a plain
        ExtractorError is raised; otherwise the error is marked as expected
        and its message depends on which playability error was reported.
        """
        errors = traverse_obj(video, ('playabilityErrors', ...))
        if not errors:
            raise ExtractorError('No videoUrl and no information about errors')

        if 'FSK_BLOCKED' in errors:
            fsk = traverse_obj(video, ('playabilityErrorDetails', 'FSK_BLOCKED'), expected_type=dict)
            # Each fragment is empty when its detail is absent, so the joined
            # text can be empty as well, which selects the generic message.
            restrictions = ''.join(
                format_field(fsk, key, template)
                for key, template in (
                    ('minEveningHour', ' from %02d:00'),
                    ('maxMorningHour', ' to %02d:00'),
                    ('minAge', ' (Minimum age %d)'),
                ))
            message = (
                format_field(restrictions, None, 'Only available%s')
                or 'Blocked by FSK with unknown availability')
        elif 'NOT_YET_AVAILABLE' in errors:
            # Two alternative lookup paths: the error detail, then 'currentSunrise'.
            available_from = (
                ('playabilityErrorDetails', 'NOT_YET_AVAILABLE', 'availableFrom'),
                'currentSunrise',
            )
            message = (
                format_field(video, available_from, 'Only available from %s')
                or 'Video not yet available with unknown availability')
        else:
            message = f'Video unavailable: {", ".join(errors)}'

        raise ExtractorError(message, expected=True)