jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/servus.py
[ie/orf:on] Improve extraction (#9677)
[yt-dlp.git] / yt_dlp / extractor / servus.py
CommitLineData
e1d168e5 1from .common import InfoExtractor
8bdd16b4 2from ..utils import (
f40e32fb 3 ExtractorError,
8bdd16b4 4 float_or_none,
f40e32fb 5 format_field,
8bdd16b4 6 int_or_none,
f40e32fb
SL
7 join_nonempty,
8 traverse_obj,
9 unescapeHTML,
8bdd16b4 10 unified_timestamp,
8bdd16b4 11)
e1d168e5
AS
12
13
class ServusIE(InfoExtractor):
    """Extractor for Servus TV videos (servus.com, servustv.com and pm-wissen.com URLs)."""

    _VALID_URL = r'''(?x)
                    https?://
                        (?:www\.)?
                        (?:
                            servus\.com/(?:(?:at|de)/p/[^/]+|tv/videos)|
                            (?:servustv|pm-wissen)\.com/(?:[^/]+/)?v(?:ideos)?
                        )
                        /(?P<id>[aA]{2}-?\w+|\d+-\d+)
                    '''
    _TESTS = [{
        # URL schema v3
        'url': 'https://www.servustv.com/natur/v/aa-28bycqnh92111/',
        'info_dict': {
            'id': 'AA-28BYCQNH92111',
            'ext': 'mp4',
            'title': 'Klettersteige in den Alpen',
            'description': 'md5:25e47ddd83a009a0f9789ba18f2850ce',
            'thumbnail': r're:^https?://.*\.jpg',
            'duration': 2823,
            'timestamp': 1655752333,
            'upload_date': '20220620',
            'series': 'Bergwelten',
            'season': 'Season 11',
            'season_number': 11,
            'episode': 'Episode 8 - Vie Ferrate – Klettersteige in den Alpen',
            'episode_number': 8,
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.servustv.com/natur/v/aa-1xg5xwmgw2112/',
        'only_matching': True,
    }, {
        'url': 'https://www.servustv.com/natur/v/aansszcx3yi9jmlmhdc1/',
        'only_matching': True,
    }, {
        # URL schema v2
        'url': 'https://www.servustv.com/videos/aa-1t6vbu5pw1w12/',
        'only_matching': True,
    }, {
        # URL schema v1
        'url': 'https://www.servus.com/de/p/Die-Gr%C3%BCnen-aus-Sicht-des-Volkes/AA-1T6VBU5PW1W12/',
        'only_matching': True,
    }, {
        'url': 'https://www.servus.com/at/p/Wie-das-Leben-beginnt/1309984137314-381415152/',
        'only_matching': True,
    }, {
        'url': 'https://www.servus.com/tv/videos/aa-1t6vbu5pw1w12/',
        'only_matching': True,
    }, {
        'url': 'https://www.servus.com/tv/videos/1380889096408-1235196658/',
        'only_matching': True,
    }, {
        'url': 'https://www.pm-wissen.com/videos/aa-24mus4g2w2112/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Query the player API for the video in *url* and assemble its info dict.

        When the API response carries no 'videoUrl', the playability errors
        are reported via _report_errors(), which raises.
        """
        # URLs may carry the ID in lower case; it is upper-cased before use
        video_id = self._match_id(url).upper()

        video = self._download_json(
            'https://api-player.redbull.com/stv/servus-tv?timeZone=Europe/Berlin',
            video_id, 'Downloading video JSON', query={'videoId': video_id})

        stream_url = video.get('videoUrl')
        if not stream_url:
            self._report_errors(video)
        formats, subtitles = self._extract_m3u8_formats_and_subtitles(
            stream_url, video_id, 'mp4', m3u8_id='hls')

        # Season/episode numbers are parsed out of the human-readable labels
        season_label = video.get('season')
        season_match = self._search_regex(
            r'Season (\d+)', season_label or '', 'season number', default=None)
        episode_label = video.get('chapter')
        episode_match = self._search_regex(
            r'Episode (\d+)', episode_label or '', 'episode number', default=None)

        # Prefer the backend description; fall back to the player API's one
        description = self._get_description(video_id) or video.get('description')

        return {
            'id': video_id,
            'title': video.get('title'),
            'description': description,
            'thumbnail': video.get('poster'),
            'duration': float_or_none(video.get('duration')),
            'timestamp': unified_timestamp(video.get('currentSunrise')),
            'series': video.get('label'),
            'season': season_label,
            'season_number': int_or_none(season_match),
            'episode': episode_label,
            'episode_number': int_or_none(episode_match),
            'formats': formats,
            'subtitles': subtitles,
        }

    def _get_description(self, video_id):
        """Join the short and long descriptions from the servustv backend.

        The metadata download is non-fatal (fatal=False). The pieces found
        are joined with a blank line between them.
        """
        page_info = self._download_json(
            f'https://backend.servustv.com/wp-json/rbmh/v2/media_asset/aa_id/{video_id}?fieldset=page',
            video_id, fatal=False)

        def _tidy(text):
            # Collapse blank lines inside a piece, then decode HTML entities
            return unescapeHTML(text.replace('\n\n', '\n'))

        pieces = traverse_obj(page_info, (
            ('stv_short_description', 'stv_long_description'), {_tidy}))
        return join_nonempty(*pieces, delim='\n\n')

    def _report_errors(self, video):
        """Raise an ExtractorError built from the playability errors in *video*.

        This method never returns normally. Only the "no error information"
        case is raised without expected=True.
        """
        error_codes = traverse_obj(video, ('playabilityErrors', ...))
        if not error_codes:
            raise ExtractorError('No videoUrl and no information about errors')

        if 'FSK_BLOCKED' in error_codes:
            fsk_details = traverse_obj(
                video, ('playabilityErrorDetails', 'FSK_BLOCKED'), expected_type=dict)
            # Each known restriction contributes one fragment of the message
            restrictions = ''.join(
                format_field(fsk_details, key, template)
                for key, template in (
                    ('minEveningHour', ' from %02d:00'),
                    ('maxMorningHour', ' to %02d:00'),
                    ('minAge', ' (Minimum age %d)'),
                ))
            raise ExtractorError(
                format_field(restrictions, None, 'Only available%s')
                or 'Blocked by FSK with unknown availability',
                expected=True)

        if 'NOT_YET_AVAILABLE' in error_codes:
            # Two alternative paths are given for the availability date
            available_from = format_field(
                video, (('playabilityErrorDetails', 'NOT_YET_AVAILABLE', 'availableFrom'), 'currentSunrise'),
                'Only available from %s')
            raise ExtractorError(
                available_from or 'Video not yet available with unknown availability',
                expected=True)

        raise ExtractorError(
            f'Video unavailable: {", ".join(error_codes)}', expected=True)
135 raise ExtractorError(message, expected=True)