]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bibeltv.py
[ie/youtube] Suppress "Unavailable videos are hidden" warning (#10159)
[yt-dlp.git] / yt_dlp / extractor / bibeltv.py
CommitLineData
e3a3ed8a 1import functools
4ad58667 2
00dd0cd5 3from .common import InfoExtractor
4ad58667
M
4from ..utils import (
5 ExtractorError,
6 clean_html,
7 determine_ext,
8 format_field,
9 int_or_none,
10 js_to_json,
11 orderedSet,
12 parse_iso8601,
13 traverse_obj,
14 url_or_none,
15)
16
17
class BibelTVBaseIE(InfoExtractor):
    """Shared configuration and helpers for the BibelTV extractors."""

    _GEO_COUNTRIES = ['AT', 'CH', 'DE']
    _GEO_BYPASS = False

    API_URL = 'https://www.bibeltv.de/mediathek/api'
    # Sent verbatim as the `Authorization` header of video API requests
    AUTH_TOKEN = 'j88bRXY8DsEqJ9xmTdWhrByVi5Hm'

    def _extract_formats_and_subtitles(self, data, crn_id, *, is_live=False):
        """Collect formats and subtitles from a collection of media source entries.

        Each entry of `data` that holds a valid URL under its `src` key is
        dispatched on the extension of that URL:
          * `m3u8` / `mpd` manifests are expanded with the matching manifest helper,
          * `mp4` is added as a single direct format,
          * any other extension only produces a warning.

        @param data     Collection of mappings, each expected to carry a `src` URL
        @param crn_id   ID passed on to the manifest helpers
        @param is_live  Forwarded as `live` to the m3u8 helper
        @returns        A `(formats, subtitles)` tuple
        """
        formats = []
        subtitles = {}
        for media_url in traverse_obj(data, (..., 'src', {url_or_none})):
            media_ext = determine_ext(media_url)
            if media_ext == 'm3u8':
                m3u8_formats, m3u8_subs = self._extract_m3u8_formats_and_subtitles(
                    media_url, crn_id, live=is_live)
                formats.extend(m3u8_formats)
                # Merge language by language: dict.update() would replace the
                # track list already gathered for a language by another manifest
                self._merge_subtitles(m3u8_subs, target=subtitles)
            elif media_ext == 'mpd':
                mpd_formats, mpd_subs = self._extract_mpd_formats_and_subtitles(media_url, crn_id)
                formats.extend(mpd_formats)
                self._merge_subtitles(mpd_subs, target=subtitles)
            elif media_ext == 'mp4':
                formats.append({'url': media_url})
            else:
                self.report_warning(f'Unknown format {media_ext!r}')

        return formats, subtitles

    @staticmethod
    def _extract_base_info(data):
        """Map the metadata fields shared by video and playlist-entry results.

        `data['crn']` is required (a missing key raises `KeyError`); every other
        field is looked up through `traverse_obj`.
        """
        return {
            'id': data['crn'],
            **traverse_obj(data, {
                'title': 'title',
                'description': 'description',
                # NOTE(review): scale=1000 suggests the API reports milliseconds - confirm
                'duration': ('duration', {functools.partial(int_or_none, scale=1000)}),
                'timestamp': ('schedulingStart', {parse_iso8601}),
                'season_number': 'seasonNumber',
                'episode_number': 'episodeNumber',
                'view_count': 'viewCount',
                'like_count': 'likeCount',
            }),
            # orderedSet removes repeated thumbnail entries while keeping their order
            'thumbnails': orderedSet(traverse_obj(data, ('images', ..., {
                'url': ('url', {url_or_none}),
            }))),
        }

    def _extract_url_info(self, data):
        """Build a `url`-type result pointing at the mediathek page named by `data['slug']`."""
        return {
            '_type': 'url',
            'url': format_field(data, 'slug', 'https://www.bibeltv.de/mediathek/videos/%s'),
            **self._extract_base_info(data),
        }

    def _extract_video_info(self, data):
        """Build a full `video`-type result for one video entry.

        Reports DRM when `data['drm']` is truthy, then requests the video API for
        `data['id']`. The request is non-fatal: on failure an empty mapping is used,
        so the result is returned with empty formats and subtitles.
        """
        crn_id = data['crn']

        if data.get('drm'):
            self.report_drm(crn_id)

        json_data = self._download_json(
            format_field(data, 'id', f'{self.API_URL}/video/%s'), crn_id,
            headers={'Authorization': self.AUTH_TOKEN}, fatal=False,
            errnote='No formats available') or {}

        formats, subtitles = self._extract_formats_and_subtitles(
            traverse_obj(json_data, ('video', 'videoUrls', ...)), crn_id)

        return {
            '_type': 'video',
            **self._extract_base_info(data),
            'formats': formats,
            'subtitles': subtitles,
        }
92
93
class BibelTVVideoIE(BibelTVBaseIE):
    """Extractor for a single video page under `/mediathek/videos/`."""

    IE_DESC = 'BibelTV single video'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/videos/(?P<id>\d+)[\w-]+'
    IE_NAME = 'bibeltv:video'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/videos/344436-alte-wege',
        'md5': 'ec1c07efe54353780512e8a4103b612e',
        'info_dict': {
            'id': '344436',
            'ext': 'mp4',
            'title': 'Alte Wege',
            'description': 'md5:2f4eb7294c9797a47b8fd13cccca22e9',
            'timestamp': 1677877071,
            'duration': 150.0,
            'upload_date': '20230303',
            'thumbnail': r're:https://bibeltv\.imgix\.net/[\w-]+\.jpg',
            'episode': 'Episode 1',
            'episode_number': 1,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'format': '6',
        },
    }]

    def _real_extract(self, url):
        """Resolve a video page URL into a video result.

        Uses the first entry of `props.pageProps.videoPageData.videos` in the
        page's Next.js data; raises `ExtractorError` when no such mapping exists.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        nextjs_data = self._search_nextjs_data(webpage, video_id)

        first_video = traverse_obj(
            nextjs_data, ('props', 'pageProps', 'videoPageData', 'videos', 0, {dict}))
        if first_video:
            return self._extract_video_info(first_video)

        raise ExtractorError('Missing video data.')
130
131
class BibelTVSeriesIE(BibelTVBaseIE):
    """Extractor for a series page under `/mediathek/serien/`, returned as a playlist."""

    IE_DESC = 'BibelTV series playlist'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/mediathek/serien/(?P<id>\d+)[\w-]+'
    IE_NAME = 'bibeltv:series'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/mediathek/serien/333485-ein-wunder-fuer-jeden-tag',
        'playlist_mincount': 400,
        'info_dict': {
            'id': '333485',
            'title': 'Ein Wunder für jeden Tag',
            'description': 'Tägliche Kurzandacht mit Déborah Rosenkranz.',
        },
    }]

    def _real_extract(self, url):
        """Resolve a series page URL into a playlist of `url`-type entries.

        The series mapping is read from `props.pageProps.seriePageData` in the
        page's Next.js data; raises `ExtractorError` when it is absent.
        """
        series_id = self._match_id(url)
        series = traverse_obj(
            self._search_nextjs_data(self._download_webpage(url, series_id), series_id),
            ('props', 'pageProps', 'seriePageData', {dict}))
        if not series:
            raise ExtractorError('Missing series data.')

        # Each dict under `videos` becomes a deferred entry for the video extractor
        entries = traverse_obj(series, ('videos', ..., {dict}, {self._extract_url_info}))
        playlist_title = series.get('title')
        playlist_description = clean_html(series.get('description'))

        return self.playlist_result(entries, series_id, playlist_title, playlist_description)
158
159
class BibelTVLiveIE(BibelTVBaseIE):
    """Extractor for a live channel page under `/livestreams/`."""

    IE_DESC = 'BibelTV live program'
    _VALID_URL = r'https?://(?:www\.)?bibeltv\.de/livestreams/(?P<id>[\w-]+)'
    IE_NAME = 'bibeltv:live'

    _TESTS = [{
        'url': 'https://www.bibeltv.de/livestreams/bibeltv/',
        'info_dict': {
            'id': 'bibeltv',
            'ext': 'mp4',
            'title': 're:Bibel TV',
            'live_status': 'is_live',
            'thumbnail': 'https://streampreview.bibeltv.de/bibeltv.webp',
        },
        'params': {'skip_download': 'm3u8'},
    }, {
        'url': 'https://www.bibeltv.de/livestreams/impuls/',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Resolve a livestream page URL into a live video result.

        The stream description is the JSON object that follows a backslash-escaped
        `"video":` key in the page source; its `src` entries provide the formats.
        """
        channel_id = self._match_id(url)
        page = self._download_webpage(url, channel_id)

        def unescape_and_convert(jstring):
            # The object is embedded with escaped quotes; restore them before conversion
            return js_to_json(jstring.replace('\\"', '"'))

        player_data = self._search_json(
            r'\\"video\\":', page, 'bibeltvData', channel_id,
            transform_source=unescape_and_convert)

        sources = traverse_obj(player_data, ('src', ...))
        formats, subtitles = self._extract_formats_and_subtitles(
            sources, channel_id, is_live=True)

        info = {
            'id': channel_id,
            'title': player_data.get('title'),
            'thumbnail': player_data.get('poster'),
            'is_live': True,
            'formats': formats,
            'subtitles': subtitles,
        }
        return info