]>
Commit | Line | Data |
---|---|---|
e3a3ed8a | 1 | import functools |
d5822b96 PH |
2 | import re |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
5f009a09 | 6 | OnDemandPagedList, |
b6951271 | 7 | bug_reports_message, |
f9b85496 | 8 | determine_ext, |
6d3d3fc0 | 9 | int_or_none, |
5f009a09 | 10 | join_nonempty, |
c099ec93 | 11 | jwt_decode_hs256, |
5f009a09 | 12 | make_archive_id, |
6d3d3fc0 | 13 | parse_duration, |
5f009a09 | 14 | parse_iso8601, |
15 | remove_start, | |
75258218 | 16 | str_or_none, |
6d3d3fc0 | 17 | unified_strdate, |
31eeab9f | 18 | update_url_query, |
3052a30d | 19 | url_or_none, |
75258218 | 20 | xpath_text, |
d5822b96 | 21 | ) |
5f009a09 | 22 | from ..utils.traversal import traverse_obj |
d5822b96 | 23 | |
f9b85496 | 24 | |
class ARDMediathekBaseIE(InfoExtractor):
    """Helpers that turn an ARD media-info JSON document into an info dict."""

    _GEO_COUNTRIES = ['DE']

    def _extract_media_info(self, media_info_url, webpage, video_id):
        """Download the media JSON from `media_info_url` and parse it.

        The `fsk` flag handed to `_parse_media_info` is whether the literal
        text `"fsk"` occurs anywhere in `webpage`.
        """
        media_json = self._download_json(
            media_info_url, video_id, 'Downloading media JSON')
        has_fsk_marker = '"fsk"' in webpage
        return self._parse_media_info(media_json, video_id, has_fsk_marker)

    def _parse_media_info(self, media_info, video_id, fsk):
        """Build the info dict for `video_id` from an already downloaded media JSON.

        When no formats could be extracted, an `fsk`-flagged video is reported
        as time restricted and a `_geoblocked` one as geo restricted.
        """
        formats = self._extract_formats(media_info, video_id)

        if not formats and fsk:
            self.raise_no_formats(
                'This video is only available after 20:00', expected=True)
        elif not formats and media_info.get('_geoblocked'):
            self.raise_geo_restricted(
                'This video is not available due to geoblocking',
                countries=self._GEO_COUNTRIES, metadata_available=True)

        ttml_url = media_info.get('_subtitleUrl')
        # The WebVTT variant is derived from the TTML URL by swapping the path
        # segment and appending the extension
        subtitles = {} if not ttml_url else {
            'de': [{
                'ext': 'ttml',
                'url': ttml_url,
            }, {
                'ext': 'vtt',
                'url': ttml_url.replace('/ebutt/', '/webvtt/') + '.vtt',
            }],
        }

        return {
            'id': video_id,
            'duration': int_or_none(media_info.get('_duration')),
            'thumbnail': media_info.get('_previewImage'),
            'is_live': media_info.get('_isLive') is True,
            'formats': formats,
            'subtitles': subtitles,
        }

    def _extract_formats(self, media_info, video_id):
        """Collect all formats listed under `_mediaArray` of `media_info`.

        HDS (f4m) and HLS (m3u8) manifests are expanded only for streams whose
        `_quality` is 'auto'; every other URL becomes a single direct or RTMP
        format.
        """
        media_type = media_info.get('_type')
        collected = []
        for index, entry in enumerate(media_info.get('_mediaArray', [])):
            for stream in entry.get('_mediaStreamArray', []):
                candidates = stream.get('_stream')
                if not candidates:
                    continue
                # `_stream` may be a single URL or a list of URLs
                if not isinstance(candidates, list):
                    candidates = [candidates]
                quality = stream.get('_quality')
                server = stream.get('_server')
                for candidate in candidates:
                    if not url_or_none(candidate):
                        continue
                    ext = determine_ext(candidate)

                    if ext in ('f4m', 'm3u8'):
                        if quality != 'auto':
                            continue
                        if ext == 'f4m':
                            manifest_url = update_url_query(candidate, {
                                'hdcore': '3.1.1',
                                'plugin': 'aasp-3.1.1.69.124'
                            })
                            collected.extend(self._extract_f4m_formats(
                                manifest_url, video_id, f4m_id='hds', fatal=False))
                        else:
                            collected.extend(self._extract_m3u8_formats(
                                candidate, video_id, 'mp4', 'm3u8_native',
                                m3u8_id='hls', fatal=False))
                        continue

                    if server and server.startswith('rtmp'):
                        fmt = {
                            'url': server,
                            'play_path': candidate,
                            'format_id': f'a{index}-rtmp-{quality}',
                        }
                    else:
                        fmt = {
                            'url': candidate,
                            'format_id': f'a{index}-{ext}-{quality}',
                        }

                    # Resolution is encoded in file names like ..._960x540.mp4
                    resolution = re.search(
                        r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', candidate)
                    if resolution:
                        fmt['width'] = int(resolution.group('width'))
                        fmt['height'] = int(resolution.group('height'))
                    if media_type == 'audio':
                        fmt['vcodec'] = 'none'
                    collected.append(fmt)
        return collected
118 | ||
c968f738 | 119 | |
class ARDIE(InfoExtractor):
    """Extractor for daserste.de video pages, driven by the page's `~playerXml.xml`."""

    _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html'
    _TESTS = [{
        # available till 7.12.2023
        'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html',
        'md5': '94812e6438488fb923c361a44469614b',
        'info_dict': {
            'id': 'maischberger-video-424',
            'display_id': 'maischberger-video-424',
            'ext': 'mp4',
            'duration': 4452.0,
            'title': 'maischberger am 07.12.2022',
            'upload_date': '20221207',
            'thumbnail': r're:^https?://.*\.jpg$',
        },
    }, {
        'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html',
        'only_matching': True,
    }, {
        'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html',
        'only_matching': True,
    }, {
        'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/videos/diversity-tag-sanam-afrashteh100.html',
        'only_matching': True,
    }, {
        'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html',
        'only_matching': True,
    }, {
        'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html',
        'only_matching': True,
    }, {
        'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html',
        'only_matching': True,
    }]

    # (XPath relative to the <video> node, subtitle extension)
    _SUB_FORMATS = (
        ('./dataTimedText', 'ttml'),
        ('./dataTimedTextNoOffset', 'ttml'),
        ('./dataTimedTextVtt', 'vtt'),
    )

    def _real_extract(self, url):
        """Extract formats, subtitles and metadata from the player XML of `url`.

        The player XML lives next to the page: the page URL without `.html`
        plus `~playerXml.xml`. Each `<asset>` below `<video>` becomes either a
        set of HLS/HDS formats, an RTMP format or a direct HTTP format.
        """
        mobj = self._match_valid_url(url)
        display_id = mobj.group('id')

        player_url = mobj.group('mainurl') + '~playerXml.xml'
        doc = self._download_xml(player_url, display_id)
        video_node = doc.find('./video')
        upload_date = unified_strdate(xpath_text(
            video_node, './broadcastDate'))
        thumbnail = xpath_text(video_node, './/teaserImage//variant/url')

        formats = []
        for a in video_node.findall('.//asset'):
            file_name = xpath_text(a, './fileName', default=None)
            if not file_name:
                continue
            format_type = a.attrib.get('type')
            # `format_url` is None when fileName is not an absolute URL
            # (e.g. an RTMP play path)
            format_url = url_or_none(file_name)
            if format_url:
                ext = determine_ext(file_name)
                if ext == 'm3u8':
                    formats.extend(self._extract_m3u8_formats(
                        format_url, display_id, 'mp4', entry_protocol='m3u8_native',
                        m3u8_id=format_type or 'hls', fatal=False))
                    continue
                elif ext == 'f4m':
                    formats.extend(self._extract_f4m_formats(
                        update_url_query(format_url, {'hdcore': '3.7.0'}),
                        display_id, f4m_id=format_type or 'hds', fatal=False))
                    continue
            f = {
                'format_id': format_type,
                'width': int_or_none(xpath_text(a, './frameWidth')),
                'height': int_or_none(xpath_text(a, './frameHeight')),
                'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
                'abr': int_or_none(xpath_text(a, './bitrateAudio')),
                'vcodec': xpath_text(a, './codecVideo'),
                'tbr': int_or_none(xpath_text(a, './totalBitrate')),
            }
            server_prefix = xpath_text(a, './serverPrefix', default=None)
            if server_prefix:
                f.update({
                    'url': server_prefix,
                    # Must be `play_path` (the key ARDMediathekBaseIE also
                    # uses for RTMP formats); a `playpath` key is not a
                    # recognised format field and would be ignored
                    'play_path': file_name,
                })
            else:
                if not format_url:
                    continue
                f['url'] = format_url
            formats.append(f)

        subtitles = {}
        for subsel, subext in self._SUB_FORMATS:
            for node in video_node.findall(subsel):
                sub_url = node.attrib.get('url')
                # Skip subtitle nodes without a URL instead of failing the
                # whole extraction with a KeyError
                if not sub_url:
                    continue
                subtitles.setdefault('de', []).append({
                    'url': sub_url,
                    'ext': subext,
                })

        return {
            'id': xpath_text(video_node, './videoId', default=display_id),
            'formats': formats,
            'subtitles': subtitles,
            'display_id': display_id,
            'title': video_node.find('./title').text,
            'duration': parse_duration(video_node.find('./duration').text),
            'upload_date': upload_date,
            'thumbnail': thumbnail,
        }
c1a37eb2 PH |
230 | |
231 | ||
class ARDBetaMediathekIE(InfoExtractor):
    """Extractor for single videos and live streams on ardmediathek.de."""

    IE_NAME = 'ARDMediathek'
    _VALID_URL = r'''(?x)https://
        (?:(?:beta|www)\.)?ardmediathek\.de/
        (?:[^/]+/)?
        (?:player|live|video)/
        (?:[^?#]+/)?
        (?P<id>[a-zA-Z0-9]+)
        /?(?:[?#]|$)'''
    _GEO_COUNTRIES = ['DE']
    _TOKEN_URL = 'https://sso.ardmediathek.de/sso/token'

    _TESTS = [{
        'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
        'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4',
        'info_dict': {
            'display_id': 'Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0',
            'id': '12939099',
            'title': 'Liebe auf vier Pfoten',
            'description': r're:^Claudia Schmitt, Anwältin in Salzburg',
            'duration': 5222,
            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b',
            'timestamp': 1701343800,
            'upload_date': '20231130',
            'ext': 'mp4',
            'episode': 'Liebe auf vier Pfoten',
            'series': 'Filme im MDR',
            'age_limit': 0,
            'channel': 'MDR',
            '_old_archive_ids': ['ardbetamediathek Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0'],
        },
    }, {
        'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/',
        'md5': 'a1dc75a39c61601b980648f7c9f9f71d',
        'info_dict': {
            'display_id': 'die-robuste-roswita',
            'id': '78566716',
            'title': 'Die robuste Roswita',
            'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita',
            'duration': 5316,
            'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard',
            'timestamp': 1596658200,
            'upload_date': '20200805',
            'ext': 'mp4',
        },
        'skip': 'Error',
    }, {
        'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
        'md5': '1e73ded21cb79bac065117e80c81dc88',
        'info_dict': {
            'id': '10049223',
            'ext': 'mp4',
            'title': 'tagesschau, 20:00 Uhr',
            'timestamp': 1636398000,
            'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b',
            'upload_date': '20211108',
            'display_id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll',
            'duration': 915,
            'episode': 'tagesschau, 20:00 Uhr',
            'series': 'tagesschau',
            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678',
            'channel': 'ARD-Aktuell',
            '_old_archive_ids': ['ardbetamediathek Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll'],
        },
    }, {
        'url': 'https://www.ardmediathek.de/video/7-tage/7-tage-unter-harten-jungs/hr-fernsehen/N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
        'md5': 'c428b9effff18ff624d4f903bda26315',
        'info_dict': {
            'id': '94834686',
            'ext': 'mp4',
            'duration': 2700,
            'episode': '7 Tage ... unter harten Jungs',
            'description': 'md5:0f215470dcd2b02f59f4bd10c963f072',
            'upload_date': '20231005',
            'timestamp': 1696491171,
            'display_id': 'N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3',
            'series': '7 Tage ...',
            'channel': 'HR',
            'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:f6e6d5ffac41925c?w=960&ch=fa32ba69bc87989a',
            'title': '7 Tage ... unter harten Jungs',
            '_old_archive_ids': ['ardbetamediathek N2I2YmM5MzgtNWFlOS00ZGFlLTg2NzMtYzNjM2JlNjk4MDg3'],
        },
    }, {
        'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE',
        'only_matching': True,
    }, {
        'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/',
        'only_matching': True,
    }, {
        'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/',
        'only_matching': True,
    }, {
        'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/',
        'only_matching': True,
    }, {
        'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg',
        'only_matching': True,
    }, {
        'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/',
        'only_matching': True,
    }]

    def _extract_episode_info(self, title):
        """Derive season/episode fields from a page title.

        The patterns are tried in order and the first one that matches wins;
        the last pattern matches any string and yields the full title as the
        episode name. Always returns a dict: an empty one is returned when
        nothing could be derived (e.g. `title` is not a string), so the result
        can be safely unpacked with `**`.
        """
        patterns = [
            # Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
            # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
            r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
            # E.g.: title="Fritjof aus Norwegen (2) (AD)"
            # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
            r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
            r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
            # E.g.: title="Folge 25/42: Symmetrie"
            # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
            # E.g.: title="Folge 1063 - Vertrauen"
            # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
            r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
            # As a fallback use the full title
            r'(?P<title>.*)',
        ]

        # The episode name is, in order of preference: an explicit `episode`
        # group, the title with the matched `ep_info` part removed, or the
        # whole title
        return traverse_obj(patterns, (..., {functools.partial(re.match, string=title)}, {
            'season_number': ('season_number', {int_or_none}),
            'episode_number': ('episode_number', {int_or_none}),
            'episode': ((
                ('episode', {str_or_none}),
                ('ep_info', {lambda x: title.replace(x, '')}),
                ('title', {str}),
            ), {str.strip}),
        }), get_all=False) or {}

    def _real_extract(self, url):
        """Extract a video or live stream via the page-gateway item API.

        If an `ams` cookie is present for the SSO token URL, an ID token is
        fetched and sent along so that age restricted content can be returned
        for verified accounts; failure to obtain it only produces a warning.
        """
        display_id = self._match_id(url)
        query = {'embedded': 'false', 'mcV6': 'true'}
        headers = {}

        if self._get_cookies(self._TOKEN_URL).get('ams'):
            token = self._download_json(
                self._TOKEN_URL, display_id, 'Fetching token for age verification',
                'Unable to fetch age verification token', fatal=False)
            id_token = traverse_obj(token, ('idToken', {str}))
            decoded_token = traverse_obj(id_token, ({jwt_decode_hs256}, {dict}))
            user_id = traverse_obj(decoded_token, (('user_id', 'sub'), {str}), get_all=False)
            if not user_id:
                self.report_warning('Unable to extract token, continuing without authentication')
            else:
                headers['x-authorization'] = f'Bearer {id_token}'
                query['userId'] = user_id
                if decoded_token.get('age_rating') != 18:
                    self.report_warning('Account is not verified as 18+; video may be unavailable')

        page_data = self._download_json(
            f'https://api.ardmediathek.de/page-gateway/pages/ard/item/{display_id}',
            display_id, query=query, headers=headers)

        # For user convenience we use the old contentId instead of the longer crid
        # Ref: https://github.com/yt-dlp/yt-dlp/issues/8731#issuecomment-1874398283
        old_id = traverse_obj(page_data, ('tracking', 'atiCustomVars', 'contentId', {int}))
        if old_id is not None:
            video_id = str(old_id)
            archive_ids = [make_archive_id(ARDBetaMediathekIE, display_id)]
        else:
            self.report_warning(f'Could not extract contentId{bug_reports_message()}')
            video_id = display_id
            archive_ids = None

        # Fall back to an empty dict when the page has no player widget so the
        # lookups below yield "no formats" rather than an AttributeError
        player_data = traverse_obj(
            page_data, ('widgets', lambda _, v: v['type'] in ('player_ondemand', 'player_live'), {dict}),
            get_all=False) or {}
        is_live = player_data.get('type') == 'player_live'
        media_data = traverse_obj(player_data, ('mediaCollection', 'embedded', {dict}))

        if player_data.get('blockedByFsk'):
            self.raise_login_required('This video is only available for age verified users or after 22:00')

        formats = []
        subtitles = {}
        for stream in traverse_obj(media_data, ('streams', ..., {dict})):
            kind = stream.get('kind')
            # Prioritize main stream over sign language and others
            preference = 1 if kind == 'main' else None
            for media in traverse_obj(stream, ('media', lambda _, v: url_or_none(v['url']))):
                media_url = media['url']

                # Language tag is the language code plus any non-standard
                # audio kind, so only plain German audio gets the high preference
                audio_kind = traverse_obj(media, (
                    'audios', 0, 'kind', {str}), default='').replace('standard', '')
                lang_code = traverse_obj(media, ('audios', 0, 'languageCode', {str})) or 'deu'
                lang = join_nonempty(lang_code, audio_kind)
                language_preference = 10 if lang == 'deu' else -10

                if determine_ext(media_url) == 'm3u8':
                    fmts, subs = self._extract_m3u8_formats_and_subtitles(
                        media_url, video_id, m3u8_id=f'hls-{kind}', preference=preference, fatal=False, live=is_live)
                    for f in fmts:
                        f['language'] = lang
                        f['language_preference'] = language_preference
                    formats.extend(fmts)
                    self._merge_subtitles(subs, target=subtitles)
                else:
                    formats.append({
                        'url': media_url,
                        'format_id': f'http-{kind}',
                        'preference': preference,
                        'language': lang,
                        'language_preference': language_preference,
                        **traverse_obj(media, {
                            'format_note': ('forcedLabel', {str}),
                            'width': ('maxHResolutionPx', {int_or_none}),
                            'height': ('maxVResolutionPx', {int_or_none}),
                            'vcodec': ('videoCodec', {str}),
                        }),
                    })

        for sub in traverse_obj(media_data, ('subtitles', ..., {dict})):
            for sources in traverse_obj(sub, ('sources', lambda _, v: url_or_none(v['url']))):
                subtitles.setdefault(sub.get('languageCode') or 'deu', []).append({
                    'url': sources['url'],
                    'ext': {'webvtt': 'vtt', 'ebutt': 'ttml'}.get(sources.get('kind')),
                })

        # `fskRating` is stripped of its 'FSK' prefix and parsed as an integer
        age_limit = traverse_obj(page_data, ('fskRating', {lambda x: remove_start(x, 'FSK')}, {int_or_none}))
        return {
            'id': video_id,
            'display_id': display_id,
            'formats': formats,
            'subtitles': subtitles,
            'is_live': is_live,
            'age_limit': age_limit,
            **traverse_obj(media_data, ('meta', {
                'title': 'title',
                'description': 'synopsis',
                'timestamp': ('broadcastedOnDateTime', {parse_iso8601}),
                'series': 'seriesTitle',
                'thumbnail': ('images', 0, 'url', {url_or_none}),
                'duration': ('durationSeconds', {int_or_none}),
                'channel': 'clipSourceName',
            })),
            **self._extract_episode_info(page_data.get('title')),
            '_old_archive_ids': archive_ids,
        }
470 | ||
471 | ||
class ARDMediathekCollectionIE(InfoExtractor):
    """Extractor for ardmediathek.de playlists (`sendung`, `serie` and `sammlung` URLs)."""

    _VALID_URL = r'''(?x)https://
        (?:(?:beta|www)\.)?ardmediathek\.de/
        (?:[^/?#]+/)?
        (?P<playlist>sendung|serie|sammlung)/
        (?:(?P<display_id>[^?#]+?)/)?
        (?P<id>[a-zA-Z0-9]+)
        (?:/(?P<season>\d+)(?:/(?P<version>OV|AD))?)?/?(?:[?#]|$)'''
    _GEO_COUNTRIES = ['DE']

    _TESTS = [{
        'url': 'https://www.ardmediathek.de/serie/quiz/staffel-1-originalversion/Y3JpZDovL3dkci5kZS9vbmUvcXVpeg/1/OV',
        'info_dict': {
            'id': 'Y3JpZDovL3dkci5kZS9vbmUvcXVpeg_1_OV',
            'display_id': 'quiz/staffel-1-originalversion',
            'title': 'Staffel 1 Originalversion',
        },
        'playlist_count': 3,
    }, {
        'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-4-mit-audiodeskription/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/4/AD',
        'info_dict': {
            'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_4_AD',
            'display_id': 'babylon-berlin/staffel-4-mit-audiodeskription',
            'title': 'Staffel 4 mit Audiodeskription',
        },
        'playlist_count': 12,
    }, {
        'url': 'https://www.ardmediathek.de/serie/babylon-berlin/staffel-1/Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu/1/',
        'info_dict': {
            'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL2JhYnlsb24tYmVybGlu_1',
            'display_id': 'babylon-berlin/staffel-1',
            'title': 'Staffel 1',
        },
        'playlist_count': 8,
    }, {
        'url': 'https://www.ardmediathek.de/sendung/tatort/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
        'info_dict': {
            'id': 'Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydA',
            'display_id': 'tatort',
            'title': 'Tatort',
        },
        'playlist_mincount': 500,
    }, {
        'url': 'https://www.ardmediathek.de/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2',
        'info_dict': {
            'id': '5eOHzt8XB2sqeFXbIoJlg2',
            'display_id': 'die-kirche-bleibt-im-dorf',
            'title': 'Die Kirche bleibt im Dorf',
            'description': 'Die Kirche bleibt im Dorf',
        },
        'playlist_count': 4,
    }, {
        # playlist of type 'sendung'
        'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/',
        'only_matching': True,
    }, {
        # playlist of type 'serie'
        'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1',
        'only_matching': True,
    }, {
        # playlist of type 'sammlung'
        'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/',
        'only_matching': True,
    }]

    # Number of teasers requested per API page
    _PAGE_SIZE = 100

    def _real_extract(self, url):
        """Return a lazily paged playlist for a show, series season or collection.

        `sammlung` URLs are served by the `compilations/ard` endpoint, all
        other playlist types by `widgets/ard/asset`. Season and version
        (OV/AD) filters are only sent when the URL carries a season number.
        """
        playlist_id, display_id, playlist_type, season_number, version = self._match_valid_url(url).group(
            'id', 'display_id', 'playlist', 'season', 'version')

        def call_api(page_num):
            api_path = 'compilations/ard' if playlist_type == 'sammlung' else 'widgets/ard/asset'
            return self._download_json(
                f'https://api.ardmediathek.de/page-gateway/{api_path}/{playlist_id}', playlist_id,
                f'Downloading playlist page {page_num}', query={
                    'pageNumber': page_num,
                    'pageSize': self._PAGE_SIZE,
                    **({
                        'seasoned': 'true',
                        'seasonNumber': season_number,
                        'withOriginalversion': 'true' if version == 'OV' else 'false',
                        'withAudiodescription': 'true' if version == 'AD' else 'false',
                    } if season_number else {}),
                })

        # The first page also carries the playlist title and synopsis
        page_data = call_api(0)

        def fetch_page(page_num):
            # Reuse the already downloaded first page instead of requesting
            # page 0 from the API a second time
            page = page_data if page_num == 0 else call_api(page_num)
            for item in traverse_obj(page, ('teasers', ..., {dict})):
                item_id = traverse_obj(item, ('links', 'target', ('urlId', 'id')), 'id', get_all=False)
                # Skip teasers without a target and self-references to this playlist
                if not item_id or item_id == playlist_id:
                    continue
                item_mode = 'sammlung' if item.get('type') == 'compilation' else 'video'
                yield self.url_result(
                    f'https://www.ardmediathek.de/{item_mode}/{item_id}',
                    ie=(ARDMediathekCollectionIE if item_mode == 'sammlung' else ARDBetaMediathekIE),
                    **traverse_obj(item, {
                        'id': ('id', {str}),
                        'title': ('longTitle', {str}),
                        'duration': ('duration', {int_or_none}),
                        'timestamp': ('broadcastedOn', {parse_iso8601}),
                    }))

        full_id = join_nonempty(playlist_id, season_number, version, delim='_')

        return self.playlist_result(
            OnDemandPagedList(fetch_page, self._PAGE_SIZE), full_id, display_id=display_id,
            title=page_data.get('title'), description=page_data.get('synopsis'))