]>
Commit | Line | Data |
---|---|---|
c968f738 | 1 | import json |
d5822b96 PH |
2 | import re |
3 | ||
4 | from .common import InfoExtractor | |
3741302a | 5 | from .generic import GenericIE |
d5822b96 | 6 | from ..utils import ( |
f9b85496 | 7 | determine_ext, |
d5822b96 | 8 | ExtractorError, |
6d3d3fc0 PH |
9 | int_or_none, |
10 | parse_duration, | |
75258218 S |
11 | qualities, |
12 | str_or_none, | |
13 | try_get, | |
6d3d3fc0 | 14 | unified_strdate, |
75258218 | 15 | unified_timestamp, |
f78eb41e | 16 | update_url, |
31eeab9f | 17 | update_url_query, |
3052a30d | 18 | url_or_none, |
75258218 | 19 | xpath_text, |
d5822b96 | 20 | ) |
f7854627 | 21 | from ..compat import compat_etree_fromstring |
d5822b96 | 22 | |
f9b85496 | 23 | |
c968f738 RA |
24 | class ARDMediathekBaseIE(InfoExtractor): |
25 | _GEO_COUNTRIES = ['DE'] | |
1c821227 | 26 | |
def _extract_media_info(self, media_info_url, webpage, video_id):
    """Download the media JSON document and turn it into an info dict.

    The JSON found at *media_info_url* is handed to ``_parse_media_info``
    together with *video_id*.  The third argument passed on is ``True``
    exactly when the literal text ``"fsk"`` (including the double quotes)
    occurs somewhere in *webpage*.
    """
    payload = self._download_json(
        media_info_url, video_id, 'Downloading media JSON')
    has_fsk_marker = '"fsk"' in webpage
    return self._parse_media_info(payload, video_id, has_fsk_marker)
e37c92ec | 31 | |
def _parse_media_info(self, media_info, video_id, fsk):
    """Build the core info dict from an ARD media-collection mapping.

    *media_info* is the mapping read via ``.get`` for the keys
    ``_geoblocked``, ``_subtitleUrl``, ``_duration``, ``_previewImage``
    and ``_isLive``; formats come from ``_extract_formats``.  When no
    formats were found, the reason is reported through
    ``raise_no_formats`` (if *fsk* is truthy) or ``raise_geo_restricted``
    (if the media is flagged ``_geoblocked``).
    """
    formats = self._extract_formats(media_info, video_id)

    # Only look for an explanation when nothing playable was found.
    if not formats and fsk:
        self.raise_no_formats(
            'This video is only available after 20:00', expected=True)
    elif not formats and media_info.get('_geoblocked'):
        self.raise_geo_restricted(
            'This video is not available due to geoblocking',
            countries=self._GEO_COUNTRIES, metadata_available=True)

    subtitle_url = media_info.get('_subtitleUrl')
    if subtitle_url:
        # The same track is offered as TTML and, under a sibling path, as WebVTT.
        ttml_track = {
            'ext': 'ttml',
            'url': subtitle_url,
        }
        vtt_track = {
            'ext': 'vtt',
            'url': subtitle_url.replace('/ebutt/', '/webvtt/') + '.vtt',
        }
        subtitles = {'de': [ttml_track, vtt_track]}
    else:
        subtitles = {}

    result = {'id': video_id}
    result['duration'] = int_or_none(media_info.get('_duration'))
    result['thumbnail'] = media_info.get('_previewImage')
    # Anything other than the JSON literal ``true`` counts as "not live".
    result['is_live'] = media_info.get('_isLive') is True
    result['formats'] = formats
    result['subtitles'] = subtitles
    return result
63 | ||
def _ARD_extract_episode_info(self, title):
    """Derive season/episode metadata from a title string.

    Returns an empty dict for a falsy *title*.  Otherwise the result
    always carries an ``episode`` key; ``season_number`` and
    ``episode_number`` are present (possibly ``None``) only when one of
    the known title patterns matched.
    """
    if not title:
        return {}

    # Tried in order; the first pattern that matches wins.
    title_patterns = (
        # Pattern for title like "Homo sapiens (S06/E07) - Originalversion"
        # from: https://www.ardmediathek.de/one/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw
        r'.*(?P<ep_info> \(S(?P<season_number>\d+)/E(?P<episode_number>\d+)\)).*',
        # E.g.: title="Fritjof aus Norwegen (2) (AD)"
        # from: https://www.ardmediathek.de/ard/sammlung/der-krieg-und-ich/68cMkqJdllm639Skj4c7sS/
        r'.*(?P<ep_info> \((?:Folge |Teil )?(?P<episode_number>\d+)(?:/\d+)?\)).*',
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:\:| -|) )\"(?P<episode>.+)\".*',
        # E.g.: title="Folge 25/42: Symmetrie"
        # from: https://www.ardmediathek.de/ard/video/grips-mathe/folge-25-42-symmetrie/ard-alpha/Y3JpZDovL2JyLmRlL3ZpZGVvLzMyYzI0ZjczLWQ1N2MtNDAxNC05ZmZhLTFjYzRkZDA5NDU5OQ/
        # E.g.: title="Folge 1063 - Vertrauen"
        # from: https://www.ardmediathek.de/ard/sendung/die-fallers/Y3JpZDovL3N3ci5kZS8yMzAyMDQ4/
        r'.*(?P<ep_info>Folge (?P<episode_number>\d+)(?:/\d+)?(?:\:| -|) ).*',
    )
    candidates = (re.match(pattern, title) for pattern in title_patterns)
    hit = next(filter(None, candidates), None)

    info = {}
    if hit is not None:
        groups = hit.groupdict()
        episode = str_or_none(groups.get('episode'))
        numbering = groups.get('ep_info')
        if numbering and not episode:
            # No explicit episode name was captured: use the title with the
            # numeric episode information removed.
            episode = str_or_none(title.replace(numbering, ''))
        if episode:
            episode = episode.strip()
        info = {
            'season_number': int_or_none(groups.get('season_number')),
            'episode_number': int_or_none(groups.get('episode_number')),
            'episode': episode,
        }

    # Fallback: the whole title serves as the episode name.
    if not info.get('episode'):
        info['episode'] = title.strip()
    return info
102 | ||
e37c92ec S |
103 | def _extract_formats(self, media_info, video_id): |
104 | type_ = media_info.get('_type') | |
105 | media_array = media_info.get('_mediaArray', []) | |
106 | formats = [] | |
107 | for num, media in enumerate(media_array): | |
108 | for stream in media.get('_mediaStreamArray', []): | |
109 | stream_urls = stream.get('_stream') | |
110 | if not stream_urls: | |
111 | continue | |
112 | if not isinstance(stream_urls, list): | |
113 | stream_urls = [stream_urls] | |
114 | quality = stream.get('_quality') | |
115 | server = stream.get('_server') | |
116 | for stream_url in stream_urls: | |
3052a30d | 117 | if not url_or_none(stream_url): |
91328f26 | 118 | continue |
e37c92ec | 119 | ext = determine_ext(stream_url) |
1fc0b47f | 120 | if quality != 'auto' and ext in ('f4m', 'm3u8'): |
121 | continue | |
e37c92ec | 122 | if ext == 'f4m': |
7e5edcfd | 123 | formats.extend(self._extract_f4m_formats( |
31eeab9f RA |
124 | update_url_query(stream_url, { |
125 | 'hdcore': '3.1.1', | |
126 | 'plugin': 'aasp-3.1.1.69.124' | |
c968f738 | 127 | }), video_id, f4m_id='hds', fatal=False)) |
e37c92ec | 128 | elif ext == 'm3u8': |
7e5edcfd | 129 | formats.extend(self._extract_m3u8_formats( |
c968f738 RA |
130 | stream_url, video_id, 'mp4', 'm3u8_native', |
131 | m3u8_id='hls', fatal=False)) | |
e37c92ec S |
132 | else: |
133 | if server and server.startswith('rtmp'): | |
134 | f = { | |
135 | 'url': server, | |
136 | 'play_path': stream_url, | |
137 | 'format_id': 'a%s-rtmp-%s' % (num, quality), | |
138 | } | |
91328f26 | 139 | else: |
e37c92ec S |
140 | f = { |
141 | 'url': stream_url, | |
142 | 'format_id': 'a%s-%s-%s' % (num, ext, quality) | |
143 | } | |
c968f738 RA |
144 | m = re.search( |
145 | r'_(?P<width>\d+)x(?P<height>\d+)\.mp4$', | |
146 | stream_url) | |
e37c92ec S |
147 | if m: |
148 | f.update({ | |
149 | 'width': int(m.group('width')), | |
150 | 'height': int(m.group('height')), | |
151 | }) | |
152 | if type_ == 'audio': | |
153 | f['vcodec'] = 'none' | |
154 | formats.append(f) | |
155 | return formats | |
156 | ||
c968f738 RA |
157 | |
158 | class ARDMediathekIE(ARDMediathekBaseIE): | |
159 | IE_NAME = 'ARD:mediathek' | |
160 | _VALID_URL = r'^https?://(?:(?:(?:www|classic)\.)?ardmediathek\.de|mediathek\.(?:daserste|rbb-online)\.de|one\.ard\.de)/(?:.*/)(?P<video_id>[0-9]+|[^0-9][^/\?]+)[^/\?]*(?:\?.*)?' | |
161 | ||
162 | _TESTS = [{ | |
163 | # available till 26.07.2022 | |
164 | 'url': 'http://www.ardmediathek.de/tv/S%C3%9CDLICHT/Was-ist-die-Kunst-der-Zukunft-liebe-Ann/BR-Fernsehen/Video?bcastId=34633636&documentId=44726822', | |
165 | 'info_dict': { | |
166 | 'id': '44726822', | |
167 | 'ext': 'mp4', | |
168 | 'title': 'Was ist die Kunst der Zukunft, liebe Anna McCarthy?', | |
169 | 'description': 'md5:4ada28b3e3b5df01647310e41f3a62f5', | |
170 | 'duration': 1740, | |
171 | }, | |
172 | 'params': { | |
173 | # m3u8 download | |
174 | 'skip_download': True, | |
175 | } | |
176 | }, { | |
177 | 'url': 'https://one.ard.de/tv/Mord-mit-Aussicht/Mord-mit-Aussicht-6-39-T%C3%B6dliche-Nach/ONE/Video?bcastId=46384294&documentId=55586872', | |
178 | 'only_matching': True, | |
179 | }, { | |
180 | # audio | |
181 | 'url': 'http://www.ardmediathek.de/tv/WDR-H%C3%B6rspiel-Speicher/Tod-eines-Fu%C3%9Fballers/WDR-3/Audio-Podcast?documentId=28488308&bcastId=23074086', | |
182 | 'only_matching': True, | |
183 | }, { | |
184 | 'url': 'http://mediathek.daserste.de/sendungen_a-z/328454_anne-will/22429276_vertrauen-ist-gut-spionieren-ist-besser-geht', | |
185 | 'only_matching': True, | |
186 | }, { | |
187 | # audio | |
188 | 'url': 'http://mediathek.rbb-online.de/radio/Hörspiel/Vor-dem-Fest/kulturradio/Audio?documentId=30796318&topRessort=radio&bcastId=9839158', | |
189 | 'only_matching': True, | |
190 | }, { | |
191 | 'url': 'https://classic.ardmediathek.de/tv/Panda-Gorilla-Co/Panda-Gorilla-Co-Folge-274/Das-Erste/Video?bcastId=16355486&documentId=58234698', | |
192 | 'only_matching': True, | |
193 | }] | |
194 | ||
@classmethod
def suitable(cls, url):
    """Decline every URL that ``ARDBetaMediathekIE`` accepts.

    For all other URLs the decision is left to the parent class's
    ``suitable``.
    """
    if ARDBetaMediathekIE.suitable(url):
        return False
    return super(ARDMediathekIE, cls).suitable(url)
198 | ||
def _real_extract(self, url):
    """Extract a video/audio item from a classic ARD Mediathek page.

    The id is the ``documentId`` query value when the URL has one,
    otherwise the ``video_id`` group of ``_VALID_URL``.  Formats are taken
    from ``mediaCollection.addMediaStream(...)`` calls embedded in the page
    or, when there are none, from the ``/play/media/<id>`` JSON document.

    Differences from the previous revision:
    * when no numeric media id can be found in the page, the id derived
      from the URL is kept instead of being replaced by ``None``;
    * a thumbnail delivered by the media JSON is no longer discarded when
      the page has no ``og:image``;
    * duplicate stream URLs are dropped in page order, so the format list
      is deterministic.

    Raises ExtractorError (expected) when the page shows one of the known
    "unavailable" messages.
    """
    # determine video id from url
    m = self._match_valid_url(url)

    document_id = None

    numid = re.search(r'documentId=([0-9]+)', url)
    if numid:
        document_id = video_id = numid.group(1)
    else:
        video_id = m.group('video_id')

    webpage = self._download_webpage(url, video_id)

    ERRORS = (
        ('>Leider liegt eine Störung vor.', 'Video %s is unavailable'),
        ('>Der gewünschte Beitrag ist nicht mehr verfügbar.<',
         'Video %s is no longer available'),
    )

    for pattern, message in ERRORS:
        if pattern in webpage:
            raise ExtractorError(message % video_id, expected=True)

    # URLs carrying an "rss" query parameter may return an RSS feed.
    if re.search(r'[\?&]rss($|[=&])', url):
        doc = compat_etree_fromstring(webpage.encode('utf-8'))
        if doc.tag == 'rss':
            return GenericIE()._extract_rss(url, video_id, doc)

    title = self._og_search_title(webpage, default=None) or self._html_search_regex(
        [r'<h1(?:\s+class="boxTopHeadline")?>(.*?)</h1>',
         r'<meta name="dcterms\.title" content="(.*?)"/>',
         r'<h4 class="headline">(.*?)</h4>',
         r'<title[^>]*>(.*?)</title>'],
        webpage, 'title')
    description = self._og_search_description(webpage, default=None) or self._html_search_meta(
        'dcterms.abstract', webpage, 'description', default=None)
    if description is None:
        description = self._html_search_meta(
            'description', webpage, 'meta description', default=None)
    if description is None:
        description = self._html_search_regex(
            r'<p\s+class="teasertext">(.+?)</p>',
            webpage, 'teaser text', default=None)

    # Thumbnail is sometimes not present.
    # It is in the mobile version, but that seems to use a different URL
    # structure altogether.
    thumbnail = self._og_search_thumbnail(webpage, default=None)

    media_streams = re.findall(r'''(?x)
        mediaCollection\.addMediaStream\([0-9]+,\s*[0-9]+,\s*"[^"]*",\s*
        "([^"]+)"''', webpage)

    if media_streams:
        QUALITIES = qualities(['lo', 'hi', 'hq'])
        formats = []
        seen_urls = set()
        for furl in media_streams:
            # De-duplicate while keeping the order found in the page.
            if furl in seen_urls:
                continue
            seen_urls.add(furl)
            if furl.endswith('.f4m'):
                fid = 'f4m'
            else:
                # The format id is the second-to-last dot-separated part.
                fid_m = re.match(r'.*\.([^.]+)\.[^.]+$', furl)
                fid = fid_m.group(1) if fid_m else None
            formats.append({
                'quality': QUALITIES(fid),
                'format_id': fid,
                'url': furl,
            })
        info = {
            'formats': formats,
        }
    else:  # request JSON file
        if not document_id:
            # Fall back to the URL-derived id rather than None when the
            # page does not reveal a numeric media id.
            video_id = self._search_regex(
                (r'/play/(?:config|media|sola)/(\d+)', r'contentId["\']\s*:\s*(\d+)'),
                webpage, 'media id', default=video_id)
        info = self._extract_media_info(
            'http://www.ardmediathek.de/play/media/%s' % video_id,
            webpage, video_id)

    info.update({
        'id': video_id,
        'title': title,
        'description': description,
        # Prefer og:image, but keep a thumbnail the media JSON supplied.
        'thumbnail': thumbnail or info.get('thumbnail'),
    })
    info.update(self._ARD_extract_episode_info(info['title']))

    return info
6d3d3fc0 PH |
288 | |
289 | ||
290 | class ARDIE(InfoExtractor): | |
14eb1ee1 | 291 | _VALID_URL = r'(?P<mainurl>https?://(?:www\.)?daserste\.de/(?:[^/?#&]+/)+(?P<id>[^/?#&]+))\.html' |
ad29ef04 | 292 | _TESTS = [{ |
d61ef7f3 GM |
293 | # available till 7.12.2023 |
294 | 'url': 'https://www.daserste.de/information/talk/maischberger/videos/maischberger-video-424.html', | |
1f8bd8eb | 295 | 'md5': '94812e6438488fb923c361a44469614b', |
ad29ef04 | 296 | 'info_dict': { |
d61ef7f3 GM |
297 | 'id': 'maischberger-video-424', |
298 | 'display_id': 'maischberger-video-424', | |
ad29ef04 | 299 | 'ext': 'mp4', |
d61ef7f3 GM |
300 | 'duration': 4452.0, |
301 | 'title': 'maischberger am 07.12.2022', | |
302 | 'upload_date': '20221207', | |
ad29ef04 W |
303 | 'thumbnail': r're:^https?://.*\.jpg$', |
304 | }, | |
a54c5f83 | 305 | }, { |
14eb1ee1 | 306 | 'url': 'https://www.daserste.de/information/politik-weltgeschehen/morgenmagazin/videosextern/dominik-kahun-aus-der-nhl-direkt-zur-weltmeisterschaft-100.html', |
307 | 'only_matching': True, | |
308 | }, { | |
309 | 'url': 'https://www.daserste.de/information/nachrichten-wetter/tagesthemen/videosextern/tagesthemen-17736.html', | |
a54c5f83 | 310 | 'only_matching': True, |
f17c7022 OF |
311 | }, { |
312 | 'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/videos/diversity-tag-sanam-afrashteh100.html', | |
313 | 'only_matching': True, | |
d6a03502 | 314 | }, { |
6d3d3fc0 | 315 | 'url': 'http://www.daserste.de/information/reportage-dokumentation/dokus/videos/die-story-im-ersten-mission-unter-falscher-flagge-100.html', |
d6a03502 | 316 | 'only_matching': True, |
14eb1ee1 | 317 | }, { |
318 | 'url': 'https://www.daserste.de/unterhaltung/serie/in-aller-freundschaft-die-jungen-aerzte/Drehpause-100.html', | |
319 | 'only_matching': True, | |
320 | }, { | |
321 | 'url': 'https://www.daserste.de/unterhaltung/film/filmmittwoch-im-ersten/videos/making-ofwendezeit-video-100.html', | |
322 | 'only_matching': True, | |
ad29ef04 | 323 | }] |
6d3d3fc0 PH |
324 | |
def _real_extract(self, url):
    """Extract a daserste.de video via its ``~playerXml.xml`` document.

    The player XML URL is the ``mainurl`` group of ``_VALID_URL`` with
    ``~playerXml.xml`` appended.  Each ``<asset>`` below ``<video>``
    yields either expanded HLS/HDS formats or one direct/RTMP format.

    Differences from the previous revision:
    * the RTMP play path is stored under ``play_path`` (the key the
      sibling ``ARDMediathekBaseIE._extract_formats`` uses) instead of
      ``playpath``;
    * a missing ``<video>`` or ``<title>`` element raises ExtractorError
      instead of AttributeError, a missing ``<duration>`` gives a
      ``None`` duration, and subtitle nodes without a ``url`` attribute
      are skipped.
    """
    mobj = self._match_valid_url(url)
    display_id = mobj.group('id')

    player_url = mobj.group('mainurl') + '~playerXml.xml'
    doc = self._download_xml(player_url, display_id)
    video_node = doc.find('./video')
    if video_node is None:
        raise ExtractorError('Player XML does not contain a video element')
    upload_date = unified_strdate(xpath_text(
        video_node, './broadcastDate'))
    thumbnail = xpath_text(video_node, './/teaserImage//variant/url')

    formats = []
    for a in video_node.findall('.//asset'):
        file_name = xpath_text(a, './fileName', default=None)
        if not file_name:
            continue
        format_type = a.attrib.get('type')
        # None when fileName is not a URL (e.g. an RTMP play path).
        format_url = url_or_none(file_name)
        if format_url:
            ext = determine_ext(file_name)
            if ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    format_url, display_id, 'mp4', entry_protocol='m3u8_native',
                    m3u8_id=format_type or 'hls', fatal=False))
                continue
            elif ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    update_url_query(format_url, {'hdcore': '3.7.0'}),
                    display_id, f4m_id=format_type or 'hds', fatal=False))
                continue
        f = {
            'format_id': format_type,
            'width': int_or_none(xpath_text(a, './frameWidth')),
            'height': int_or_none(xpath_text(a, './frameHeight')),
            'vbr': int_or_none(xpath_text(a, './bitrateVideo')),
            'abr': int_or_none(xpath_text(a, './bitrateAudio')),
            'vcodec': xpath_text(a, './codecVideo'),
            'tbr': int_or_none(xpath_text(a, './totalBitrate')),
        }
        server_prefix = xpath_text(a, './serverPrefix', default=None)
        if server_prefix:
            f.update({
                'url': server_prefix,
                'play_path': file_name,
            })
        else:
            if not format_url:
                continue
            f['url'] = format_url
        formats.append(f)

    _SUB_FORMATS = (
        ('./dataTimedText', 'ttml'),
        ('./dataTimedTextNoOffset', 'ttml'),
        ('./dataTimedTextVtt', 'vtt'),
    )

    subtitles = {}
    for subsel, subext in _SUB_FORMATS:
        for node in video_node.findall(subsel):
            sub_url = node.attrib.get('url')
            if not sub_url:
                continue
            subtitles.setdefault('de', []).append({
                'url': sub_url,
                'ext': subext,
            })

    return {
        'id': xpath_text(video_node, './videoId', default=display_id),
        'formats': formats,
        'subtitles': subtitles,
        'display_id': display_id,
        'title': xpath_text(video_node, './title', 'title', fatal=True),
        'duration': parse_duration(xpath_text(video_node, './duration')),
        'upload_date': upload_date,
        'thumbnail': thumbnail,
    }
c1a37eb2 PH |
400 | |
401 | ||
c968f738 | 402 | class ARDBetaMediathekIE(ARDMediathekBaseIE): |
14a08605 | 403 | _VALID_URL = r'''(?x)https:// |
404 | (?:(?:beta|www)\.)?ardmediathek\.de/ | |
405 | (?:(?P<client>[^/]+)/)? | |
1f8bd8eb | 406 | (?:player|live|video|(?P<playlist>sendung|serie|sammlung))/ |
409cdd1e | 407 | (?:(?P<display_id>(?(playlist)[^?#]+?|[^?#]+))/)? |
408 | (?P<id>(?(playlist)|Y3JpZDovL)[a-zA-Z0-9]+) | |
409 | (?(playlist)/(?P<season>\d+)?/?(?:[?#]|$))''' | |
14a08605 | 410 | |
c1a37eb2 | 411 | _TESTS = [{ |
1f8bd8eb LS |
412 | 'url': 'https://www.ardmediathek.de/video/filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen/Y3JpZDovL21kci5kZS9zZW5kdW5nLzI4MjA0MC80MjIwOTEtNDAyNTM0', |
413 | 'md5': 'b6e8ab03f2bcc6e1f9e6cef25fcc03c4', | |
f78eb41e | 414 | 'info_dict': { |
1f8bd8eb LS |
415 | 'display_id': 'filme-im-mdr/liebe-auf-vier-pfoten/mdr-fernsehen', |
416 | 'id': '12939099', | |
417 | 'title': 'Liebe auf vier Pfoten', | |
418 | 'description': r're:^Claudia Schmitt, Anwältin in Salzburg', | |
419 | 'duration': 5222, | |
420 | 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:aee7cbf8f06de976?w=960&ch=ae4d0f2ee47d8b9b', | |
421 | 'timestamp': 1701343800, | |
422 | 'upload_date': '20231130', | |
f78eb41e | 423 | 'ext': 'mp4', |
1f8bd8eb | 424 | 'episode': 'Liebe auf vier Pfoten', |
f78eb41e SL |
425 | 'series': 'Filme im MDR' |
426 | }, | |
427 | }, { | |
a820dc72 RA |
428 | 'url': 'https://www.ardmediathek.de/mdr/video/die-robuste-roswita/Y3JpZDovL21kci5kZS9iZWl0cmFnL2Ntcy84MWMxN2MzZC0wMjkxLTRmMzUtODk4ZS0wYzhlOWQxODE2NGI/', |
429 | 'md5': 'a1dc75a39c61601b980648f7c9f9f71d', | |
c1a37eb2 PH |
430 | 'info_dict': { |
431 | 'display_id': 'die-robuste-roswita', | |
a820dc72 | 432 | 'id': '78566716', |
c968f738 | 433 | 'title': 'Die robuste Roswita', |
a820dc72 | 434 | 'description': r're:^Der Mord.*totgeglaubte Ehefrau Roswita', |
c1a37eb2 | 435 | 'duration': 5316, |
a820dc72 RA |
436 | 'thumbnail': 'https://img.ardmediathek.de/standard/00/78/56/67/84/575672121/16x9/960?mandant=ard', |
437 | 'timestamp': 1596658200, | |
438 | 'upload_date': '20200805', | |
c1a37eb2 PH |
439 | 'ext': 'mp4', |
440 | }, | |
14a08605 | 441 | 'skip': 'Error', |
442 | }, { | |
443 | 'url': 'https://www.ardmediathek.de/video/tagesschau-oder-tagesschau-20-00-uhr/das-erste/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhZ2Vzc2NoYXUvZmM4ZDUxMjgtOTE0ZC00Y2MzLTgzNzAtNDZkNGNiZWJkOTll', | |
f78eb41e | 444 | 'md5': '1e73ded21cb79bac065117e80c81dc88', |
14a08605 | 445 | 'info_dict': { |
446 | 'id': '10049223', | |
447 | 'ext': 'mp4', | |
448 | 'title': 'tagesschau, 20:00 Uhr', | |
449 | 'timestamp': 1636398000, | |
450 | 'description': 'md5:39578c7b96c9fe50afdf5674ad985e6b', | |
451 | 'upload_date': '20211108', | |
f78eb41e SL |
452 | 'display_id': 'tagesschau-oder-tagesschau-20-00-uhr/das-erste', |
453 | 'duration': 915, | |
454 | 'episode': 'tagesschau, 20:00 Uhr', | |
455 | 'series': 'tagesschau', | |
1f8bd8eb | 456 | 'thumbnail': 'https://api.ardmediathek.de/image-service/images/urn:ard:image:fbb21142783b0a49?w=960&ch=ee69108ae344f678', |
409cdd1e | 457 | }, |
fe515e5c S |
458 | }, { |
459 | 'url': 'https://beta.ardmediathek.de/ard/video/Y3JpZDovL2Rhc2Vyc3RlLmRlL3RhdG9ydC9mYmM4NGM1NC0xNzU4LTRmZGYtYWFhZS0wYzcyZTIxNGEyMDE', | |
460 | 'only_matching': True, | |
461 | }, { | |
462 | 'url': 'https://ardmediathek.de/ard/video/saartalk/saartalk-gesellschaftsgift-haltung-gegen-hass/sr-fernsehen/Y3JpZDovL3NyLW9ubGluZS5kZS9TVF84MTY4MA/', | |
463 | 'only_matching': True, | |
464 | }, { | |
465 | 'url': 'https://www.ardmediathek.de/ard/video/trailer/private-eyes-s01-e01/one/Y3JpZDovL3dkci5kZS9CZWl0cmFnLTE1MTgwYzczLWNiMTEtNGNkMS1iMjUyLTg5MGYzOWQxZmQ1YQ/', | |
466 | 'only_matching': True, | |
1c821227 S |
467 | }, { |
468 | 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3N3ci5kZS9hZXgvbzEwNzE5MTU/', | |
469 | 'only_matching': True, | |
470 | }, { | |
471 | 'url': 'https://www.ardmediathek.de/swr/live/Y3JpZDovL3N3ci5kZS8xMzQ4MTA0Mg', | |
472 | 'only_matching': True, | |
e6e5d98c | 473 | }, { |
474 | # playlist of type 'sendung' | |
475 | 'url': 'https://www.ardmediathek.de/ard/sendung/doctor-who/Y3JpZDovL3dkci5kZS9vbmUvZG9jdG9yIHdobw/', | |
476 | 'only_matching': True, | |
1f8bd8eb LS |
477 | }, { |
478 | # playlist of type 'serie' | |
479 | 'url': 'https://www.ardmediathek.de/serie/nachtstreife/staffel-1/Y3JpZDovL3N3ci5kZS9zZGIvc3RJZC8xMjQy/1', | |
480 | 'only_matching': True, | |
e6e5d98c | 481 | }, { |
482 | # playlist of type 'sammlung' | |
483 | 'url': 'https://www.ardmediathek.de/ard/sammlung/team-muenster/5JpTzLSbWUAK8184IOvEir/', | |
484 | 'only_matching': True, | |
14a08605 | 485 | }, { |
486 | 'url': 'https://www.ardmediathek.de/video/coronavirus-update-ndr-info/astrazeneca-kurz-lockdown-und-pims-syndrom-81/ndr/Y3JpZDovL25kci5kZS84NzE0M2FjNi0wMWEwLTQ5ODEtOTE5NS1mOGZhNzdhOTFmOTI/', | |
487 | 'only_matching': True, | |
488 | }, { | |
489 | 'url': 'https://www.ardmediathek.de/ard/player/Y3JpZDovL3dkci5kZS9CZWl0cmFnLWQ2NDJjYWEzLTMwZWYtNGI4NS1iMTI2LTU1N2UxYTcxOGIzOQ/tatort-duo-koeln-leipzig-ihr-kinderlein-kommet', | |
490 | 'only_matching': True, | |
c1a37eb2 PH |
491 | }] |
492 | ||
def _ARD_load_playlist_snippet(self, playlist_id, display_id, client, mode, page_number):
    """ Query the ARD server for one page of playlist information
    and return the data in "raw" format.

    *mode* must be 'sendung', 'serie' or 'sammlung'.  The first two are
    queried through ``showPage``, the last one through ``morePage``; in
    both cases the mapping returned is the one holding the requested
    ``pagination`` and ``teasers`` fields. """
    assert mode in ('sendung', 'serie', 'sammlung')
    is_show = mode in ('sendung', 'serie')

    if is_show:
        query = '''{
            showPage(
                client: "%s"
                showId: "%s"
                pageNumber: %d
            ) {
                pagination {
                    pageSize
                    totalElements
                }
                teasers { # Array
                    mediumTitle
                    links { target { id href title } }
                    type
                }
            }}''' % (client, playlist_id, page_number)
    else:  # mode == 'sammlung'
        query = '''{
            morePage(
                client: "%s"
                compilationId: "%s"
                pageNumber: %d
            ) {
                widget {
                    pagination {
                        pageSize
                        totalElements
                    }
                    teasers { # Array
                        mediumTitle
                        links { target { id href title } }
                        type
                    }
                }
            }}''' % (client, playlist_id, page_number)
    request_body = json.dumps({'query': query}).encode()

    # Resources for ARD graphQL debugging:
    # https://api-test.ardmediathek.de/public-gateway
    response = self._download_json(
        'https://api.ardmediathek.de/public-gateway',
        '[Playlist] %s' % display_id,
        data=request_body,
        headers={'Content-Type': 'application/json'})
    payload = response['data']

    # align the structure of the returned data:
    if is_show:
        return payload['showPage']
    return payload['morePage']['widget']
550 | ||
def _ARD_extract_playlist(self, url, playlist_id, display_id, client, mode):
    """ Collects all playlist entries and returns them as info dict.
    Supports playlists of mode 'sendung', 'serie', and 'sammlung',
    as well as nested playlists.

    Pages are requested from ``_ARD_load_playlist_snippet`` starting at
    page 0 until ``pageSize * pages_fetched >= totalElements``.  Paging
    also stops as soon as a page comes back without teasers, so a
    response reporting ``pageSize`` 0 (or an inconsistent total) cannot
    keep the loop running forever.  *url* is accepted for interface
    compatibility but not used. """
    entries = []
    page_number = 0
    while True:  # iterate by page number
        show_page = self._ARD_load_playlist_snippet(
            playlist_id, display_id, client, mode, page_number)
        teasers = show_page.get('teasers') or []
        for teaser in teasers:  # process playlist items
            target = teaser['links']['target']
            if '/compilation/' in target['href']:
                # alternative condition: teaser['type'] == "compilation"
                # => This is a nested compilation, e.g. like:
                # https://www.ardmediathek.de/ard/sammlung/die-kirche-bleibt-im-dorf/5eOHzt8XB2sqeFXbIoJlg2/
                link_mode = 'sammlung'
            else:
                link_mode = 'video'

            # perform URL slug quoting of the episode title similar to ARD:
            slug = (target['title'].lower()
                    .replace('ä', 'ae').replace('ö', 'oe')
                    .replace('ü', 'ue').replace('ß', 'ss'))
            slug = re.sub('[^a-zA-Z0-9]+', '-', slug)  # replace special chars by -
            slug = re.sub('^-|-$', '', slug)  # remove '-' from begin/end

            item_url = 'https://www.ardmediathek.de/%s/%s/%s/%s/%s' % (
                client, link_mode, display_id, slug, target['id'])
            entries.append(self.url_result(
                item_url,
                ie=ARDBetaMediathekIE.ie_key()))

        pagination = show_page['pagination']
        if (not teasers
                or pagination['pageSize'] * (page_number + 1)
                >= pagination['totalElements']):
            # either the server has nothing more to give, or we've
            # processed enough pages to get all playlist entries
            break
        page_number += 1

    return self.playlist_result(entries, playlist_id, playlist_title=display_id)
e6e5d98c | 589 | |
def _real_extract(self, url):
    """Extract a single clip, or delegate to playlist extraction.

    URLs whose ``playlist`` group matched are handed to
    ``_ARD_extract_playlist``.  For everything else the ``playerPage``
    GraphQL query is sent to the public gateway and its answer is merged
    with the result of ``_parse_media_info``.
    """
    match = self._match_valid_url(url)
    video_id, display_id, playlist_type, client, season_number = match.group(
        'id', 'display_id', 'playlist', 'client', 'season')
    if not display_id:
        display_id = video_id
    if not client:
        client = 'ard'

    if playlist_type:
        # TODO: Extract only specified season
        return self._ARD_extract_playlist(url, video_id, display_id, client, playlist_type)

    query = '''{
        playerPage(client:"%s", clipId: "%s") {
            blockedByFsk
            broadcastedOn
            maturityContentRating
            mediaCollection {
                _duration
                _geoblocked
                _isLive
                _mediaArray {
                    _mediaStreamArray {
                        _quality
                        _server
                        _stream
                    }
                }
                _previewImage
                _subtitleUrl
                _type
            }
            show {
                title
            }
            image {
                src
            }
            synopsis
            title
            tracking {
                atiCustomVars {
                    contentId
                }
            }
        }
    }''' % (client, video_id)
    response = self._download_json(
        'https://api.ardmediathek.de/public-gateway',
        display_id,
        data=json.dumps({'query': query}).encode(),
        headers={'Content-Type': 'application/json'})
    player_page = response['data']['playerPage']

    title = player_page['title']
    content_id = str_or_none(try_get(
        player_page, lambda x: x['tracking']['atiCustomVars']['contentId']))

    media_collection = player_page.get('mediaCollection') or {}
    if not media_collection and content_id:
        # The gateway delivered no media collection: try the play/media
        # JSON document for the content id instead (non-fatal).
        media_collection = self._download_json(
            'https://www.ardmediathek.de/play/media/' + content_id,
            content_id, fatal=False) or {}

    info = self._parse_media_info(
        media_collection, content_id or video_id,
        player_page.get('blockedByFsk'))

    description = player_page.get('synopsis')
    rating = player_page.get('maturityContentRating')
    age_limit = int_or_none(rating.lstrip('FSK')) if rating else None
    if not age_limit and description:
        # Otherwise look for a trailing "(FSK <n>)" in the synopsis.
        age_limit = int_or_none(self._search_regex(
            r'\(FSK\s*(\d+)\)\s*$', description, 'age limit', default=None))

    # The page itself is only downloaded when neither the media collection
    # nor the gateway's image field provides a thumbnail.
    thumbnail = (
        media_collection.get('_previewImage')
        or try_get(player_page, lambda x: update_url(x['image']['src'], query=None, fragment=None))
        or self.get_thumbnail_from_html(display_id, url))

    info.update({
        'age_limit': age_limit,
        'display_id': display_id,
        'title': title,
        'description': description,
        'timestamp': unified_timestamp(player_page.get('broadcastedOn')),
        'series': try_get(player_page, lambda x: x['show']['title']),
        'thumbnail': thumbnail,
    })
    info.update(self._ARD_extract_episode_info(info['title']))
    return info
f78eb41e SL |
672 | |
def get_thumbnail_from_html(self, display_id, url):
    """Look for a thumbnail URL in the HTML page at *url*.

    The page download is non-fatal; a falsy download result is treated as
    an empty page.  The Open Graph thumbnail is preferred, then the
    ``thumbnailUrl`` meta tag; ``None`` is the default of both lookups.
    """
    page = self._download_webpage(url, display_id, fatal=False)
    if not page:
        page = ''
    og_thumbnail = self._og_search_thumbnail(page, default=None)
    if og_thumbnail:
        return og_thumbnail
    return self._html_search_meta('thumbnailUrl', page, default=None)