2 from __future__
import unicode_literals
6 from .common
import InfoExtractor
7 from ..compat
import compat_str
27 class ZDFBaseIE(InfoExtractor
):
28 _GEO_COUNTRIES
= ['DE']
29 _QUALITIES
= ('auto', 'low', 'med', 'high', 'veryhigh', 'hd')
31 def _call_api(self
, url
, video_id
, item
, api_token
=None, referrer
=None):
34 headers
['Api-Auth'] = 'Bearer %s' % api_token
36 headers
['Referer'] = referrer
37 return self
._download
_json
(
38 url
, video_id
, 'Downloading JSON %s' % item
, headers
=headers
)
41 def _extract_subtitles(src
):
43 for caption
in try_get(src
, lambda x
: x
['captions'], list) or []:
44 subtitle_url
= url_or_none(caption
.get('uri'))
46 lang
= caption
.get('language', 'deu')
47 subtitles
.setdefault(lang
, []).append({
52 def _extract_format(self
, video_id
, formats
, format_urls
, meta
):
53 format_url
= url_or_none(meta
.get('url'))
54 if not format_url
or format_url
in format_urls
:
56 format_urls
.add(format_url
)
58 mime_type
, ext
= meta
.get('mimeType'), determine_ext(format_url
)
59 if mime_type
== 'application/x-mpegURL' or ext
== 'm3u8':
60 new_formats
= self
._extract
_m
3u8_formats
(
61 format_url
, video_id
, 'mp4', m3u8_id
='hls',
62 entry_protocol
='m3u8_native', fatal
=False)
63 elif mime_type
== 'application/f4m+xml' or ext
== 'f4m':
64 new_formats
= self
._extract
_f
4m
_formats
(
65 update_url_query(format_url
, {'hdcore': '3.7.0'}
), video_id
, f4m_id
='hds', fatal
=False)
67 f
= parse_codecs(meta
.get('mimeCodec'))
68 if not f
and meta
.get('type'):
69 data
= meta
['type'].split('_')
70 if try_get(data
, lambda x
: x
[2]) == ext
:
71 f
= {'vcodec': data[0], 'acodec': data[1]}
74 'format_id': join_nonempty('http', meta
.get('type'), meta
.get('quality')),
77 formats
.extend(merge_dicts(f
, {
78 'format_note': join_nonempty('quality', 'class', from_dict
=meta
, delim
=', '),
79 'language': meta
.get('language'),
80 'language_preference': 10 if meta
.get('class') == 'main' else -10 if meta
.get('class') == 'ad' else -1,
81 'quality': qualities(self
._QUALITIES
)(meta
.get('quality')),
82 }) for f
in new_formats
)
84 def _extract_ptmd(self
, ptmd_url
, video_id
, api_token
, referrer
):
85 ptmd
= self
._call
_api
(
86 ptmd_url
, video_id
, 'metadata', api_token
, referrer
)
88 content_id
= ptmd
.get('basename') or ptmd_url
.split('/')[-1]
92 for p
in ptmd
['priorityList']:
93 formitaeten
= p
.get('formitaeten')
94 if not isinstance(formitaeten
, list):
97 f_qualities
= f
.get('qualities')
98 if not isinstance(f_qualities
, list):
100 for quality
in f_qualities
:
101 tracks
= try_get(quality
, lambda x
: x
['audio']['tracks'], list)
105 self
._extract
_format
(
106 content_id
, formats
, track_uris
, {
107 'url': track
.get('uri'),
108 'type': f
.get('type'),
109 'mimeType': f
.get('mimeType'),
110 'quality': quality
.get('quality'),
111 'class': track
.get('class'),
112 'language': track
.get('language'),
114 self
._sort
_formats
(formats
, ('hasaud', 'res', 'quality', 'language_preference'))
116 duration
= float_or_none(try_get(
117 ptmd
, lambda x
: x
['attributes']['duration']['value']), scale
=1000)
120 'extractor_key': ZDFIE
.ie_key(),
122 'duration': duration
,
124 'subtitles': self
._extract
_subtitles
(ptmd
),
127 def _extract_player(self
, webpage
, video_id
, fatal
=True):
128 return self
._parse
_json
(
130 r
'(?s)data-zdfplayer-jsb=(["\'])(?P
<json
>{.+?}
)\
1', webpage,
131 'player JSON
', default='{}' if not fatal else NO_DEFAULT,
136 class ZDFIE(ZDFBaseIE):
137 _VALID_URL = r'https?
://www\
.zdf\
.de
/(?
:[^
/]+/)*(?P
<id>[^
/?
#&]+)\.html'
139 'url': 'https://www.zdf.de/nachrichten/heute-journal/heute-journal-vom-30-12-2021-100.html',
141 'id': '211230_sendung_hjo',
143 'description': 'md5:47dff85977bde9fb8cba9e9c9b929839',
145 'upload_date': '20211230',
147 'thumbnail': 'md5:e65f459f741be5455c952cd820eb188e',
148 'title': 'heute journal vom 30.12.2021',
149 'timestamp': 1640897100,
152 'url': 'https://www.zdf.de/dokumentation/terra-x/die-magie-der-farben-von-koenigspurpur-und-jeansblau-100.html',
154 'id': '151025_magie_farben2_tex',
156 'title': 'Die Magie der Farben (2/2)',
157 'description': 'md5:a89da10c928c6235401066b60a6d5c1a',
159 'timestamp': 1465021200,
160 'upload_date': '20160604',
161 'thumbnail': 'https://www.zdf.de/assets/mauve-im-labor-100~768x432?cb=1464909117806',
164 'url': 'https://www.zdf.de/funk/druck-11790/funk-alles-ist-verzaubert-102.html',
165 'md5': '3d6f1049e9682178a11c54b91f3dd065',
168 'id': 'video_funk_1770473',
170 'description': 'Die Neue an der Schule verdreht Ismail den Kopf.',
171 'title': 'Alles ist verzaubert',
172 'timestamp': 1635520560,
173 'upload_date': '20211029',
174 'thumbnail': 'https://www.zdf.de/assets/teaser-funk-alles-ist-verzaubert-100~1920x1080?cb=1636466431799',
177 # Same as https://www.phoenix.de/sendungen/dokumentationen/gesten-der-maechtigen-i-a-89468.html?ref=suche
178 'url': 'https://www.zdf.de/politik/phoenix-sendungen/die-gesten-der-maechtigen-100.html',
179 'only_matching': True,
181 # Same as https://www.3sat.de/film/spielfilm/der-hauptmann-100.html
182 'url': 'https://www.zdf.de/filme/filme-sonstige/der-hauptmann-112.html',
183 'only_matching': True,
185 # Same as https://www.3sat.de/wissen/nano/nano-21-mai-2019-102.html, equal media ids
186 'url': 'https://www.zdf.de/wissen/nano/nano-21-mai-2019-102.html',
187 'only_matching': True,
189 'url': 'https://www.zdf.de/service-und-hilfe/die-neue-zdf-mediathek/zdfmediathek-trailer-100.html',
190 'only_matching': True,
192 'url': 'https://www.zdf.de/filme/taunuskrimi/die-lebenden-und-die-toten-1---ein-taunuskrimi-100.html',
193 'only_matching': True,
195 'url': 'https://www.zdf.de/dokumentation/planet-e/planet-e-uebersichtsseite-weitere-dokumentationen-von-planet-e-100.html',
196 'only_matching': True,
198 # Same as https://www.phoenix.de/sendungen/ereignisse/corona-nachgehakt/wohin-fuehrt-der-protest-in-der-pandemie-a-2050630.html
199 'url': 'https://www.zdf.de/politik/phoenix-sendungen/wohin-fuehrt-der-protest-in-der-pandemie-100.html',
200 'only_matching': True
202 # Same as https://www.3sat.de/film/ab-18/10-wochen-sommer-108.html
203 'url': 'https://www.zdf.de/dokumentation/ab-18/10-wochen-sommer-102.html',
204 'only_matching': True
207 def _extract_entry(self
, url
, player
, content
, video_id
):
208 title
= content
.get('title') or content
['teaserHeadline']
210 t
= content
['mainVideoContent']['http://zdf.de/rels/target']
212 ptmd_path
= t
.get('http://zdf.de/rels/streams/ptmd')
215 ptmd_path
= traverse_obj(
216 t
, ('streams', 'default', 'http://zdf.de/rels/streams/ptmd-template'),
217 'http://zdf.de/rels/streams/ptmd-template').replace(
218 '{playerId}', 'ngplayer_2_4')
220 info
= self
._extract
_ptmd
(
221 urljoin(url
, ptmd_path
), video_id
, player
['apiToken'], url
)
225 content
, lambda x
: x
['teaserImageRef']['layouts'], dict)
227 for layout_key
, layout_url
in layouts
.items():
228 layout_url
= url_or_none(layout_url
)
233 'format_id': layout_key
,
235 mobj
= re
.search(r
'(?P<width>\d+)x(?P<height>\d+)', layout_key
)
238 'width': int(mobj
.group('width')),
239 'height': int(mobj
.group('height')),
241 thumbnails
.append(thumbnail
)
243 chapter_marks
= t
.get('streamAnchorTag') or []
244 chapter_marks
.append({'anchorOffset': int_or_none(t.get('duration'))}
)
246 'start_time': chap
.get('anchorOffset'),
247 'end_time': next_chap
.get('anchorOffset'),
248 'title': chap
.get('anchorLabel')
249 } for chap
, next_chap
in zip(chapter_marks
, chapter_marks
[1:])]
251 return merge_dicts(info
, {
253 'description': content
.get('leadParagraph') or content
.get('teasertext'),
254 'duration': int_or_none(t
.get('duration')),
255 'timestamp': unified_timestamp(content
.get('editorialDate')),
256 'thumbnails': thumbnails
,
257 'chapters': chapters
or None
def _extract_regular(self, url, player, video_id):
    """Fetch the content document named by the player config and build the entry.

    ``player`` must provide ``'content'`` (the URL handed to the API call)
    and ``'apiToken'``; ``url`` is forwarded to ``_call_api`` as its
    referrer argument.  The content URL is also what ``_extract_entry``
    receives as its ``url`` parameter.
    """
    content_url = player['content']
    api_token = player['apiToken']
    document = self._call_api(
        content_url, video_id, 'content', api_token, url)
    return self._extract_entry(content_url, player, document, video_id)
265 def _extract_mobile(self
, video_id
):
266 video
= self
._download
_json
(
267 'https://zdf-cdn.live.cellular.de/mediathekV2/document/%s' % video_id
,
270 document
= video
['document']
272 title
= document
['titel']
273 content_id
= document
['basename']
277 for f
in document
['formitaeten']:
278 self
._extract
_format
(content_id
, formats
, format_urls
, f
)
279 self
._sort
_formats
(formats
)
282 teaser_bild
= document
.get('teaserBild')
283 if isinstance(teaser_bild
, dict):
284 for thumbnail_key
, thumbnail
in teaser_bild
.items():
285 thumbnail_url
= try_get(
286 thumbnail
, lambda x
: x
['url'], compat_str
)
289 'url': thumbnail_url
,
291 'width': int_or_none(thumbnail
.get('width')),
292 'height': int_or_none(thumbnail
.get('height')),
298 'description': document
.get('beschreibung'),
299 'duration': int_or_none(document
.get('length')),
300 'timestamp': unified_timestamp(document
.get('date')) or unified_timestamp(
301 try_get(video
, lambda x
: x
['meta']['editorialDate'], compat_str
)),
302 'thumbnails': thumbnails
,
303 'subtitles': self
._extract
_subtitles
(document
),
307 def _real_extract(self
, url
):
308 video_id
= self
._match
_id
(url
)
310 webpage
= self
._download
_webpage
(url
, video_id
, fatal
=False)
312 player
= self
._extract
_player
(webpage
, url
, fatal
=False)
314 return self
._extract
_regular
(url
, player
, video_id
)
316 return self
._extract
_mobile
(video_id
)
319 class ZDFChannelIE(ZDFBaseIE
):
320 _VALID_URL
= r
'https?://www\.zdf\.de/(?:[^/]+/)*(?P<id>[^/?#&]+)'
322 'url': 'https://www.zdf.de/sport/das-aktuelle-sportstudio',
324 'id': 'das-aktuelle-sportstudio',
325 'title': 'das aktuelle sportstudio | ZDF',
327 'playlist_mincount': 23,
329 'url': 'https://www.zdf.de/dokumentation/planet-e',
332 'title': 'planet e.',
334 'playlist_mincount': 50,
336 'url': 'https://www.zdf.de/filme/taunuskrimi/',
337 'only_matching': True,
def suitable(cls, url):
    """Reject URLs that ZDFIE accepts; otherwise defer to the parent check."""
    # Guard clause: anything ZDFIE considers suitable must not be claimed here.
    if ZDFIE.suitable(url):
        return False
    return super(ZDFChannelIE, cls).suitable(url)
344 def _real_extract(self
, url
):
345 channel_id
= self
._match
_id
(url
)
347 webpage
= self
._download
_webpage
(url
, channel_id
)
350 self
.url_result(item_url
, ie
=ZDFIE
.ie_key())
351 for item_url
in orderedSet(re
.findall(
352 r
'data-plusbar-url=["\'](http
.+?\
.html
)', webpage))]
354 return self.playlist_result(
355 entries, channel_id, self._og_search_title(webpage, fatal=False))
358 player = self._extract_player(webpage, channel_id)
360 channel_id = self._search_regex(
361 r'docId\s
*:\s
*(["\'])(?P<id>(?!\1).+?)\1', webpage,
362 'channel id', group='id')
364 channel = self._call_api(
365 'https://api.zdf.de/content/documents/%s.json' % channel_id,
366 player, url, channel_id)
369 for module in channel['module']:
370 for teaser in try_get(module, lambda x: x['teaser'], list) or []:
372 teaser, lambda x: x['http://zdf.de/rels/target'], dict)
375 items.extend(try_get(
377 lambda x: x['resultsWithVideo']['http://zdf.de/rels/search/results'],
379 items.extend(try_get(
381 lambda x: x['filterRef']['resultsWithVideo']['http://zdf.de/rels/search/results'],
387 t = try_get(item, lambda x: x['http://zdf.de/rels/target'], dict)
390 sharing_url = t.get('http://zdf.de/rels/sharing-url')
391 if not sharing_url or not isinstance(sharing_url, compat_str):
393 if sharing_url in entry_urls:
395 entry_urls.add(sharing_url)
396 entries.append(self.url_result(
397 sharing_url, ie=ZDFIE.ie_key(), video_id=t.get('id')))
399 return self.playlist_result(entries, channel_id, channel.get('title'))