]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..compat import compat_urllib_parse_urlparse | |
5 | from ..utils import ( | |
6 | determine_ext, | |
7 | ExtractorError, | |
8 | int_or_none, | |
9 | merge_dicts, | |
10 | parse_iso8601, | |
11 | qualities, | |
12 | try_get, | |
13 | urljoin, | |
14 | ) | |
15 | ||
16 | ||
class NDRBaseIE(InfoExtractor):
    """Common entry point for the NDR / N-JOY article page extractors.

    Subclasses supply ``_VALID_URL`` and an ``_extract_embed`` method; this
    base class only downloads the page and hands it over.
    """

    def _real_extract(self, url):
        """Fetch the page behind *url* and delegate to ``_extract_embed``."""
        match = self._match_valid_url(url)
        # The first non-empty capture group of _VALID_URL is the display id.
        display_id = next(value for value in match.groups() if value)
        page = self._download_webpage(url, display_id)
        return self._extract_embed(page, display_id, url)
23 | ||
24 | ||
class NDRIE(NDRBaseIE):
    """Extractor for ndr.de article pages that embed a player."""

    IE_NAME = 'ndr'
    IE_DESC = 'NDR.de - Norddeutscher Rundfunk'
    _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[^/?#]+),[\da-z]+\.html'
    _TESTS = [{
        # httpVideo, same content id
        'url': 'http://www.ndr.de/fernsehen/Party-Poette-und-Parade,hafengeburtstag988.html',
        'md5': '6515bc255dc5c5f8c85bbc38e035a659',
        'info_dict': {
            'id': 'hafengeburtstag988',
            'display_id': 'Party-Poette-und-Parade',
            'ext': 'mp4',
            'title': 'Party, Pötte und Parade',
            'description': 'md5:ad14f9d2f91d3040b6930c697e5f6b4c',
            'uploader': 'ndrtv',
            'timestamp': 1431255671,
            'upload_date': '20150510',
            'duration': 3498,
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        # httpVideo, different content id
        'url': 'http://www.ndr.de/sport/fussball/40-Osnabrueck-spielt-sich-in-einen-Rausch,osna270.html',
        'md5': '1043ff203eab307f0c51702ec49e9a71',
        'info_dict': {
            'id': 'osna272',
            'display_id': '40-Osnabrueck-spielt-sich-in-einen-Rausch',
            'ext': 'mp4',
            'title': 'Osnabrück - Wehen Wiesbaden: Die Highlights',
            'description': 'md5:32e9b800b3d2d4008103752682d5dc01',
            'uploader': 'ndrtv',
            'timestamp': 1442059200,
            'upload_date': '20150912',
            'duration': 510,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'No longer available',
    }, {
        # httpAudio, same content id
        'url': 'http://www.ndr.de/info/La-Valette-entgeht-der-Hinrichtung,audio51535.html',
        'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
        'info_dict': {
            'id': 'audio51535',
            'display_id': 'La-Valette-entgeht-der-Hinrichtung',
            'ext': 'mp3',
            'title': 'La Valette entgeht der Hinrichtung',
            'description': 'md5:22f9541913a40fe50091d5cdd7c9f536',
            'uploader': 'ndrinfo',
            'timestamp': 1631711863,
            'upload_date': '20210915',
            'duration': 884,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        # with subtitles
        'url': 'https://www.ndr.de/fernsehen/sendungen/extra_3/extra-3-Satiremagazin-mit-Christian-Ehring,sendung1091858.html',
        'info_dict': {
            'id': 'extra18674',
            'display_id': 'extra-3-Satiremagazin-mit-Christian-Ehring',
            'ext': 'mp4',
            'title': 'Extra 3 vom 11.11.2020 mit Christian Ehring',
            'description': 'md5:700f6de264010585012a72f97b0ac0c9',
            'uploader': 'ndrtv',
            'upload_date': '20201207',
            'timestamp': 1614349457,
            'duration': 1749,
            'subtitles': {
                'de': [{
                    'ext': 'ttml',
                    'url': r're:^https://www\.ndr\.de.+',
                }],
            },
        },
        'params': {
            'skip_download': True,
        },
        'expected_warnings': ['Unable to download f4m manifest'],
    }, {
        'url': 'https://www.ndr.de/Fettes-Brot-Ferris-MC-und-Thees-Uhlmann-live-on-stage,festivalsommer116.html',
        'only_matching': True,
    }]

    def _extract_embed(self, webpage, display_id, url):
        """Locate the embedded player for an ndr.de article page.

        The embed URL is looked up, in order, in the ``embedURL`` meta tag,
        an ``embedUrl`` key in inline script/JSON, and a ``sophoraID``
        JavaScript variable.  A bare sophoraID is turned into either an
        ``ndr:<id>`` URL or an ``-player.html`` URL.

        Returns a ``url_transparent`` result dict (embed URL, display id,
        description, timestamp) merged with any JSON-LD info found on the
        page.  Raises ExtractorError when no embed URL can be determined.
        """
        embed_url = (
            self._html_search_meta(
                'embedURL', webpage, 'embed URL',
                default=None)
            or self._search_regex(
                r'\bembedUrl["\']\s*:\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
                'embed URL', group='url', default=None)
            or self._search_regex(
                r'\bvar\s*sophoraID\s*=\s*(["\'])(?P<url>(?:(?!\1).)+)\1', webpage,
                'embed URL', group='url', default=''))
        # some more work needed if we only found sophoraID
        if re.match(r'^[a-z]+\d+$', embed_url):
            # get the initial part of the url path, e.g. /panorama/archiv/2022/
            parsed_url = compat_urllib_parse_urlparse(url)
            # display_id comes straight from the URL and may contain regex
            # metacharacters ('.', '+', '(' ...), so it must be escaped
            # before being interpolated into a pattern.
            path = self._search_regex(
                r'(.+/)%s' % re.escape(display_id), parsed_url.path or '',
                'embed URL', default='')
            # find tell-tale image with the actual ID; the path is likewise
            # literal text and is escaped for the same reason
            ndr_id = self._search_regex(
                r'%s([a-z]+\d+)(?!\.)\b' % (re.escape(path), ), webpage,
                'embed URL', default=None)
            # or try to use special knowledge!
            NDR_INFO_URL_TPL = 'https://www.ndr.de/info/%s-player.html'
            embed_url = 'ndr:%s' % (ndr_id, ) if ndr_id else NDR_INFO_URL_TPL % (embed_url, )
        if not embed_url:
            raise ExtractorError('Unable to extract embedUrl')

        description = self._search_regex(
            r'<p[^>]+itemprop="description">([^<]+)</p>',
            webpage, 'description', default=None) or self._og_search_description(webpage)
        timestamp = parse_iso8601(
            self._search_regex(
                (r'<span[^>]+itemprop="(?:datePublished|uploadDate)"[^>]+content="(?P<cont>[^"]+)"',
                 r'\bvar\s*pdt\s*=\s*(?P<q>["\'])(?P<cont>(?:(?!(?P=q)).)+)(?P=q)', ),
                webpage, 'upload date', group='cont', default=None))
        info = self._search_json_ld(webpage, display_id, default={})
        return merge_dicts({
            '_type': 'url_transparent',
            'url': embed_url,
            'display_id': display_id,
            'description': description,
            'timestamp': timestamp,
        }, info)
154 | ||
155 | ||
class NJoyIE(NDRBaseIE):
    """Extractor for n-joy.de article pages that embed an NDR player."""

    IE_NAME = 'njoy'
    IE_DESC = 'N-JOY'
    _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?:(?P<display_id>[^/?#]+),)?(?P<id>[\da-z]+)\.html'
    _TESTS = [{
        # httpVideo, same content id
        'url': 'http://www.n-joy.de/entertainment/comedy/comedy_contest/Benaissa-beim-NDR-Comedy-Contest,comedycontest2480.html',
        'md5': 'cb63be60cd6f9dd75218803146d8dc67',
        'info_dict': {
            'id': 'comedycontest2480',
            'display_id': 'Benaissa-beim-NDR-Comedy-Contest',
            'ext': 'mp4',
            'title': 'Benaissa beim NDR Comedy Contest',
            'description': 'md5:f057a6c4e1c728b10d33b5ffd36ddc39',
            'uploader': 'ndrtv',
            'upload_date': '20141129',
            'duration': 654,
        },
        'params': {
            'skip_download': True,
        },
        'skip': 'No longer available',
    }, {
        # httpVideo, different content id
        'url': 'http://www.n-joy.de/musik/Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-,felixjaehn168.html',
        'md5': '417660fffa90e6df2fda19f1b40a64d8',
        'info_dict': {
            'id': 'livestream283',
            'display_id': 'Das-frueheste-DJ-Set-des-Nordens-live-mit-Felix-Jaehn-',
            'ext': 'mp3',
            'title': 'Das frueheste DJ Set des Nordens live mit Felix Jaehn',
            'description': 'md5:681698f527b8601e511e7b79edde7d2c',
            'uploader': 'njoy',
            'upload_date': '20210830',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'http://www.n-joy.de/radio/webradio/morningshow209.html',
        'only_matching': True,
    }]

    def _extract_embed(self, webpage, display_id, url=None):
        """Build a ``url_transparent`` result pointing at the ``ndr:<id>`` embed.

        The NDR id is taken from a ``src=`` attribute ending in an id-like
        path segment, or from an ``<iframe id="pp_...">``.  *url* is accepted
        for signature compatibility with the base class and is not used.

        Raises ExtractorError when no NDR id can be found on the page.
        """
        # find tell-tale URL with the actual ID, or ...
        video_id = self._search_regex(
            (r'''\bsrc\s*=\s*["']?(?:/\w+)+/([a-z]+\d+)(?!\.)\b''',
             r'<iframe[^>]+id="pp_([\da-z]+)"', ),
            webpage, 'NDR id', default=None)
        if not video_id:
            # Without this check the result URL would be the literal string
            # 'ndr:None', which cannot be resolved downstream.
            raise ExtractorError('Unable to extract NDR id')

        description = (
            self._html_search_meta('description', webpage)
            or self._search_regex(
                r'<div[^>]+class="subline"[^>]*>[^<]+</div>\s*<p>([^<]+)</p>',
                webpage, 'description', fatal=False))
        return {
            '_type': 'url_transparent',
            'ie_key': 'NDREmbedBase',
            'url': 'ndr:%s' % video_id,
            'display_id': display_id,
            'description': description,
            # Fallback title derived from the URL slug.
            'title': display_id.replace('-', ' ').strip(),
        }
219 | ||
220 | ||
class NDREmbedBaseIE(InfoExtractor):
    """Resolve an NDR media id to formats via its ``<id>-ppjson.json`` file.

    Accepts either the pseudo-URL ``ndr:<id>`` or the ppjson URL itself;
    subclasses reuse ``_real_extract`` with their own ``_VALID_URL``.
    """

    IE_NAME = 'ndr:embed:base'
    _VALID_URL = r'(?:ndr:(?P<id_s>[\da-z]+)|https?://www\.ndr\.de/(?P<id>[\da-z]+)-ppjson\.json)'
    _TESTS = [{
        'url': 'ndr:soundcheck3366',
        'only_matching': True,
    }, {
        'url': 'http://www.ndr.de/soundcheck3366-ppjson.json',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Download the ppjson document for *url* and build the info dict.

        Returns a dict with id, title, is_live, uploader, upload_date,
        duration, thumbnails, formats and subtitles.  Raises KeyError if the
        JSON lacks ``playlist``, ``playlist.config`` or the config ``title``.
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id') or mobj.group('id_s')

        ppjson_url = 'http://www.ndr.de/%s-ppjson.json' % video_id
        ppjson = self._download_json(ppjson_url, video_id)

        # Relative thumbnail/subtitle paths need an http(s) base.  When this
        # extractor is entered through the 'ndr:<id>' pseudo-URL there is
        # none, so fall back to the JSON URL that was actually fetched.
        # NOTE(review): utils.urljoin is expected to return None for a
        # non-http(s) base, which would silently drop those entries - confirm.
        base_url = url if re.match(r'https?://', url) else ppjson_url

        playlist = ppjson['playlist']

        formats = []
        quality_key = qualities(('xs', 's', 'm', 'l', 'xl'))

        for format_id, f in playlist.items():
            # The playlist also holds non-stream entries (e.g. 'config');
            # skip anything that is not a dict carrying a 'src'.
            src = f.get('src') if isinstance(f, dict) else None
            if not src:
                continue
            ext = determine_ext(src, None)
            if ext == 'f4m':
                formats.extend(self._extract_f4m_formats(
                    src + '?hdcore=3.7.0&plugin=aasp-3.7.0.39.44', video_id,
                    f4m_id='hds', fatal=False))
            elif ext == 'm3u8':
                formats.extend(self._extract_m3u8_formats(
                    src, video_id, 'mp4', m3u8_id='hls',
                    entry_protocol='m3u8_native', fatal=False))
            else:
                quality = f.get('quality')
                ff = {
                    'url': src,
                    'format_id': quality or format_id,
                    'quality': quality_key(quality),
                }
                type_ = f.get('type')
                if type_ and type_.split('/')[0] == 'audio':
                    ff['vcodec'] = 'none'
                    # direct audio links without an extension are treated as mp3
                    ff['ext'] = ext or 'mp3'
                formats.append(ff)
        self._sort_formats(formats)

        config = playlist['config']
        # Top-level config (distinct from playlist['config']); tolerate it
        # being absent, null or not a dict.
        page_config = try_get(ppjson, lambda x: x['config'], dict) or {}

        live = config.get('streamType') in ('httpVideoLive', 'httpAudioLive')
        title = config['title']
        uploader = page_config.get('branding')
        upload_date = page_config.get('publicationDate')
        duration = int_or_none(config.get('duration'))

        thumbnails = []
        poster = try_get(config, lambda x: x['poster'], dict) or {}
        for thumbnail_id, thumbnail in poster.items():
            if not isinstance(thumbnail, dict):
                continue
            thumbnail_url = urljoin(base_url, thumbnail.get('src'))
            if not thumbnail_url:
                continue
            thumbnails.append({
                'id': thumbnail.get('quality') or thumbnail_id,
                'url': thumbnail_url,
                'preference': quality_key(thumbnail.get('quality')),
            })

        subtitles = {}
        tracks = config.get('tracks')
        if tracks and isinstance(tracks, list):
            for track in tracks:
                if not isinstance(track, dict):
                    continue
                track_url = urljoin(base_url, track.get('src'))
                if not track_url:
                    continue
                # tracks without a declared language are filed under 'de'
                subtitles.setdefault(track.get('srclang') or 'de', []).append({
                    'url': track_url,
                    'ext': 'ttml',
                })

        return {
            'id': video_id,
            'title': title,
            'is_live': live,
            # '-' is treated as "no uploader"
            'uploader': uploader if uploader != '-' else None,
            # keep only the first 8 characters of the publication date
            'upload_date': upload_date[0:8] if upload_date else None,
            'duration': duration,
            'thumbnails': thumbnails,
            'formats': formats,
            'subtitles': subtitles,
        }
316 | ||
317 | ||
class NDREmbedIE(NDREmbedBaseIE):
    """Extractor for ndr.de ``*-player.html`` / ``*-externalPlayer.html`` pages.

    Only contributes the URL pattern and test cases; extraction itself is
    inherited from NDREmbedBaseIE.
    """

    IE_NAME = 'ndr:embed'
    _VALID_URL = r'https?://(?:\w+\.)*ndr\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:(?:ard)?player|externalPlayer)\.html'
    _TESTS = [
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/ndr_aktuell/ndraktuell28488-player.html',
            'md5': '8b9306142fe65bbdefb5ce24edb6b0a9',
            'info_dict': {
                'id': 'ndraktuell28488',
                'ext': 'mp4',
                'title': 'Norddeutschland begrüßt Flüchtlinge',
                'is_live': False,
                'uploader': 'ndrtv',
                'upload_date': '20150907',
                'duration': 132,
            },
            'skip': 'No longer available',
        },
        {
            'url': 'http://www.ndr.de/ndr2/events/soundcheck/soundcheck3366-player.html',
            'md5': '002085c44bae38802d94ae5802a36e78',
            'info_dict': {
                'id': 'soundcheck3366',
                'ext': 'mp4',
                'title': 'Ella Henderson braucht Vergleiche nicht zu scheuen',
                'is_live': False,
                'uploader': 'ndr2',
                'upload_date': '20150912',
                'duration': 3554,
            },
            'params': {'skip_download': True},
            'skip': 'No longer available',
        },
        {
            'url': 'http://www.ndr.de/info/audio51535-player.html',
            'md5': 'bb3cd38e24fbcc866d13b50ca59307b8',
            'info_dict': {
                'id': 'audio51535',
                'ext': 'mp3',
                'title': 'La Valette entgeht der Hinrichtung',
                'is_live': False,
                'uploader': 'ndrinfo',
                'upload_date': '20210915',
                'duration': 884,
            },
            'params': {'skip_download': True},
        },
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/visite/visite11010-externalPlayer.html',
            'md5': 'ae57f80511c1e1f2fd0d0d3d31aeae7c',
            'info_dict': {
                'id': 'visite11010',
                'ext': 'mp4',
                'title': 'Visite - die ganze Sendung',
                'is_live': False,
                'uploader': 'ndrtv',
                'upload_date': '20150902',
                'duration': 3525,
            },
            'params': {'skip_download': True},
            'skip': 'No longer available',
        },
        {
            # httpVideoLive
            'url': 'http://www.ndr.de/fernsehen/livestream/livestream217-externalPlayer.html',
            'info_dict': {
                'id': 'livestream217',
                'ext': 'mp4',
                'title': r're:^NDR Fernsehen Niedersachsen \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
                'is_live': True,
                'upload_date': '20210409',
                'uploader': 'ndrtv',
            },
            'params': {'skip_download': True},
        },
        # URL-pattern-only checks
        {
            'url': 'http://www.ndr.de/ndrkultur/audio255020-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/nordtour/nordtour7124-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/kultur/film/videos/videoimport10424-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/hamburg_journal/hamj43006-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/fernsehen/sendungen/weltbilder/weltbilder4518-player.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.ndr.de/fernsehen/doku952-player.html',
            'only_matching': True,
        },
    ]
414 | ||
415 | ||
class NJoyEmbedIE(NDREmbedBaseIE):
    """Extractor for n-joy.de ``*-player_*.html`` / ``*-externalPlayer_*.html`` pages.

    Only contributes the URL pattern and test cases; extraction itself is
    inherited from NDREmbedBaseIE.
    """

    IE_NAME = 'njoy:embed'
    _VALID_URL = r'https?://(?:www\.)?n-joy\.de/(?:[^/]+/)*(?P<id>[\da-z]+)-(?:player|externalPlayer)_[^/]+\.html'
    _TESTS = [
        {
            # httpVideo
            'url': 'http://www.n-joy.de/events/reeperbahnfestival/doku948-player_image-bc168e87-5263-4d6d-bd27-bb643005a6de_theme-n-joy.html',
            'md5': '8483cbfe2320bd4d28a349d62d88bd74',
            'info_dict': {
                'id': 'doku948',
                'ext': 'mp4',
                'title': 'Zehn Jahre Reeperbahn Festival - die Doku',
                'is_live': False,
                'upload_date': '20200826',
                'duration': 1011,
            },
            'expected_warnings': ['Unable to download f4m manifest'],
        },
        {
            # httpAudio
            'url': 'http://www.n-joy.de/news_wissen/stefanrichter100-player_image-d5e938b1-f21a-4b9a-86b8-aaba8bca3a13_theme-n-joy.html',
            'md5': 'd989f80f28ac954430f7b8a48197188a',
            'info_dict': {
                'id': 'stefanrichter100',
                'ext': 'mp3',
                'title': 'Interview mit einem Augenzeugen',
                'is_live': False,
                'uploader': 'njoy',
                'upload_date': '20150909',
                'duration': 140,
            },
            'params': {'skip_download': True},
            'skip': 'No longer available',
        },
        {
            # httpAudioLive, no explicit ext
            'url': 'http://www.n-joy.de/news_wissen/webradioweltweit100-player_image-3fec0484-2244-4565-8fb8-ed25fd28b173_theme-n-joy.html',
            'info_dict': {
                'id': 'webradioweltweit100',
                'ext': 'mp3',
                'title': r're:^N-JOY Weltweit \d{4}-\d{2}-\d{2} \d{2}:\d{2}$',
                'is_live': True,
                'uploader': 'njoy',
                'upload_date': '20210830',
            },
            'params': {'skip_download': True},
        },
        # URL-pattern-only checks
        {
            'url': 'http://www.n-joy.de/musik/dockville882-player_image-3905259e-0803-4764-ac72-8b7de077d80a_theme-n-joy.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.n-joy.de/radio/sendungen/morningshow/urlaubsfotos190-player_image-066a5df1-5c95-49ec-a323-941d848718db_theme-n-joy.html',
            'only_matching': True,
        },
        {
            'url': 'http://www.n-joy.de/entertainment/comedy/krudetv290-player_image-ab261bfe-51bf-4bf3-87ba-c5122ee35b3d_theme-n-joy.html',
            'only_matching': True,
        },
    ]