]>
Commit | Line | Data |
---|---|---|
0e3ae924 | 1 | import re |
0e3ae924 | 2 | |
3 | from .common import InfoExtractor | |
1a2b377c | 4 | from ..utils import ( |
af7585c8 | 5 | UnsupportedError, |
0f6e60bb | 6 | extract_attributes, |
0f6e60bb | 7 | int_or_none, |
af7585c8 M |
8 | js_to_json, |
9 | parse_iso8601, | |
10 | try_get, | |
1a2b377c | 11 | ) |
0e3ae924 | 12 | |
13 | ||
class TagesschauIE(InfoExtractor):
    """Extract audio/video embedded in tagesschau.de HTML pages.

    Media is discovered by scanning every ``<div ...>`` opening tag of the
    page for a ``data-config`` attribute holding a JS-style object; the
    streams are read from ``mc._mediaArray[0]._mediaStreamArray`` of that
    object. Only HLS master playlists (``master.m3u8``) and direct ``.mp3``
    URLs are turned into formats.
    """

    _VALID_URL = r'https?://(?:www\.)?tagesschau\.de/(?P<path>[^/]+/(?:[^/]+/)*?(?P<id>[^/#?]+?(?:-?[0-9]+)?))(?:~_?[^/#?]+?)?\.html'

    _TESTS = [{
        'url': 'http://www.tagesschau.de/multimedia/video/video-102143.html',
        'md5': 'ccb9359bf8c4795836e43759f3408a93',
        'info_dict': {
            'id': 'video-102143-1',
            'ext': 'mp4',
            'title': 'Regierungsumbildung in Athen: Neue Minister in Griechenland vereidigt',
            'duration': 138,
        },
    }, {
        'url': 'http://www.tagesschau.de/multimedia/sendung/ts-5727.html',
        'md5': '5c15e8f3da049e48829ec9786d835536',
        'info_dict': {
            'id': 'ts-5727-1',
            'ext': 'mp4',
            'title': 'Ganze Sendung',
            'duration': 932,
        },
    }, {
        # exclusive audio
        'url': 'http://www.tagesschau.de/multimedia/audio/audio-29417.html',
        'md5': '4bff8f23504df56a0d86ed312d654182',
        'info_dict': {
            'id': 'audio-29417-1',
            'ext': 'mp3',
            'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
        },
    }, {
        'url': 'http://www.tagesschau.de/inland/bnd-303.html',
        'md5': 'f049fa1698d7564e9ca4c3325108f034',
        'info_dict': {
            'id': 'bnd-303-1',
            'ext': 'mp3',
            'title': 'Das Siegel des Bundesnachrichtendienstes | dpa',
        },
    }, {
        'url': 'http://www.tagesschau.de/inland/afd-parteitag-135.html',
        'info_dict': {
            'id': 'afd-parteitag-135',
            'title': 'AfD',
        },
        'playlist_mincount': 15,
    }, {
        'url': 'https://www.tagesschau.de/multimedia/audio/audio-29417~player.html',
        'info_dict': {
            'id': 'audio-29417-1',
            'ext': 'mp3',
            'title': 'EU-Gipfel: Im Verbrennerstreit hat Deutschland maximalen Schaden angerichtet',
        },
    }, {
        'url': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-327.html',
        'info_dict': {
            'id': 'podcast-11km-327',
            'ext': 'mp3',
            'title': 'Gewalt in der Kita – Wenn Erzieher:innen schweigen',
            'upload_date': '20230322',
            'timestamp': 1679482808,
            'thumbnail': 'https://www.tagesschau.de/multimedia/audio/podcast-11km-329~_v-original.jpg',
            'description': 'md5:dad059931fe4b3693e3656e93a249848',
        },
    }, {
        'url': 'http://www.tagesschau.de/multimedia/sendung/tsg-3771.html',
        'only_matching': True,
    }, {
        'url': 'http://www.tagesschau.de/multimedia/sendung/tt-3827.html',
        'only_matching': True,
    }, {
        'url': 'http://www.tagesschau.de/multimedia/sendung/nm-3475.html',
        'only_matching': True,
    }, {
        'url': 'http://www.tagesschau.de/multimedia/sendung/weltspiegel-3167.html',
        'only_matching': True,
    }, {
        'url': 'http://www.tagesschau.de/multimedia/tsvorzwanzig-959.html',
        'only_matching': True,
    }, {
        'url': 'http://www.tagesschau.de/multimedia/sendung/bab/bab-3299~_bab-sendung-209.html',
        'only_matching': True,
    }, {
        'url': 'http://www.tagesschau.de/multimedia/video/video-102303~_bab-sendung-211.html',
        'only_matching': True,
    }, {
        'url': 'http://www.tagesschau.de/100sekunden/index.html',
        'only_matching': True,
    }, {
        # playlist article with collapsing sections
        'url': 'http://www.tagesschau.de/wirtschaft/faq-freihandelszone-eu-usa-101.html',
        'only_matching': True,
    }]

    def _extract_stream_formats(self, stream, video_id):
        """Build the format list for one ``_mediaStreamArray`` item.

        ``stream['_stream']`` is normally a single URL string. A list of
        URLs is accepted as well, and anything that is not a string is
        skipped, so unexpected data cannot abort the whole extraction with
        an AttributeError.

        Returns a (possibly empty) list of format dicts.
        """
        if not isinstance(stream, dict):
            return []

        stream_urls = stream.get('_stream') or []
        if not isinstance(stream_urls, (list, tuple)):
            stream_urls = [stream_urls]

        formats = []
        for media_url in stream_urls:
            if not isinstance(media_url, str):
                continue
            if media_url.endswith('master.m3u8'):
                formats.extend(self._extract_m3u8_formats(
                    media_url, video_id, 'mp4', m3u8_id='hls'))
            elif media_url.endswith('.mp3'):
                formats.append({
                    'url': media_url,
                    'vcodec': 'none',
                })
        return formats

    def _real_extract(self, url):
        """Return the info dict (or playlist) for a tagesschau.de page.

        Raises UnsupportedError when the page contains no media item with a
        usable stream. More than one collected entry yields a playlist;
        exactly one yields a single result enriched with page-level
        metadata (thumbnail, timestamp, description).
        """
        mobj = self._match_valid_url(url)
        video_id = mobj.group('id') or mobj.group('path')
        display_id = video_id.lstrip('-')

        webpage = self._download_webpage(url, display_id)

        title = self._html_search_regex(
            r'<span[^>]*class="headline"[^>]*>(.+?)</span>',
            webpage, 'title', default=None) or self._og_search_title(webpage, fatal=False)

        entries = []
        # Counts media items that have a stream array; used as the
        # numeric suffix of entry ids.
        num = 0
        for div_tag in re.findall(r'<div[^>]+>', webpage):
            config = extract_attributes(div_tag).get('data-config')
            if not config:
                continue
            # fatal=False: a malformed config yields None, which the
            # try_get() calls below tolerate.
            video = self._parse_json(config, video_id, transform_source=js_to_json, fatal=False)
            video_formats = try_get(
                video, lambda x: x['mc']['_mediaArray'][0]['_mediaStreamArray'], list)
            if not video_formats:
                continue
            num += 1
            for video_format in video_formats:
                formats = self._extract_stream_formats(video_format, video_id)
                if not formats:
                    continue
                # One entry per stream that produced formats.
                entries.append({
                    'id': '%s-%d' % (display_id, num),
                    'title': try_get(video, lambda x: x['mc']['_title']),
                    'duration': int_or_none(try_get(video, lambda x: x['mc']['_duration'])),
                    'formats': formats,
                })

        if not entries:
            raise UnsupportedError(url)

        if len(entries) > 1:
            return self.playlist_result(entries, display_id, title)

        return {
            'id': display_id,
            # Fall back to the media's own title when the page provides
            # neither a headline nor an og:title.
            'title': title or entries[0]['title'],
            'thumbnail': self._og_search_thumbnail(webpage),
            'formats': entries[0]['formats'],
            'timestamp': parse_iso8601(self._html_search_meta('date', webpage)),
            'description': self._og_search_description(webpage),
            'duration': entries[0]['duration'],
        }