]>
Commit | Line | Data |
---|---|---|
7807ee66 | 1 | # -*- coding: utf-8 -*- |
b461641f S |
2 | from __future__ import unicode_literals |
3 | ||
176cf9e0 | 4 | import itertools |
b461641f S |
5 | import re |
6 | ||
7 | from .common import InfoExtractor | |
1cc79574 | 8 | from ..compat import ( |
cd7481a3 | 9 | compat_parse_qs, |
becafcbf | 10 | compat_urlparse, |
1cc79574 PH |
11 | ) |
12 | from ..utils import ( | |
cd7481a3 | 13 | unified_strdate, |
2559b9d0 | 14 | qualities, |
b461641f S |
15 | ) |
16 | ||
17 | ||
class WDRIE(InfoExtractor):
    """Extractor for media pages on wdr.de and funkhauseuropa.de.

    URLs whose path ends in a player suffix (see ``_PLAYER_REGEX``) are
    handled as single media items; any other matching URL is handled as a
    playlist or a paginated overview page.
    """

    # Suffix that marks a page as a single audio/video player page.
    _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
    _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX

    _TESTS = [
        {
            'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
            'info_dict': {
                'id': 'mdb-362427',
                'ext': 'flv',
                'title': 'Servicezeit',
                'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
                'upload_date': '20140310',
                'is_live': False
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'Page Not Found',
        },
        {
            'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
            'info_dict': {
                'id': 'mdb-363194',
                'ext': 'flv',
                'title': 'Marga Spiegel ist tot',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20140311',
                'is_live': False
            },
            'params': {
                'skip_download': True,
            },
            'skip': 'Page Not Found',
        },
        {
            'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
            'md5': '83e9e8fefad36f357278759870805898',
            'info_dict': {
                'id': 'mdb-194332',
                'ext': 'mp3',
                'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20091129',
                'is_live': False
            },
        },
        {
            'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
            'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
            'info_dict': {
                'id': 'mdb-478135',
                'ext': 'mp3',
                'title': 'Flavia Coelho: Amar é Amar',
                'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
                'upload_date': '20140717',
                'is_live': False
            },
            'skip': 'Page Not Found',
        },
        {
            'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
            'playlist_mincount': 146,
            'info_dict': {
                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
            }
        },
        {
            'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
            'info_dict': {
                'id': 'mdb-103364',
                'title': 're:^WDR Fernsehen Live [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
                'ext': 'flv',
                'upload_date': '20150101',
                'is_live': True
            },
            'params': {
                'skip_download': True,
            },
        }
    ]

    def _playlist_from_page(self, page_url, page_id, webpage):
        """Build a playlist result from a non-player page.

        First looks for links to player pages; if any are found they form
        the playlist. Otherwise the page is treated as an overview page and
        its "nextToLast" links are followed until none is left, collecting
        the mediathek video links of every visited page.
        """
        player_links = re.findall(
            r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX,
            webpage)
        if player_links:  # Playlist page
            return self.playlist_result(
                [self.url_result(compat_urlparse.urljoin(page_url, href), 'WDR')
                 for href in player_links],
                page_id)

        # Overview page
        entries = []
        for page_num in itertools.count(2):
            hrefs = re.findall(
                r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
                webpage)
            # These hrefs start with '/', while page_url already ends with
            # '/'; urljoin avoids producing 'http://host//mediathek/...'.
            entries.extend(
                self.url_result(compat_urlparse.urljoin(page_url, href), 'WDR')
                for href in hrefs)
            next_url_m = re.search(
                r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
            if not next_url_m:
                break
            next_url = compat_urlparse.urljoin(page_url, next_url_m.group(1))
            webpage = self._download_webpage(
                next_url, page_id,
                note='Downloading playlist page %d' % page_num)
        return self.playlist_result(entries, page_id)

    def _formats_from_page(self, video_url, page_id, webpage):
        """Collect all formats for the stream URL and the page's link tags."""
        formats = []
        preference = qualities(['S', 'M', 'L', 'XL'])

        if video_url.endswith('.f4m'):
            # The manifest helpers are called non-fatally, so their result
            # may be empty/None.
            formats.extend(self._extract_f4m_formats(
                video_url + '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18',
                page_id, f4m_id='hds', fatal=False) or [])
        elif video_url.endswith('.smil'):
            formats.extend(self._extract_smil_formats(video_url, page_id, False, {
                'hdcore': '3.3.0',
                'plugin': 'aasp-3.3.0.99.43',
            }) or [])
        else:
            formats.append({
                'url': video_url,
                'http_headers': {
                    'User-Agent': 'mobile',
                },
            })

        m3u8_url = self._search_regex(
            r'rel="adaptiv"[^>]+href="([^"]+)"', webpage, 'm3u8 url',
            default=None)
        if m3u8_url:
            formats.extend(self._extract_m3u8_formats(
                m3u8_url, page_id, 'mp4', 'm3u8_native', m3u8_id='hls',
                fatal=False) or [])

        # Direct downloads, tagged with a size label used for ranking.
        for quality, direct_url in re.findall(
                r'rel="web(S|M|L|XL)"[^>]+href="([^"]+)"', webpage):
            formats.append({
                'url': direct_url,
                'preference': preference(quality),
                'http_headers': {
                    'User-Agent': 'mobile',
                },
            })

        self._sort_formats(formats)
        return formats

    def _real_extract(self, url):
        """Return a playlist result or a single-media info dict for *url*."""
        mobj = re.match(self._VALID_URL, url)
        page_url = mobj.group('url')
        page_id = mobj.group('id')

        webpage = self._download_webpage(url, page_id)

        if mobj.group('player') is None:
            return self._playlist_from_page(page_url, page_id, webpage)

        flashvars = compat_parse_qs(
            self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))

        # From here on the id is the clip id from the flashvars, not the
        # URL path.
        page_id = flashvars['trackerClipId'][0]
        video_url = flashvars['dslSrc'][0]
        title = flashvars['trackerClipTitle'][0]
        thumbnail = flashvars.get('startPicture', [None])[0]
        is_live = flashvars.get('isLive', ['0'])[0] == '1'

        if is_live:
            title = self._live_title(title)

        if 'trackerClipAirTime' in flashvars:
            upload_date = flashvars['trackerClipAirTime'][0]
        else:
            upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')

        if upload_date:
            upload_date = unified_strdate(upload_date)

        formats = self._formats_from_page(video_url, page_id, webpage)

        description = self._html_search_meta('Description', webpage, 'description')

        return {
            'id': page_id,
            'formats': formats,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'is_live': is_live
        }
208 | ||
209 | ||
e4cbb5f3 PH |
class WDRMobileIE(InfoExtractor):
    """Extractor for direct media URLs on mobile-ondemand.wdr.de.

    All metadata is taken from the URL itself; no page is downloaded.
    """

    IE_NAME = 'wdr:mobile'
    _VALID_URL = r'''(?x)
        https?://mobile-ondemand\.wdr\.de/
        .*?/fsk(?P<age_limit>[0-9]+)
        /[0-9]+/[0-9]+/
        (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
    _TEST = {
        'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
        'info_dict': {
            'title': '4283021',
            'id': '421735',
            'ext': 'mp4',
            'age_limit': 0,
        },
        'skip': 'Problems with loading data.'
    }

    def _real_extract(self, url):
        """Build the info dict from the named groups of ``_VALID_URL``."""
        parts = re.match(self._VALID_URL, url).groupdict()
        info = {
            'url': url,
            'id': parts['id'],
            'title': parts['title'],
            'age_limit': int(parts['age_limit']),
        }
        # The media URL is requested with a 'mobile' User-Agent header.
        info['http_headers'] = {
            'User-Agent': 'mobile',
        }
        return info
239 | ||
240 | ||
class WDRMausIE(InfoExtractor):
    """Extractor for video pages on wdrmaus.de ("Sendung mit der Maus")."""

    # Raw strings: the patterns contain '\.' which is not a valid escape in
    # a plain string literal.
    _VALID_URL = r'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
    IE_DESC = 'Sendung mit der Maus'
    _TESTS = [{
        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
        'info_dict': {
            'id': 'aktuelle-sendung',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': 're:^[0-9]{8}$',
            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
        }
    }, {
        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
        'info_dict': {
            'id': '40_jahre_maus',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': '20131007',
            'title': '12.03.2011 - 40 Jahre Maus',
        }
    }]

    def _real_extract(self, url):
        """Download the page for *url* and return its info dict.

        The title is "<air date> - <page heading>". The primary format is
        taken from the ``firstVideo`` parameter of the page's start-video
        link; a second 'http' format is added when the site's translation
        table maps that stream URL to a download URL.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        param_code = self._html_search_regex(
            r'<a href="\?startVideo=1&([^"]+)"', webpage, 'parameters')

        title_date = self._search_regex(
            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
            webpage, 'air date')
        title_str = self._html_search_regex(
            r'<h1>(.*?)</h1>', webpage, 'title')
        title = '%s - %s' % (title_date, title_str)

        # Only parse the date when the meta tag is actually present.
        date_meta = self._html_search_meta('dc.date', webpage)
        upload_date = unified_strdate(date_meta) if date_meta else None

        fields = compat_parse_qs(param_code)
        video_url = fields['firstVideo'][0]
        # The start picture is optional metadata; do not fail without it.
        start_picture = fields.get('startPicture')
        thumbnail = (
            compat_urlparse.urljoin(url, start_picture[0])
            if start_picture else None)

        formats = [{
            'format_id': 'rtmp',
            'url': video_url,
        }]

        jscode = self._download_webpage(
            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
            video_id, fatal=False,
            note='Downloading URL translation table',
            errnote='Could not download URL translation table')
        if jscode:
            for m in re.finditer(
                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
                    jscode):
                stream_prefix = m.group('stream')
                if video_url.startswith(stream_prefix):
                    # Swap only the leading prefix, not later occurrences.
                    http_url = video_url.replace(
                        stream_prefix, m.group('dl'), 1)
                    formats.append({
                        'format_id': 'http',
                        'url': http_url,
                    })
                    break

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }