]>
Commit | Line | Data |
---|---|---|
1 | # -*- coding: utf-8 -*- | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import itertools | |
5 | import re | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..compat import ( | |
9 | compat_parse_qs, | |
10 | compat_urlparse, | |
11 | ) | |
12 | from ..utils import ( | |
13 | determine_ext, | |
14 | unified_strdate, | |
15 | ) | |
16 | ||
17 | ||
class WDRIE(InfoExtractor):
    """Extractor for media pages on wdr.de and funkhauseuropa.de.

    Three page shapes are handled:

    * player pages (URL ends in ``-videoplayer``/``-audioplayer``, optionally
      with a ``_size-X`` suffix): one media item, described by the
      ``flashvars`` parameter embedded in the page;
    * playlist pages: pages that link to several player pages;
    * overview pages: paginated listings of ``/mediathek/video/`` links.
    """

    # URL suffix that marks a page as a player page.
    _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
    # ``url`` is the site root (with trailing slash), ``id`` the page path
    # without the optional player suffix and without ``.html``.
    _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX

    _TESTS = [
        {
            'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
            'info_dict': {
                'id': 'mdb-362427',
                'ext': 'flv',
                'title': 'Servicezeit',
                'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
                'upload_date': '20140310',
                'is_live': False
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
            'info_dict': {
                'id': 'mdb-363194',
                'ext': 'flv',
                'title': 'Marga Spiegel ist tot',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20140311',
                'is_live': False
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
            'md5': '83e9e8fefad36f357278759870805898',
            'info_dict': {
                'id': 'mdb-194332',
                'ext': 'mp3',
                'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20091129',
                'is_live': False
            },
        },
        {
            'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
            'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
            'info_dict': {
                'id': 'mdb-478135',
                'ext': 'mp3',
                'title': 'Flavia Coelho: Amar é Amar',
                'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
                'upload_date': '20140717',
                'is_live': False
            },
        },
        {
            'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
            'playlist_mincount': 146,
            'info_dict': {
                'id': 'mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100',
            }
        },
        {
            'url': 'http://www1.wdr.de/mediathek/video/livestream/index.html',
            'info_dict': {
                'id': 'mdb-103364',
                'title': 're:^WDR Fernsehen [0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}$',
                'description': 'md5:ae2ff888510623bf8d4b115f95a9b7c9',
                'ext': 'flv',
                'upload_date': '20150212',
                'is_live': True
            },
            'params': {
                'skip_download': True,
            },
        }
    ]

    def _extract_playlist(self, page_url, page_id, webpage):
        """Build a playlist result for a page that is not itself a player page.

        Links are resolved against ``page_url`` with ``urljoin`` rather than
        by string concatenation: ``page_url`` ends in ``/`` and the overview
        hrefs start with ``/``, so concatenating them produced
        ``http://host//mediathek/...``.
        """
        player_hrefs = re.findall(
            r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX,
            webpage)
        if player_hrefs:  # Playlist page
            return self.playlist_result(
                [self.url_result(compat_urlparse.urljoin(page_url, href), 'WDR')
                 for href in player_hrefs],
                page_id)

        # Overview page: collect the links on every page, following the
        # "nextToLast" link until there is none. The first page is already
        # downloaded, hence the counter used in the progress note starts at 2.
        entries = []
        for page_num in itertools.count(2):
            entries.extend(
                self.url_result(compat_urlparse.urljoin(page_url, href), 'WDR')
                for href in re.findall(
                    r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
                    webpage))
            next_url_m = re.search(
                r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
            if not next_url_m:
                break
            webpage = self._download_webpage(
                compat_urlparse.urljoin(page_url, next_url_m.group(1)),
                page_id,
                note='Downloading playlist page %d' % page_num)
        return self.playlist_result(entries, page_id)

    def _real_extract(self, url):
        """Extract a single media item or a playlist from ``url``.

        Returns a playlist result when the URL carries no player suffix,
        otherwise an info dict with the keys ``id``, ``url``, ``ext``,
        ``title``, ``description``, ``thumbnail``, ``upload_date`` and
        ``is_live``.

        Raises KeyError when the page's flashvars lack ``trackerClipId``,
        ``dslSrc`` or ``trackerClipTitle``.
        """
        mobj = re.match(self._VALID_URL, url)
        page_url = mobj.group('url')
        page_id = mobj.group('id')

        webpage = self._download_webpage(url, page_id)

        if mobj.group('player') is None:
            return self._extract_playlist(page_url, page_id, webpage)

        flashvars = compat_parse_qs(
            self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))

        # From here on the id is the clip id from the flashvars, not the
        # page path taken from the URL.
        page_id = flashvars['trackerClipId'][0]
        video_url = flashvars['dslSrc'][0]
        title = flashvars['trackerClipTitle'][0]
        thumbnail = flashvars.get('startPicture', [None])[0]
        is_live = flashvars.get('isLive', ['0'])[0] == '1'

        if is_live:
            title = self._live_title(title)

        # Prefer the air time from the flashvars; fall back to the page's
        # DC.Date meta tag.
        if 'trackerClipAirTime' in flashvars:
            upload_date = flashvars['trackerClipAirTime'][0]
        else:
            upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')

        if upload_date:
            upload_date = unified_strdate(upload_date)

        if video_url.endswith('.f4m'):
            # NOTE(review): these query parameters are presumably required by
            # the manifest server - confirm.
            video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
            ext = 'flv'
        elif video_url.endswith('.smil'):
            # Only the first format listed by the SMIL file is used.
            fmt = self._extract_smil_formats(video_url, page_id)[0]
            video_url = fmt['url']
            sep = '&' if '?' in video_url else '?'
            video_url += sep
            video_url += 'hdcore=3.3.0&plugin=aasp-3.3.0.99.43'
            ext = fmt['ext']
        else:
            ext = determine_ext(video_url)

        description = self._html_search_meta('Description', webpage, 'description')

        return {
            'id': page_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'is_live': is_live
        }
178 | ||
179 | ||
class WDRMobileIE(InfoExtractor):
    """Extractor for direct media URLs on mobile-ondemand.wdr.de.

    Every field of the result is read from the URL itself; no page is
    downloaded.
    """
    IE_NAME = 'wdr:mobile'
    _VALID_URL = r'''(?x)
        https?://mobile-ondemand\.wdr\.de/
        .*?/fsk(?P<age_limit>[0-9]+)
        /[0-9]+/[0-9]+/
        (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
    _TEST = {
        'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
        'info_dict': {
            'title': '4283021',
            'id': '421735',
            'ext': 'mp4',
            'age_limit': 0,
        },
        'skip': 'Problems with loading data.'
    }

    def _real_extract(self, url):
        """Return the info dict for ``url``, using its named URL groups.

        The numeric part after ``fsk`` becomes the integer ``age_limit``; the
        two numbers of the file name become ``id`` and ``title``. The media
        URL is the input URL, to be fetched with a ``mobile`` User-Agent.
        """
        url_parts = re.match(self._VALID_URL, url).groupdict()
        info = {
            'url': url,
            'id': url_parts['id'],
            'title': url_parts['title'],
            'age_limit': int(url_parts['age_limit']),
        }
        info['http_headers'] = {'User-Agent': 'mobile'}
        return info
209 | ||
210 | ||
class WDRMausIE(InfoExtractor):
    """Extractor for "Sendung mit der Maus" pages on wdrmaus.de.

    The page provides an RTMP stream URL; when the site's URL translation
    table can be downloaded and contains a matching entry, a plain HTTP
    variant of the same media is offered as a second format.
    """
    # Raw strings: the patterns contain ``\.``, which is not a valid escape
    # sequence in a normal string literal.
    _VALID_URL = r'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
    IE_DESC = 'Sendung mit der Maus'
    _TESTS = [{
        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
        'info_dict': {
            'id': 'aktuelle-sendung',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': 're:^[0-9]{8}$',
            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
        }
    }, {
        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
        'info_dict': {
            'id': '40_jahre_maus',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': '20131007',
            'title': '12.03.2011 - 40 Jahre Maus',
        }
    }]

    def _real_extract(self, url):
        """Extract the video embedded in a wdrmaus.de page.

        Returns an info dict with ``id``, ``title`` (air date and page
        heading joined by `` - ``), ``formats``, ``thumbnail`` and
        ``upload_date``.

        Raises KeyError when the page's start-video parameters contain no
        ``firstVideo`` entry.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        # NOTE(review): this expects a literal "&" after startVideo=1; if the
        # page escapes it as "&amp;" the pattern will not match - confirm
        # against the live markup.
        param_code = self._html_search_regex(
            r'<a href="\?startVideo=1&([^"]+)"', webpage, 'parameters')

        title_date = self._search_regex(
            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
            webpage, 'air date')
        title_str = self._html_search_regex(
            r'<h1>(.*?)</h1>', webpage, 'title')
        title = '%s - %s' % (title_date, title_str)

        # Only convert the date when the meta tag actually yielded a value.
        raw_date = self._html_search_meta('dc.date', webpage)
        upload_date = unified_strdate(raw_date) if raw_date else None

        fields = compat_parse_qs(param_code)
        video_url = fields['firstVideo'][0]
        # The thumbnail is optional metadata; a page without startPicture
        # should still be extractable.
        start_picture = fields.get('startPicture', [None])[0]
        thumbnail = (
            compat_urlparse.urljoin(url, start_picture)
            if start_picture else None)

        formats = [{
            'format_id': 'rtmp',
            'url': video_url,
        }]

        # Best effort: the table maps stream URL prefixes to download URL
        # prefixes. Failing to fetch it only costs the extra HTTP format.
        jscode = self._download_webpage(
            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
            video_id, fatal=False,
            note='Downloading URL translation table',
            errnote='Could not download URL translation table')
        if jscode:
            for m in re.finditer(
                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
                    jscode):
                stream_prefix = m.group('stream')
                if video_url.startswith(stream_prefix):
                    # Swap the prefix only; str.replace would also rewrite
                    # any later occurrence of the same text in the URL.
                    formats.append({
                        'format_id': 'http',
                        'url': m.group('dl') + video_url[len(stream_prefix):],
                    })
                    break

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }