]>
Commit | Line | Data |
---|---|---|
7807ee66 | 1 | # -*- coding: utf-8 -*- |
b461641f S |
2 | from __future__ import unicode_literals |
3 | ||
176cf9e0 | 4 | import itertools |
b461641f S |
5 | import re |
6 | ||
7 | from .common import InfoExtractor | |
1cc79574 | 8 | from ..compat import ( |
cd7481a3 | 9 | compat_parse_qs, |
becafcbf | 10 | compat_urlparse, |
1cc79574 PH |
11 | ) |
12 | from ..utils import ( | |
b461641f | 13 | determine_ext, |
cd7481a3 | 14 | unified_strdate, |
b461641f S |
15 | ) |
16 | ||
17 | ||
class WDRIE(InfoExtractor):
    """Extractor for wdr.de / funkhauseuropa.de media pages.

    A URL whose path ends in ``-videoplayer`` / ``-audioplayer`` (optionally
    with a ``_size-X`` suffix) is a player page and yields one media entry
    built from the page's ``flashvars`` parameter.  Any other matching URL is
    handled as a playlist: either a page linking directly to player pages, or
    a paginated overview page listing ``/mediathek/video/`` links.
    """

    _PLAYER_REGEX = '-(?:video|audio)player(?:_size-[LMS])?'
    _VALID_URL = r'(?P<url>https?://www\d?\.(?:wdr\d?|funkhauseuropa)\.de/)(?P<id>.+?)(?P<player>%s)?\.html' % _PLAYER_REGEX

    _TESTS = [
        {
            'url': 'http://www1.wdr.de/mediathek/video/sendungen/servicezeit/videoservicezeit560-videoplayer_size-L.html',
            'info_dict': {
                'id': 'mdb-362427',
                'ext': 'flv',
                'title': 'Servicezeit',
                'description': 'md5:c8f43e5e815eeb54d0b96df2fba906cb',
                'upload_date': '20140310',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www1.wdr.de/themen/av/videomargaspiegelisttot101-videoplayer.html',
            'info_dict': {
                'id': 'mdb-363194',
                'ext': 'flv',
                'title': 'Marga Spiegel ist tot',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20140311',
            },
            'params': {
                'skip_download': True,
            },
        },
        {
            'url': 'http://www1.wdr.de/themen/kultur/audioerlebtegeschichtenmargaspiegel100-audioplayer.html',
            'md5': '83e9e8fefad36f357278759870805898',
            'info_dict': {
                'id': 'mdb-194332',
                'ext': 'mp3',
                'title': 'Erlebte Geschichten: Marga Spiegel (29.11.2009)',
                'description': 'md5:2309992a6716c347891c045be50992e4',
                'upload_date': '20091129',
            },
        },
        {
            'url': 'http://www.funkhauseuropa.de/av/audioflaviacoelhoamaramar100-audioplayer.html',
            'md5': '99a1443ff29af19f6c52cf6f4dc1f4aa',
            'info_dict': {
                'id': 'mdb-478135',
                'ext': 'mp3',
                'title': 'Flavia Coelho: Amar é Amar',
                'description': 'md5:7b29e97e10dfb6e265238b32fa35b23a',
                'upload_date': '20140717',
            },
        },
        {
            'url': 'http://www1.wdr.de/mediathek/video/sendungen/quarks_und_co/filterseite-quarks-und-co100.html',
            'playlist_mincount': 146,
        }
    ]

    def _real_extract(self, url):
        """Return an info dict for a player page, or a playlist otherwise.

        For non-player pages the links to player pages are collected first;
        if none are found the page is walked as a paginated overview,
        following the ``nextToLast`` link until it disappears.
        """
        mobj = re.match(self._VALID_URL, url)
        page_url = mobj.group('url')
        page_id = mobj.group('id')

        webpage = self._download_webpage(url, page_id)

        def make_entry(href):
            # Resolve against the site root with urljoin rather than string
            # concatenation: page_url ends in '/' and overview hrefs start
            # with '/', so concatenating produced 'http://host//mediathek/..'.
            return self.url_result(
                compat_urlparse.urljoin(page_url, href), 'WDR')

        if mobj.group('player') is None:
            entries = [
                make_entry(href)
                for href in re.findall(
                    r'<a href="/?(.+?%s\.html)" rel="nofollow"' % self._PLAYER_REGEX,
                    webpage)
            ]

            if entries:  # Playlist page
                return self.playlist_result(entries, page_id)

            # Overview page: the first page is already downloaded, so the
            # progress counter for follow-up pages starts at 2.
            entries = []
            for page_num in itertools.count(2):
                hrefs = re.findall(
                    r'<li class="mediathekvideo"\s*>\s*<img[^>]*>\s*<a href="(/mediathek/video/[^"]+)"',
                    webpage)
                entries.extend(make_entry(href) for href in hrefs)
                next_url_m = re.search(
                    r'<li class="nextToLast">\s*<a href="([^"]+)"', webpage)
                if not next_url_m:
                    break
                # urljoin also copes with root-relative or absolute hrefs.
                next_url = compat_urlparse.urljoin(
                    page_url, next_url_m.group(1))
                webpage = self._download_webpage(
                    next_url, page_id,
                    note='Downloading playlist page %d' % page_num)
            return self.playlist_result(entries, page_id)

        # Player page: all media metadata is carried in the Flash player's
        # query-string-encoded "flashvars" parameter.
        flashvars = compat_parse_qs(
            self._html_search_regex(r'<param name="flashvars" value="([^"]+)"', webpage, 'flashvars'))

        page_id = flashvars['trackerClipId'][0]
        video_url = flashvars['dslSrc'][0]
        title = flashvars['trackerClipTitle'][0]
        # The start picture is optional.
        thumbnail = flashvars.get('startPicture', [None])[0]

        # Prefer the air time from flashvars; fall back to the page meta tag.
        if 'trackerClipAirTime' in flashvars:
            upload_date = flashvars['trackerClipAirTime'][0]
        else:
            upload_date = self._html_search_meta('DC.Date', webpage, 'upload date')

        if upload_date:
            upload_date = unified_strdate(upload_date)

        if video_url.endswith('.f4m'):
            # NOTE(review): these query parameters are presumably required by
            # the HDS manifest server - confirm before changing them.
            video_url += '?hdcore=3.2.0&plugin=aasp-3.2.0.77.18'
            ext = 'flv'
        else:
            ext = determine_ext(video_url)

        description = self._html_search_meta('Description', webpage, 'description')

        return {
            'id': page_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }
145 | ||
146 | ||
e4cbb5f3 PH |
class WDRMobileIE(InfoExtractor):
    """Extractor for direct media URLs on mobile-ondemand.wdr.de.

    Every field is read from the URL itself; no page is downloaded.
    """

    _VALID_URL = r'''(?x)
        https?://mobile-ondemand\.wdr\.de/
        .*?/fsk(?P<age_limit>[0-9]+)
        /[0-9]+/[0-9]+/
        (?P<id>[0-9]+)_(?P<title>[0-9]+)'''
    IE_NAME = 'wdr:mobile'
    _TEST = {
        'url': 'http://mobile-ondemand.wdr.de/CMS2010/mdb/ondemand/weltweit/fsk0/42/421735/421735_4283021.mp4',
        'info_dict': {
            'title': '4283021',
            'id': '421735',
            'ext': 'mp4',
            'age_limit': 0,
        },
        'skip': 'Problems with loading data.'
    }

    def _real_extract(self, url):
        """Build the info dict from the named groups of ``_VALID_URL``."""
        url_parts = re.match(self._VALID_URL, url).groupdict()
        # The "fsk<N>" path component carries the age rating as digits.
        min_age = int(url_parts['age_limit'])
        return {
            'id': url_parts['id'],
            'title': url_parts['title'],
            'age_limit': min_age,
            'url': url,
            'user_agent': 'mobile',
        }
174 | ||
175 | ||
class WDRMausIE(InfoExtractor):
    """Extractor for wdrmaus.de ("Sendung mit der Maus") video pages.

    The page exposes its stream URL and start picture in the query string of
    the "startVideo" link.  An additional plain HTTP format is offered when
    the site's JavaScript translation table has a download prefix matching
    the stream URL.
    """

    # Raw strings: these patterns contain '\.', which is an invalid escape
    # sequence in a normal string literal.
    _VALID_URL = r'http://(?:www\.)?wdrmaus\.de/(?:[^/]+/){,2}(?P<id>[^/?#]+)(?:/index\.php5|(?<!index)\.php5|/(?:$|[?#]))'
    IE_DESC = 'Sendung mit der Maus'
    _TESTS = [{
        'url': 'http://www.wdrmaus.de/aktuelle-sendung/index.php5',
        'info_dict': {
            'id': 'aktuelle-sendung',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': 're:^[0-9]{8}$',
            'title': 're:^[0-9.]{10} - Aktuelle Sendung$',
        }
    }, {
        'url': 'http://www.wdrmaus.de/sachgeschichten/sachgeschichten/40_jahre_maus.php5',
        'md5': '3b1227ca3ed28d73ec5737c65743b2a3',
        'info_dict': {
            'id': '40_jahre_maus',
            'ext': 'mp4',
            'thumbnail': r're:^http://.+\.jpg',
            'upload_date': '20131007',
            'title': '12.03.2011 - 40 Jahre Maus',
        }
    }]

    def _real_extract(self, url):
        """Download the page and return an info dict with its formats.

        The title is "<air date> - <page heading>".  ``upload_date`` and
        ``thumbnail`` are ``None`` when the page does not provide them.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        param_code = self._html_search_regex(
            r'<a href="\?startVideo=1&([^"]+)"', webpage, 'parameters')

        title_date = self._search_regex(
            r'<div class="sendedatum"><p>Sendedatum:\s*([0-9\.]+)</p>',
            webpage, 'air date')
        title_str = self._html_search_regex(
            r'<h1>(.*?)</h1>', webpage, 'title')
        title = '%s - %s' % (title_date, title_str)

        # Only normalize the date when the meta tag was actually found,
        # matching the guard WDRIE applies before calling unified_strdate.
        upload_date = self._html_search_meta('dc.date', webpage)
        if upload_date:
            upload_date = unified_strdate(upload_date)

        fields = compat_parse_qs(param_code)
        video_url = fields['firstVideo'][0]
        # The start picture is optional (as in WDRIE); a missing picture
        # should not abort the extraction.
        thumbnail = None
        if 'startPicture' in fields:
            thumbnail = compat_urlparse.urljoin(url, fields['startPicture'][0])

        formats = [{
            'format_id': 'rtmp',
            'url': video_url,
        }]

        # Best effort: the table maps stream URL prefixes to download URL
        # prefixes; without it only the rtmp format is returned.
        jscode = self._download_webpage(
            'http://www.wdrmaus.de/codebase/js/extended-medien.min.js',
            video_id, fatal=False,
            note='Downloading URL translation table',
            errnote='Could not download URL translation table')
        if jscode:
            for m in re.finditer(
                    r"stream:\s*'dslSrc=(?P<stream>[^']+)',\s*download:\s*'(?P<dl>[^']+)'\s*\}",
                    jscode):
                stream_prefix = m.group('stream')
                if video_url.startswith(stream_prefix):
                    # Swap only the leading prefix; str.replace would also
                    # rewrite any later occurrence inside the URL.
                    http_url = m.group('dl') + video_url[len(stream_prefix):]
                    formats.append({
                        'format_id': 'http',
                        'url': http_url,
                    })
                    break

        self._sort_formats(formats)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
        }