]>
Commit | Line | Data |
---|---|---|
49f5f315 PH |
1 | import re |
2 | import xml.etree.ElementTree | |
3 | ||
4 | from .common import InfoExtractor | |
5 | ||
6 | ||
class SpiegelIE(InfoExtractor):
    """Information extractor for video pages on spiegel.de.

    The numeric video id is taken from the page URL, the title is scraped
    from the HTML page, and the media file name is read from a per-video
    XML document served from video2.spiegel.de.
    """

    _VALID_URL = r'https?://(?:www\.)?spiegel\.de/video/[^/]*-(?P<videoID>[0-9]+)(?:\.html)?(?:#.*)?$'
    _TEST = {
        u'url': u'http://www.spiegel.de/video/vulkan-tungurahua-in-ecuador-ist-wieder-aktiv-video-1259285.html',
        u'file': u'1259285.mp4',
        u'md5': u'2c2754212136f35fb4b19767d242f66e',
        u'info_dict': {
            u"title": u"Vulkanausbruch in Ecuador: Der \"Feuerschlund\" ist wieder aktiv"
        }
    }

    def _real_extract(self, url):
        """Extract the video information for a spiegel.de video URL.

        Args:
            url: A page URL matching ``_VALID_URL``.

        Returns:
            A one-element list holding a dict with the keys ``id``, ``url``,
            ``ext`` and ``title``. ``duration`` (a float) is added only when
            the XML document carries a parseable ``<duration>`` value.
        """
        m = re.match(self._VALID_URL, url)
        video_id = m.group('videoID')

        webpage = self._download_webpage(url, video_id)

        video_title = self._html_search_regex(
            r'<div class="module-title">(.*?)</div>', webpage, u'title')

        xml_url = u'http://video2.spiegel.de/flash/' + video_id + u'.xml'
        xml_code = self._download_webpage(
            xml_url, video_id,
            note=u'Downloading XML', errnote=u'Failed to download XML')

        idoc = xml.etree.ElementTree.fromstring(xml_code)
        # Only the last child of the XML root is consulted.
        # NOTE(review): presumably each child is one encoding variant and the
        # last one is the preferred quality -- confirm against the XML feed.
        last_type = idoc[-1]
        filename = last_type.findall('./filename')[0].text

        video_url = 'http://video2.spiegel.de/flash/' + filename
        # Everything after the final dot of the file name is the extension.
        video_ext = filename.rpartition('.')[2]
        info = {
            'id': video_id,
            'url': video_url,
            'ext': video_ext,
            'title': video_title,
        }

        # Duration is optional metadata: a missing, empty or non-numeric
        # <duration> element must not abort an otherwise successful
        # extraction, so the key is simply left out in that case.
        try:
            info['duration'] = float(last_type.findtext('./duration'))
        except (TypeError, ValueError):
            pass

        return [info]