]>
Commit | Line | Data |
---|---|---|
643fe727 OJ |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
4 | from .common import InfoExtractor | |
5 | ||
6 | import urllib2 | |
7 | import xml.etree.ElementTree as ET | |
8 | import re | |
9 | ||
10 | ||
class DHMIE(InfoExtractor):
    """Extractor for film pages under http://www.dhm.de/filmarchiv/.

    The film page supplies the title and the address of an XML playlist;
    the playlist supplies the video URL and the thumbnail URL.
    """

    # NOTE(review): the trailing lazy group ``.*?`` is satisfied by the empty
    # string, so ``id`` presumably always captures '' when the pattern is
    # applied with a prefix match -- confirm how the framework uses it.
    _VALID_URL = r'http://www\.dhm\.de/filmarchiv/(?P<id>.*?)'

    _TEST = {
        'url': 'http://www.dhm.de/filmarchiv/die-filme/the-marshallplan-at-work-in-west-germany/',
        'md5': '11c475f670209bf6acca0b2b7ef51827',
        'info_dict': {
            'id': 'marshallwg',
            'ext': 'flv',
            'title': 'MARSHALL PLAN AT WORK IN WESTERN GERMANY, THE',
            'thumbnail': 'http://www.dhm.de/filmarchiv/video/mpworkwg.jpg',
        }
    }

    def _real_extract(self, url):
        """Build the info dict for the film page at *url*.

        Returns a dict with the keys ``id``, ``title``, ``url`` and
        ``thumbnail``.  ``id`` is the file name (without ``.flv``) taken
        from the video URL, or the empty string when the video URL does not
        contain ``video/<name>.flv``.
        """
        # The id is only known after the playlist has been parsed, so the
        # page itself is fetched with an empty placeholder id.
        video_id = ''
        webpage = self._download_webpage(url, video_id)

        title = self._html_search_regex(
            r'dc:title=\"(.*?)\"', webpage, 'title')

        playlist_url = self._html_search_regex(
            r'file: \'(.*?)\'', webpage, 'playlist URL')

        # NOTE(review): urllib2 exists on Python 2 only; consider whether the
        # extractor framework offers its own download helper for this.
        xml_file = urllib2.urlopen(playlist_url)
        try:
            data = xml_file.read()
        finally:
            # Release the connection even when reading fails.
            xml_file.close()

        root = ET.fromstring(data)
        # Positional lookup: first grandchild of the root, then its first
        # and third children.  Presumably trackList/track/location and
        # .../image of an XSPF playlist -- TODO confirm against a live feed.
        track = root[0][0]
        video_url = track[0].text
        thumbnail = track[2].text

        # Raw string with an escaped dot so that only a literal ".flv"
        # terminates the id (an unescaped dot would match any character and
        # could cut the id short, e.g. "xyflv.flv" -> "x").
        m = re.search(r'video/(.+?)\.flv', video_url)
        if m:
            video_id = m.group(1)

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'thumbnail': thumbnail,
        }