]>
Commit | Line | Data |
---|---|---|
1 | # encoding: utf-8 | |
2 | import re | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | determine_ext, | |
7 | ) | |
8 | ||
9 | ||
class FazIE(InfoExtractor):
    """Extractor for videos in the multimedia section of faz.net."""

    IE_NAME = u'faz.net'
    # The number at the end of the page slug is captured as the video id.
    _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'

    _TEST = {
        u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
        u'file': u'12610585.mp4',
        u'info_dict': {
            u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
            u'description': u'md5:1453fbf9a0d041d985a47306192ea253',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for a single faz.net video page.

        The page embeds a ``writeFLV('<config url>', ...)`` call; the XML
        document behind that URL lists the available encodings and the
        still image used as thumbnail.

        url -- page URL matching ``_VALID_URL``.

        Returns a dict with the keys ``id``, ``title``, ``formats``,
        ``description`` and ``thumbnail``.  ``description`` and
        ``thumbnail`` are None when the page or the config does not
        provide them.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        config_xml_url = self._search_regex(
            r'writeFLV\(\'(.+?)\',', webpage, u'config xml url')
        config = self._download_xml(
            config_xml_url, video_id, u'Downloading config xml')

        encodings = config.find('ENCODINGS')
        formats = []
        # NOTE(review): the codes look ordered from lowest to highest
        # quality -- confirm against a live config before relying on it.
        for code in ['LOW', 'HIGH', 'HQ']:
            encoding = encodings.find(code)
            if encoding is None:
                continue
            filename = encoding.find('FILENAME')
            # An entry without a usable file name cannot be downloaded;
            # skip it rather than abort the other encodings.
            if filename is None or not filename.text:
                continue
            encoding_url = filename.text
            formats.append({
                'url': encoding_url,
                'ext': determine_ext(encoding_url),
                'format_id': code.lower(),
            })

        # The description is optional metadata: do not fail the whole
        # extraction if the page markup no longer matches.
        descr = self._html_search_regex(
            r'<p class="Content Copy">(.*?)</p>', webpage, u'description',
            fatal=False)

        # The still image is optional as well; find() returns None when
        # the element is absent.
        still = config.find('STILL/STILL_BIG')
        thumbnail = still.text if still is not None else None

        return {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': descr,
            'thumbnail': thumbnail,
        }