]>
Commit | Line | Data |
---|---|---|
63da13e8 JMF |
1 | # encoding: utf-8 |
2 | import re | |
3 | import xml.etree.ElementTree | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | determine_ext, | |
8 | clean_html, | |
9 | get_element_by_attribute, | |
10 | ) | |
11 | ||
12 | ||
class FazIE(InfoExtractor):
    """Extractor for video pages under faz.net/multimedia/videos/."""

    IE_NAME = u'faz.net'
    # The numeric video id is the trailing "-<digits>" right before the
    # literal ".html" suffix (the dot is escaped so that it cannot match an
    # arbitrary character).
    _VALID_URL = r'https?://www\.faz\.net/multimedia/videos/.*?-(?P<id>\d+)\.html'

    _TEST = {
        u'url': u'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
        u'file': u'12610585.mp4',
        u'info_dict': {
            u'title': u'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
            u'description': u'md5:1453fbf9a0d041d985a47306192ea253',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the video page at *url*.

        The page embeds a ``writeFLV('<config url>', ...)`` call; the XML
        document behind that URL lists the available encodings and the
        still image.

        Returns a dict with ``id``, ``title``, ``formats``, ``description``
        and ``thumbnail``, plus the fields of the last collected format
        merged into the top level.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        self.to_screen(video_id)

        webpage = self._download_webpage(url, video_id)
        config_xml_url = self._search_regex(
            r'writeFLV\(\'(.+?)\',', webpage, u'config xml url')
        config_xml = self._download_webpage(
            config_xml_url, video_id, u'Downloading config xml')
        # ElementTree is fed bytes so that it does the decoding itself.
        config = xml.etree.ElementTree.fromstring(config_xml.encode('utf-8'))

        encodings = config.find('ENCODINGS')
        formats = []
        # The list order decides which entry ends up last and is therefore
        # merged into the result below (presumably ascending quality).
        for code in ['LOW', 'HIGH', 'HQ']:
            encoding = encodings.find(code)
            if encoding is None:
                # This encoding is not offered for the video.
                continue
            encoding_url = encoding.find('FILENAME').text
            formats.append({
                'url': encoding_url,
                'ext': determine_ext(encoding_url),
                'format_id': code.lower(),
            })

        # A config without a still image must not abort the extraction;
        # the thumbnail is simply left unset in that case.
        thumbnail_node = config.find('STILL/STILL_BIG')
        thumbnail = None if thumbnail_node is None else thumbnail_node.text

        descr_html = get_element_by_attribute('class', 'Content Copy', webpage)
        info = {
            'id': video_id,
            'title': self._og_search_title(webpage),
            'formats': formats,
            'description': clean_html(descr_html),
            'thumbnail': thumbnail,
        }
        # TODO: Remove when #980 has been merged
        info.update(formats[-1])
        return info