]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from .common import InfoExtractor | |
4 | from ..compat import compat_etree_fromstring | |
5 | from ..utils import ( | |
6 | xpath_element, | |
7 | xpath_text, | |
8 | int_or_none, | |
9 | ) | |
10 | ||
11 | ||
class FazIE(InfoExtractor):
    """Information extractor for video pages hosted on faz.net."""

    IE_NAME = 'faz.net'
    # The numeric video id is the trailing "-<digits>" right before ".html".
    _VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'

    _TESTS = [{
        'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
        'info_dict': {
            'id': '12610585',
            'ext': 'mp4',
            'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
            'description': 'md5:1453fbf9a0d041d985a47306192ea253',
        },
    }, {
        'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/foobarblafasel-13659345.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the faz.net page at *url*.

        The page's ``data-videojs-media`` attribute either holds the literal
        ``extern`` (the video is embedded through a performgroup player
        iframe, whose URL is handed off via ``url_result``) or an XML
        document that is parsed for the LOW/HIGH/HQ encodings, the still
        image and the duration.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        description = self._og_search_description(webpage)
        media = self._html_search_regex(
            r"data-videojs-media='([^']+)", webpage, 'media')

        # Externally hosted video: delegate to whatever handles the player URL.
        if media == 'extern':
            return self.url_result(self._search_regex(
                r"<iframe[^>]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})",
                webpage, 'perform url'))

        config = compat_etree_fromstring(media)
        # NOTE(review): the trailing True presumably marks ENCODINGS as
        # required -- confirm against xpath_element's signature.
        encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)

        formats = []
        # The position in this tuple doubles as the format's 'quality' value.
        for quality, label in enumerate(('LOW', 'HIGH', 'HQ')):
            node = xpath_element(encodings, label)
            if node is None:
                continue
            file_url = xpath_text(node, 'FILENAME')
            if not file_url:
                continue

            # NOTE(review): the meaning of the positional 1000 is not visible
            # from this file -- verify against xpath_text's signature.
            bitrate = xpath_text(node, 'AVERAGEBITRATE', 1000)
            if bitrate:
                # NOTE(review): commas are turned into dots before integer
                # parsing; how int_or_none treats a dotted string should be
                # confirmed.
                bitrate = int_or_none(bitrate.replace(',', '.'))

            fmt = {
                'url': file_url,
                'format_id': label.lower(),
                'quality': quality,
                'tbr': bitrate,
                'vcodec': xpath_text(node, 'CODEC'),
            }

            # File names of the form "<width>x<height>_<bitrate>.mp4" carry
            # the dimensions and a fallback bitrate.
            size_match = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', file_url)
            if size_match:
                width, height, name_bitrate = size_match.groups()
                fmt['width'] = int(width)
                fmt['height'] = int(height)
                fmt['tbr'] = bitrate or int(name_bitrate)

            formats.append(fmt)

        # Remaining metadata is resolved only after the formats are collected.
        title = self._og_search_title(webpage)
        if description:
            description = description.strip()
        else:
            description = None
        thumbnail = xpath_text(config, 'STILL/STILL_BIG')
        duration = int_or_none(xpath_text(config, 'DURATION'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
        }