]>
Commit | Line | Data |
---|---|---|
e0a8686f RA |
1 | import re |
2 | ||
63da13e8 | 3 | from .common import InfoExtractor |
e0a8686f | 4 | from ..compat import compat_etree_fromstring |
ecbccea7 | 5 | from ..utils import ( |
6 | xpath_element, | |
7 | xpath_text, | |
8 | int_or_none, | |
9 | ) | |
63da13e8 JMF |
10 | |
11 | ||
class FazIE(InfoExtractor):
    """Information extractor for video pages on faz.net.

    A page is matched by ``_VALID_URL``; the numeric suffix in front of
    ``.html`` is used as the video id.
    """

    IE_NAME = 'faz.net'
    _VALID_URL = r'https?://(?:www\.)?faz\.net/(?:[^/]+/)*.*?-(?P<id>\d+)\.html'

    _TESTS = [{
        'url': 'http://www.faz.net/multimedia/videos/stockholm-chemie-nobelpreis-fuer-drei-amerikanische-forscher-12610585.html',
        'info_dict': {
            'id': '12610585',
            'ext': 'mp4',
            'title': 'Stockholm: Chemie-Nobelpreis für drei amerikanische Forscher',
            'description': 'md5:1453fbf9a0d041d985a47306192ea253',
        },
    }, {
        'url': 'http://www.faz.net/aktuell/politik/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/berlin-gabriel-besteht-zerreissprobe-ueber-datenspeicherung-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/aktuell/politik/-13659345.html',
        'only_matching': True,
    }, {
        'url': 'http://www.faz.net/foobarblafasel-13659345.html',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Build the info dict for the video embedded in the page at *url*.

        The page's ``data-videojs-media`` attribute is read first.  When its
        value is the literal ``'extern'`` the embedded performgroup player
        URL is handed on via ``url_result``.  Otherwise the attribute value
        is parsed as XML and the ``LOW``/``HIGH``/``HQ`` children of
        ``ENCODINGS`` are turned into format entries.

        Returns a dict with ``id``, ``title``, ``formats``, ``description``,
        ``thumbnail`` and ``duration`` (or the ``url_result`` value for
        externally hosted videos).
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        description = self._og_search_description(webpage)

        media = self._html_search_regex(
            r"data-videojs-media='([^']+)", webpage, 'media')
        if media == 'extern':
            # The video lives in an embedded performgroup player; delegate.
            external_url = self._search_regex(
                r"<iframe[^>]+?src='((?:http:)?//player\.performgroup\.com/eplayer/eplayer\.html#/?[0-9a-f]{26}\.[0-9a-z]{26})",
                webpage, 'perform url')
            return self.url_result(external_url)

        # Anything other than 'extern' is treated as inline XML configuration.
        config = compat_etree_fromstring(media)
        encodings = xpath_element(config, 'ENCODINGS', 'encodings', True)

        formats = []
        # The enumeration index doubles as the 'quality' value, so the order
        # of the codes is significant.
        for quality, code in enumerate(['LOW', 'HIGH', 'HQ']):
            node = xpath_element(encodings, code)
            if node is None:
                continue
            media_url = xpath_text(node, 'FILENAME')
            if not media_url:
                continue

            # NOTE(review): 1000 is passed as the third positional argument
            # of xpath_text -- confirm against ..utils which parameter that
            # is; kept unchanged here.
            bitrate = xpath_text(node, 'AVERAGEBITRATE', 1000)
            if bitrate:
                # NOTE(review): a ',' is swapped for '.' before the value is
                # handed to an integer parser -- verify the expected format.
                bitrate = int_or_none(bitrate.replace(',', '.'))

            fmt = {
                'url': media_url,
                'format_id': code.lower(),
                'quality': quality,
                'tbr': bitrate,
                'vcodec': xpath_text(node, 'CODEC'),
            }

            # File names ending in "<width>x<height>_<bitrate>.mp4" carry
            # extra metadata; the bitrate from the name is only a fallback.
            name_match = re.search(r'(\d+)x(\d+)_(\d+)\.mp4', media_url)
            if name_match:
                fmt['width'] = int(name_match.group(1))
                fmt['height'] = int(name_match.group(2))
                fmt['tbr'] = bitrate or int(name_match.group(3))

            formats.append(fmt)

        # The lookups below are kept in the original evaluation order.
        title = self._og_search_title(webpage)
        if description:
            description = description.strip()
        else:
            description = None
        thumbnail = xpath_text(config, 'STILL/STILL_BIG')
        duration = int_or_none(xpath_text(config, 'DURATION'))

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
            'duration': duration,
        }