]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | import xml.etree.ElementTree | |
3 | import operator | |
4 | ||
5 | from .common import InfoExtractor | |
6 | ||
7 | ||
class MetacriticIE(InfoExtractor):
    """Extractor for trailer pages on www.metacritic.com.

    The numeric trailer id is taken from the page URL and used to query the
    site's ``video_data`` endpoint, which returns an XML playlist describing
    the available video files.
    """

    _VALID_URL = r'https?://www\.metacritic\.com/.+?/trailers/(?P<id>\d+)'

    _TEST = {
        u'url': u'http://www.metacritic.com/game/playstation-4/infamous-second-son/trailers/3698222',
        u'file': u'3698222.mp4',
        u'info_dict': {
            u'title': u'inFamous: Second Son - inSide Sucker Punch: Smoke & Mirrors',
            u'description': u'Take a peak behind-the-scenes to see how Sucker Punch brings smoke into the universe of inFAMOUS Second Son on the PS4.',
            u'duration': 221,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the trailer at *url*.

        Downloads the trailer web page (for the description) and the
        ``video_data`` XML (for title, duration and video files), then
        builds one format entry per ``httpURI/videoFile`` element, sorted
        by ascending bitrate.

        The returned dict has the keys ``id``, ``title``, ``formats``,
        ``description`` and ``duration``, plus the keys of the highest-rate
        format merged in at the top level.

        Raises StopIteration if no clip in the playlist matches the video
        id, and IndexError if the matching clip lists no video files.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        webpage = self._download_webpage(url, video_id)

        info_xml = self._download_webpage(
            'http://www.metacritic.com/video_data?video=' + video_id,
            video_id, u'Downloading info xml')
        # The xml is not well formed, there are raw '&'. Escape every
        # ampersand that does not already start an entity reference so the
        # parser accepts the document without double-escaping valid entities.
        info_xml = re.sub(
            r'&(?!(?:amp|lt|gt|apos|quot|#[0-9]+|#x[0-9a-fA-F]+);)',
            '&amp;', info_xml)
        doc = xml.etree.ElementTree.fromstring(info_xml.encode('utf-8'))

        # The playlist may hold several clips; pick the one for this page.
        clip = next(c for c in doc.findall('playList/clip')
                    if c.find('id').text == video_id)

        formats = []
        for video_file in clip.findall('httpURI/videoFile'):
            rate_str = video_file.find('rate').text
            formats.append({
                'url': video_file.find('filePath').text,
                'ext': 'mp4',
                'format_id': rate_str,
                'rate': int(rate_str),
            })
        # Ascending bitrate, so the last entry is the best quality.
        formats.sort(key=operator.itemgetter('rate'))

        description = self._html_search_regex(
            r'<b>Description:</b>(.*?)</p>',
            webpage, u'description', flags=re.DOTALL)

        result = {
            'id': video_id,
            'title': clip.find('title').text,
            'formats': formats,
            'description': description,
            'duration': int(clip.find('duration').text),
        }
        # TODO: Remove when #980 has been merged
        # Until then, expose the best format's fields at the top level.
        result.update(formats[-1])
        return result