]>
Commit | Line | Data |
---|---|---|
bf64ff72 YK |
1 | import re |
2 | import xml.etree.ElementTree | |
3 | ||
4 | from .common import InfoExtractor | |
5 | from ..utils import ( | |
6 | unified_strdate, | |
ebdf2af7 | 7 | compat_urllib_parse, |
bf64ff72 YK |
8 | ) |
9 | ||
class GameSpotIE(InfoExtractor):
    """Information extractor for video pages on gamespot.com."""

    # The trailing run of digits in the page URL is captured as ``page_id``.
    _VALID_URL = r'(?:http://)?(?:www\.)?gamespot\.com/.*-(?P<page_id>\d+)/?'
    _TEST = {
        u"url": u"http://www.gamespot.com/arma-iii/videos/arma-iii-community-guide-sitrep-i-6410818/",
        u"file": u"6410818.mp4",
        u"md5": u"b2a30deaa8654fcccd43713a6b6a4825",
        u"info_dict": {
            u"title": u"Arma 3 - Community Guide: SITREP I",
            u"upload_date": u"20130627",
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video behind *url*.

        The page is downloaded to discover the numeric video id, then the
        player XML for that id is fetched and parsed for the stream URLs
        and metadata.

        Returns a one-element list holding a dict with the keys ``id``,
        ``url``, ``ext``, ``title``, ``thumbnail``, ``upload_date`` and
        ``view_count``.
        """
        page_id = re.match(self._VALID_URL, url).group('page_id')
        page_html = self._download_webpage(url, page_id)

        # Two patterns are handed to the helper: the og:video meta tag and
        # the embed URL.  NOTE(review): presumably the helper returns the
        # first pattern that matches -- confirm against InfoExtractor.
        id_patterns = [
            r'"og:video" content=".*?\?id=(\d+)"',
            r'http://www\.gamespot\.com/videoembed/(\d+)',
        ]
        video_id = self._html_search_regex(id_patterns, page_html, 'video id')

        query = compat_urllib_parse.urlencode({'id': video_id, 'newplayer': '1'})
        player_xml = self._download_webpage(
            'http://www.gamespot.com/pages/video_player/xml.php?' + query,
            video_id)
        clip = xml.etree.ElementTree.fromstring(player_xml).find('./playList/clip')

        # Each child of <httpURI> contributes one candidate stream with its
        # file path and integer rate.
        streams = []
        for entry in clip.find('./httpURI'):
            streams.append({
                'url': entry.find('filePath').text,
                'rate': int(entry.find('rate').text),
            })

        # Stable ascending sort by rate; the last entry has the highest
        # rate (and, among equal rates, is the one listed last in the XML).
        streams.sort(key=lambda stream: stream['rate'])
        video_url = streams[-1]['url']

        title = clip.find('./title').text
        # Extension is whatever follows the final '.' in the stream URL.
        _, _, ext = video_url.rpartition('.')
        thumbnail = clip.find('./screenGrabURI').text
        views = int(clip.find('./views').text)
        # NOTE(review): unified_strdate presumably normalises the date to
        # YYYYMMDD (the test expects "20130627") -- confirm in utils.
        upload_date = unified_strdate(clip.find('./postDate').text)

        info = {
            'id': video_id,
            'url': video_url,
            'ext': ext,
            'title': title,
            'thumbnail': thumbnail,
            'upload_date': upload_date,
            'view_count': views,
        }
        return [info]