]>
Commit | Line | Data |
---|---|---|
6cb38a99 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | ||
7 | ||
class FirstpostIE(InfoExtractor):
    """Extractor for videos embedded in firstpost.com article pages."""

    # The digits right before ".html" are captured as the video id.
    _VALID_URL = r'http://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'

    _TEST = {
        'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
        'md5': 'ee9114957692f01fb1263ed87039112a',
        'info_dict': {
            'id': '1025403',
            'ext': 'mp4',
            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
            'description': 'md5:feef3041cb09724e0bdc02843348f5f4',
        }
    }

    @staticmethod
    def _xml_text(parent, path):
        """Return the text of the child element at ``path``.

        Returns None when the child element is missing (or has no text)
        instead of raising AttributeError.
        """
        node = parent.find(path)
        return None if node is None else node.text

    @staticmethod
    def _parse_int(text):
        """Convert ``text`` to an int, or return None if that is not possible.

        ``int()`` already ignores surrounding whitespace, so no explicit
        strip is needed.
        """
        try:
            return int(text)
        except (TypeError, ValueError):
            return None

    def _real_extract(self, url):
        """Build the info dict for the article at ``url``.

        The title and description are read from the page's ``twitter:*``
        meta tags; the thumbnail and the list of formats come from a
        separate per-video XML document.

        Returns a dict with the keys ``id``, ``title``, ``description``,
        ``thumbnail`` and ``formats``.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        page = self._download_webpage(url, video_id)
        title = self._html_search_meta('twitter:title', page, 'title')
        # The third argument is the display name used in messages; it must
        # name the field actually being looked up.
        description = self._html_search_meta(
            'twitter:description', page, 'description')

        data = self._download_xml(
            'http://www.firstpost.com/getvideoxml-%s.xml' % video_id, video_id,
            'Downloading video XML')

        item = data.find('./playlist/item')
        # The thumbnail is optional metadata: a missing <image> element
        # should not abort the extraction.
        thumbnail = self._xml_text(item, './image')

        formats = []
        for details in item.findall('./source/file_details'):
            video_url = self._xml_text(details, './file')
            if not video_url:
                # Entries without a file URL are unusable; skip them.
                continue
            label = self._xml_text(details, './label')
            formats.append({
                'url': video_url,
                'format_id': label.strip() if label else None,
                'width': self._parse_int(
                    self._xml_text(details, './width')),
                'height': self._parse_int(
                    self._xml_text(details, './height')),
            })

        return {
            'id': video_id,
            'title': title,
            'description': description,
            'thumbnail': thumbnail,
            'formats': formats,
        }