]>
Commit | Line | Data |
---|---|---|
6cb38a99 PH |
1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | ||
7 | ||
class FirstpostIE(InfoExtractor):
    """Extractor for videos embedded in Firstpost.com article pages.

    Matches article URLs of the form
    ``http(s)://[www.]firstpost.com/<section>/<slug>-<digits>.html``; the
    digits immediately before ``.html`` are used as the video id.
    """

    IE_NAME = 'Firstpost.com'
    # Both http and https links are accepted. The ``id`` group captures the
    # numeric suffix that precedes ``.html``.
    _VALID_URL = r'https?://(?:www\.)?firstpost\.com/[^/]+/.*-(?P<id>[0-9]+)\.html'

    _TEST = {
        'url': 'http://www.firstpost.com/india/india-to-launch-indigenous-aircraft-carrier-monday-1025403.html',
        'md5': 'ee9114957692f01fb1263ed87039112a',
        'info_dict': {
            'id': '1025403',
            'ext': 'mp4',
            'title': 'India to launch indigenous aircraft carrier INS Vikrant today',
            'description': 'Its flight deck is over twice the size of a football field, its power unit can light up the entire Kochi city and the cabling is enough to cover the distance between here to Delhi.',
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the Firstpost article at *url*.

        The video id is taken from the ``id`` group of ``_VALID_URL``. The
        article page is downloaded and the ``flashvars`` attribute of the
        ``<div>`` named ``div_video`` is used verbatim as the media URL.
        Title, description and thumbnail come from the inherited
        ``_og_search_*`` helpers applied to the same page.

        *url* must match ``_VALID_URL``; otherwise ``re.match`` returns
        ``None`` and the ``group`` call raises ``AttributeError``.
        """
        video_id = re.match(self._VALID_URL, url).group('id')

        webpage = self._download_webpage(url, video_id)

        # The player container carries the media location in ``flashvars``.
        video_url = self._html_search_regex(
            r'<div.*?name="div_video".*?flashvars="([^"]+)">',
            webpage, 'video URL')

        return {
            'id': video_id,
            'url': video_url,
            'title': self._og_search_title(webpage),
            'description': self._og_search_description(webpage),
            'thumbnail': self._og_search_thumbnail(webpage),
        }