]>
Commit | Line | Data |
---|---|---|
1 | from __future__ import unicode_literals | |
2 | ||
3 | import re | |
4 | ||
5 | from .common import InfoExtractor | |
6 | from ..utils import ( | |
7 | month_by_name, | |
8 | int_or_none, | |
9 | ) | |
10 | ||
11 | ||
class NDTVIE(InfoExtractor):
    """Extractor for video pages under ndtv.com/video/player/."""

    # The video id is the last path component matched by the ``id`` group.
    _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'

    _TEST = {
        'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
        'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
        'info_dict': {
            'id': '300710',
            'ext': 'mp4',
            'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
            'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
            'upload_date': '20131208',
            'duration': 1327,
            'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
        },
    }

    @staticmethod
    def _strip_suffix(text, suffix):
        """Return ``text`` without a trailing ``suffix``.

        ``text`` is returned unchanged when it is ``None`` or does not end
        with ``suffix``, so optional metadata can be passed in directly.
        """
        if text is not None and text.endswith(suffix):
            return text[:-len(suffix)]
        return text

    def _real_extract(self, url):
        """Download the video page at ``url`` and build its info dict.

        Returns a dict with the keys ``id``, ``url``, ``title``,
        ``description``, ``thumbnail``, ``duration`` and ``upload_date``.
        ``duration`` is searched for non-fatally and ``upload_date`` is
        ``None`` when the page carries no recognisable publication date.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        # The media file name is embedded in the page as a quoted
        # ``__filename='...'`` assignment.
        filename = self._search_regex(
            r"__filename='([^']+)'", webpage, 'video filename')
        video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
                     filename)

        duration = int_or_none(self._search_regex(
            r"__duration='([^']+)'", webpage, 'duration', fatal=False))

        # The dateline looks like "Published On: <Monthname> <day>, <year>".
        date_m = re.search(r'''(?x)
            <p\s+class="vod_dateline">\s*
            Published\s+On:\s*
            (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
            ''', webpage)
        upload_date = None

        if date_m is not None:
            month = month_by_name(date_m.group('monthname'))
            # Only build the date when the month name is recognised.
            if month is not None:
                upload_date = '%s%02d%02d' % (
                    date_m.group('year'), month, int(date_m.group('day')))

        # The og:description lookup is not guaranteed to yield a string
        # (NOTE(review): it is non-fatal in InfoExtractor - confirm), so the
        # suffix is stripped through the None-safe helper instead of calling
        # ``.endswith`` on the raw result.
        description = self._strip_suffix(
            self._og_search_description(webpage), ' (Read more)')

        title = self._strip_suffix(
            self._og_search_title(webpage), ' - NDTV')

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': duration,
            'upload_date': upload_date,
        }