]>
Commit | Line | Data |
---|---|---|
3141feb7 S |
1 | from __future__ import unicode_literals |
2 | ||
caefb1de | 3 | import re |
caefb1de PH |
4 | |
5 | from .common import InfoExtractor | |
3141feb7 S |
6 | from ..utils import ( |
7 | month_by_name, | |
8 | int_or_none, | |
9 | ) | |
caefb1de PH |
10 | |
11 | ||
class NDTVIE(InfoExtractor):
    """Extractor for NDTV video player pages (ndtv.com/video/player/...)."""

    _VALID_URL = r'^https?://(?:www\.)?ndtv\.com/video/player/[^/]*/[^/]*/(?P<id>[a-z0-9]+)'

    _TEST = {
        'url': 'http://www.ndtv.com/video/player/news/ndtv-exclusive-don-t-need-character-certificate-from-rahul-gandhi-says-arvind-kejriwal/300710',
        'md5': '39f992dbe5fb531c395d8bbedb1e5e88',
        'info_dict': {
            'id': '300710',
            'ext': 'mp4',
            'title': "NDTV exclusive: Don't need character certificate from Rahul Gandhi, says Arvind Kejriwal",
            'description': 'md5:ab2d4b4a6056c5cb4caa6d729deabf02',
            'upload_date': '20131208',
            'duration': 1327,
            'thumbnail': 'http://i.ndtvimg.com/video/images/vod/medium/2013-12/big_300710_1386518307.jpg',
        },
    }

    def _real_extract(self, url):
        """Build the info dict for the NDTV video page at *url*.

        The video id comes from the URL; the media filename, duration,
        publication date, title, description and thumbnail are scraped
        from the downloaded page.

        Returns a dict with the keys 'id', 'url', 'title', 'description',
        'thumbnail', 'duration' and 'upload_date'.  'upload_date' is None
        when no recognisable dateline is found; 'description' is passed
        through as returned by the OpenGraph lookup when it is empty.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)

        # The page exposes the media filename in an inline `__filename='...'`
        # assignment; the download URL is that name under a fixed prefix.
        filename = self._search_regex(
            r"__filename='([^']+)'", webpage, 'video filename')
        video_url = ('http://bitcast-b.bitgravity.com/ndtvod/23372/ndtv/%s' %
                     filename)

        # Duration is optional: the lookup is non-fatal (fatal=False).
        duration = int_or_none(self._search_regex(
            r"__duration='([^']+)'", webpage, 'duration', fatal=False))

        # Dateline looks like "Published On: <MonthName> <day>, <year>".
        date_m = re.search(r'''(?x)
            <p\s+class="vod_dateline">\s*
                Published\s+On:\s*
                (?P<monthname>[A-Za-z]+)\s+(?P<day>[0-9]+),\s*(?P<year>[0-9]+)
            ''', webpage)
        upload_date = None

        if date_m is not None:
            month = month_by_name(date_m.group('monthname'))
            if month is not None:
                # Assemble YYYYMMDD with zero-padded month and day.
                upload_date = '%s%02d%02d' % (
                    date_m.group('year'), month, int(date_m.group('day')))

        description = self._og_search_description(webpage)
        READ_MORE = ' (Read more)'
        # The OpenGraph description is an optional field and the lookup may
        # yield None; only strip the suffix when there is text to strip, so a
        # missing description does not abort the whole extraction.
        if description and description.endswith(READ_MORE):
            description = description[:-len(READ_MORE)]

        title = self._og_search_title(webpage)
        TITLE_SUFFIX = ' - NDTV'
        if title.endswith(TITLE_SUFFIX):
            title = title[:-len(TITLE_SUFFIX)]

        return {
            'id': video_id,
            'url': video_url,
            'title': title,
            'description': description,
            'thumbnail': self._og_search_thumbnail(webpage),
            'duration': duration,
            'upload_date': upload_date,
        }