]>
Commit | Line | Data |
---|---|---|
cb9722cb PH |
1 | from __future__ import unicode_literals |
2 | ||
382ed50e PH |
3 | import re |
4 | ||
5 | from ..utils import ( | |
6d88bc37 | 6 | ExtractorError, |
de79c46c | 7 | unescapeHTML, |
382ed50e | 8 | unified_strdate, |
a1a530b0 | 9 | US_RATINGS, |
382ed50e PH |
10 | ) |
11 | from .subtitles import SubtitlesInfoExtractor | |
12 | ||
13 | ||
class VikiIE(SubtitlesInfoExtractor):
    """Information extractor for single videos hosted on viki.com."""

    IE_NAME = 'viki'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
        'md5': 'a21454021c2646f5433514177e2caa5f',
        'info_dict': {
            'id': '1023585v',
            'ext': 'mp4',
            'title': 'Heirs Episode 14',
            'uploader': 'SBS',
            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            'upload_date': '20131121',
            'age_limit': 13,
        },
        'skip': 'Blocked in the US',
    }

    def _real_extract(self, url):
        """Extract the metadata and media URL for a single Viki video.

        The title, description and thumbnail come from the OpenGraph data
        of the video page; uploader and rating are scraped from that same
        page.  The media URL, the upload date and the subtitle tracks come
        from the separate ``player5_fragment`` page.

        Returns the info dictionary for the video, or None when the
        ``listsubtitles`` downloader parameter is set (in that case the
        available subtitles are only listed).

        Raises ExtractorError (with ``expected=True``) when the player
        fragment starts with a ``video-error`` div, which this extractor
        reports as the video being blocked from the user's location.
        """
        mobj = re.match(self._VALID_URL, url)
        # Use the named group declared in _VALID_URL rather than its index.
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The uploader is optional: a page without a broadcast network
        # simply yields None.
        uploader_m = re.search(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
        if uploader_m is None:
            uploader = None
        else:
            uploader = uploader_m.group(1).strip()

        # A missing rating becomes '' (the default), which is then looked
        # up in US_RATINGS like any other rating string.
        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            'rating information', default='').strip()
        age_limit = US_RATINGS.get(rating_str)

        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(
            info_url, video_id, note='Downloading info page')
        if re.match(r'\s*<div\s+class="video-error', info_webpage):
            raise ExtractorError(
                'Video %s is blocked from your location.' % video_id,
                expected=True)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, 'video URL')

        # The upload date is optional metadata: search non-fatally so that
        # a missing "created_at" field results in upload_date=None instead
        # of aborting the whole extraction.  Without fatal=False the None
        # check below could never be reached.
        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, 'upload date',
            fatal=False)
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Collect the subtitle tracks referenced by the player fragment.

        Scans ``info_webpage`` for ``<track src="..."/>`` elements and
        returns a dict mapping the lowercase name found directly before
        ``.vtt`` in each HTML-unescaped track URL to that URL.  Tracks
        whose URL does not match that pattern are skipped.  ``video_id``
        is accepted but not used here.
        """
        res = {}
        for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            # The attribute value is HTML-escaped inside the page.
            sturl = unescapeHTML(sturl_html)
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = sturl
        return res