]>
Commit | Line | Data |
---|---|---|
382ed50e PH |
1 | import re |
2 | ||
3 | from ..utils import ( | |
6d88bc37 | 4 | ExtractorError, |
de79c46c | 5 | unescapeHTML, |
382ed50e PH |
6 | unified_strdate, |
7 | ) | |
8 | from .subtitles import SubtitlesInfoExtractor | |
9 | ||
10 | ||
class VikiIE(SubtitlesInfoExtractor):
    """Information extractor for single videos hosted on viki.com."""

    IE_NAME = u'viki'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        },
        u'skip': u'Blocked in the US',
    }

    # Maps the rating label shown on the video page to a minimum viewer age.
    # Labels that are not listed here result in an age limit of None.
    _RATINGS = {
        'G': 0,
        'PG': 10,
        'PG-13': 13,
        'R': 16,
        'NC': 18,
    }

    def _real_extract(self, url):
        """Extract the metadata and media URL for the video at ``url``.

        Two pages are fetched: the public video page (title, description,
        thumbnail, broadcaster, rating) and the player fragment (media URL,
        creation date, subtitle tracks).

        Returns the info dictionary for the video, or None when the
        'listsubtitles' downloader parameter is set (the available subtitles
        are listed instead).

        Raises ExtractorError (marked as expected) when the player fragment
        starts with a "video-error" div, which this extractor reports as the
        video being blocked from the user's location.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The broadcaster is optional, so a plain search is used here and a
        # missing match simply leaves the uploader unset.
        uploader_m = re.search(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
        if uploader_m is None:
            uploader = None
        else:
            uploader = uploader_m.group(1).strip()

        # default='' keeps a missing rating from being an error; the empty
        # string is not a key of _RATINGS, so age_limit becomes None.
        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='').strip()
        age_limit = self._RATINGS.get(rating_str)

        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(
            info_url, video_id, note=u'Downloading info page')
        if re.match(r'\s*<div\s+class="video-error', info_webpage):
            raise ExtractorError(
                u'Video %s is blocked from your location.' % video_id,
                expected=True)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')

        # The upload date is optional metadata: look it up non-fatally so
        # that a page without "created_at" does not abort the extraction.
        # The None guard below relies on the helper returning None on a
        # miss when fatal=False; without that flag the guard was dead code.
        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date',
            fatal=False)
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Collect subtitle track URLs from the player fragment.

        Scans ``info_webpage`` for ``<track src="..."/>`` elements and
        returns a dict mapping the language code (the lowercase letters
        directly before ``.vtt`` in the URL) to the HTML-unescaped track URL.
        Tracks whose URL does not match that pattern are skipped.
        ``video_id`` is not used here.
        """
        res = {}
        for sturl_html in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            sturl = unescapeHTML(sturl_html)
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = sturl
        return res