]>
Commit | Line | Data |
---|---|---|
382ed50e PH |
1 | import re |
2 | ||
3 | from ..utils import ( | |
4 | unified_strdate, | |
5 | ) | |
6 | from .subtitles import SubtitlesInfoExtractor | |
7 | ||
8 | ||
class VikiIE(SubtitlesInfoExtractor):
    """Extractor for video pages on viki.com.

    Scrapes the public video page for metadata and the ``player5_fragment``
    page for the media URL, upload date and subtitle tracks.
    """

    IE_NAME = u'viki'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TEST = {
        u'url': u'http://www.viki.com/videos/1023585v-heirs-episode-14',
        u'file': u'1023585v.mp4',
        u'md5': u'a21454021c2646f5433514177e2caa5f',
        u'info_dict': {
            u'title': u'Heirs Episode 14',
            u'uploader': u'SBS',
            u'description': u'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            u'upload_date': u'20131121',
            u'age_limit': 13,
        }
    }

    def _real_extract(self, url):
        """Build the info dictionary for the video at *url*.

        Returns a dict with the keys ``id``, ``title``, ``url``,
        ``description``, ``thumbnail``, ``age_limit``, ``uploader``,
        ``subtitles`` and ``upload_date``.  When the ``listsubtitles``
        downloader parameter is set, the available subtitles are listed
        and ``None`` is returned instead.
        """
        mobj = re.match(self._VALID_URL, url)
        # 'id' is the only capturing group of _VALID_URL.
        video_id = mobj.group('id')

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        uploader = self._html_search_regex(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage,
            u'uploader')
        # NOTE(review): the helper presumably returns None only when it is
        # configured as non-fatal; the guard is kept for that case.
        if uploader is not None:
            uploader = uploader.strip()

        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            u'rating information', default='').strip()
        # Rating label shown on the page -> minimum viewer age.  A label
        # that is not listed here (including a missing one) gives None.
        RATINGS = {
            'G': 0,
            'PG': 10,
            'PG-13': 13,
            'R': 16,
            'NC': 18,
        }
        age_limit = RATINGS.get(rating_str)

        # The player fragment holds the <source> tag, the creation date
        # and the subtitle <track> tags.
        info_url = 'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id
        info_webpage = self._download_webpage(info_url, video_id)
        video_url = self._html_search_regex(
            r'<source[^>]+src="([^"]+)"', info_webpage, u'video URL')

        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, u'upload date')
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)
        if self._downloader.params.get('listsubtitles', False):
            self._list_available_subtitles(video_id, info_webpage)
            return

        return {
            'id': video_id,
            'title': title,
            'url': video_url,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_available_subtitles(self, video_id, info_webpage):
        """Map subtitle language codes to their track URLs.

        Scans *info_webpage* for ``<track src="..."/>`` tags.  The language
        code is the run of lowercase letters directly before ``.vtt`` in
        the track URL; tracks whose URL has no such part are skipped.  If
        several tracks share a language, the last one wins.

        *video_id* is not used here.
        """
        subtitles = {}
        # The page text must be passed as the second argument: calling
        # re.findall with the pattern alone raises TypeError.
        for track_url in re.findall(r'<track src="([^"]+)"/>', info_webpage):
            lang_match = re.search(r'/(?P<lang>[a-z]+)\.vtt', track_url)
            if not lang_match:
                continue
            subtitles[lang_match.group('lang')] = track_url
        return subtitles