]>
Commit | Line | Data |
---|---|---|
cb9722cb PH |
1 | from __future__ import unicode_literals |
2 | ||
382ed50e PH |
3 | import re |
4 | ||
8e3df9df YCH |
5 | from ..compat import ( |
6 | compat_urlparse, | |
7 | compat_urllib_request, | |
8 | ) | |
382ed50e | 9 | from ..utils import ( |
6d88bc37 | 10 | ExtractorError, |
de79c46c | 11 | unescapeHTML, |
382ed50e | 12 | unified_strdate, |
a1a530b0 | 13 | US_RATINGS, |
d948e09b YCH |
14 | determine_ext, |
15 | mimetype2ext, | |
382ed50e | 16 | ) |
4f7cea6c | 17 | from .common import InfoExtractor |
382ed50e PH |
18 | |
19 | ||
class VikiIE(InfoExtractor):
    """Information extractor for single video pages on viki.com."""

    IE_NAME = 'viki'

    # iPad2 user agent, sent with the player fragment request below.
    # NOTE(review): presumably needed so the site answers with the HTML5
    # <source>/<track> markup this extractor parses -- confirm.
    _USER_AGENT = 'Mozilla/5.0(iPad; U; CPU OS 4_3 like Mac OS X; en-us) AppleWebKit/533.17.9 (KHTML, like Gecko) Version/5.0.2 Mobile/8F191 Safari/6533.18.5'

    _VALID_URL = r'^https?://(?:www\.)?viki\.com/videos/(?P<id>[0-9]+v)'
    _TESTS = [{
        'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
        'info_dict': {
            'id': '1023585v',
            'ext': 'mp4',
            'title': 'Heirs Episode 14',
            'uploader': 'SBS',
            'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
            'upload_date': '20131121',
            'age_limit': 13,
        },
        'skip': 'Blocked in the US',
    }, {
        'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
        'md5': 'ca6493e6f0a6ec07da9aa8d6304b4b2c',
        'info_dict': {
            'id': '1067139v',
            'ext': 'mp4',
            'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
            'upload_date': '20150430',
            'title': '\'The Avengers: Age of Ultron\' Press Conference',
        }
    }, {
        'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
        'info_dict': {
            'id': '1048879v',
            'ext': 'mp4',
            'upload_date': '20140820',
            'description': 'md5:54ff56d51bdfc7a30441ec967394e91c',
            'title': 'Ankhon Dekhi',
        },
        'params': {
            # requires ffmpeg
            'skip_download': True,
        }
    }]

    def _real_extract(self, url):
        """Build the info dict for the Viki video at *url*.

        Downloads the public video page (title, description, thumbnail,
        uploader, rating) and the player fragment page (media URL, upload
        date, subtitle tracks).

        Raises ExtractorError when the fragment page carries a
        ``video-error`` element or contains no usable ``<source>`` tag.
        """
        video_id = self._match_id(url)

        webpage = self._download_webpage(url, video_id)
        title = self._og_search_title(webpage)
        description = self._og_search_description(webpage)
        thumbnail = self._og_search_thumbnail(webpage)

        # The uploader is optional: pages without a "Broadcast Network"
        # entry simply yield None.
        uploader_m = re.search(
            r'<strong>Broadcast Network: </strong>\s*([^<]*)<', webpage)
        uploader = uploader_m.group(1).strip() if uploader_m else None

        # A missing rating becomes '' which is not a key of US_RATINGS,
        # so age_limit ends up as None in that case.
        rating_str = self._html_search_regex(
            r'<strong>Rating: </strong>\s*([^<]*)<', webpage,
            'rating information', default='').strip()
        age_limit = US_RATINGS.get(rating_str)

        req = compat_urllib_request.Request(
            'http://www.viki.com/player5_fragment/%s?action=show&controller=videos' % video_id)
        req.add_header('User-Agent', self._USER_AGENT)
        info_webpage = self._download_webpage(
            req, video_id, note='Downloading info page')

        # Surface errors reported by the site itself before looking for media.
        err_msg = self._html_search_regex(
            r'<div[^>]+class="video-error[^>]+>(.+)</div>',
            info_webpage, 'error message', default=None)
        if err_msg:
            if 'not available in your region' in err_msg:
                raise ExtractorError(
                    'Video %s is blocked from your location.' % video_id,
                    expected=True)
            raise ExtractorError('Viki said: ' + err_msg)

        mobj = re.search(
            r'<source[^>]+type="(?P<mime_type>[^"]+)"[^>]+src="(?P<url>[^"]+)"',
            info_webpage)
        if not mobj:
            raise ExtractorError('Unable to find video URL')
        video_url = unescapeHTML(mobj.group('url'))
        video_ext = mimetype2ext(mobj.group('mime_type'))

        # An m3u8 URL is a playlist that has to be expanded into formats;
        # anything else is used directly as the single format.
        if determine_ext(video_url) == 'm3u8':
            formats = self._extract_m3u8_formats(
                video_url, video_id, ext=video_ext)
        else:
            formats = [{
                'url': video_url,
                'ext': video_ext,
            }]

        # The upload date is optional metadata.  default=None makes a
        # missing "created_at" yield None instead of aborting extraction,
        # which is what the None check below was written for.
        upload_date_str = self._html_search_regex(
            r'"created_at":"([^"]+)"', info_webpage, 'upload date',
            default=None)
        upload_date = (
            unified_strdate(upload_date_str)
            if upload_date_str is not None
            else None
        )

        # subtitles
        video_subtitles = self.extract_subtitles(video_id, info_webpage)

        return {
            'id': video_id,
            'title': title,
            'formats': formats,
            'description': description,
            'thumbnail': thumbnail,
            'age_limit': age_limit,
            'uploader': uploader,
            'subtitles': video_subtitles,
            'upload_date': upload_date,
        }

    def _get_subtitles(self, video_id, info_webpage):
        """Collect the subtitle tracks listed in *info_webpage*.

        Every ``<track src="...">`` whose URL contains ``/<lang>.vtt`` adds
        one entry, keyed by that lowercase ``<lang>`` part.  Tracks whose
        URL does not match are skipped.  *video_id* is not used here.

        Returns a dict mapping language code to a one-element list holding
        the absolute track URL and the ``vtt`` extension.
        """
        res = {}
        for sturl_html in re.findall(r'<track src="([^"]+)"', info_webpage):
            sturl = unescapeHTML(sturl_html)
            m = re.search(r'/(?P<lang>[a-z]+)\.vtt', sturl)
            if not m:
                continue
            res[m.group('lang')] = [{
                # Track URLs may be relative; resolve them against the site root.
                'url': compat_urlparse.urljoin('http://www.viki.com', sturl),
                'ext': 'vtt',
            }]
        return res