]>
Commit | Line | Data |
---|---|---|
77aa6b32 | 1 | # encoding: utf-8 |
2 | ||
3 | import re | |
4 | import json | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | compat_urllib_request, | |
9 | ExtractorError, | |
10 | ) | |
11 | ||
12 | ||
class IviIE(InfoExtractor):
    """Extractor for single videos (movies and series episodes) on ivi.ru.

    Media links and core metadata come from a JSON API call; the
    description and the comment count are scraped from the video web page.
    """
    IE_DESC = u'ivi.ru'
    IE_NAME = u'ivi'
    _VALID_URL = r'^https?://(?:www\.)?(?P<url>ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+))'

    # Sample URLs together with the metadata expected for them.
    _TESTS = [
        # Single movie
        {
            u'url': u'http://www.ivi.ru/watch/53141',
            u'file': u'53141.mp4',
            u'md5': u'6ff5be2254e796ed346251d117196cf4',
            u'info_dict': {
                u'title': u'Иван Васильевич меняет профессию',
                u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
                u'duration': 5498,
                u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
            },
        },
        # Serial's serie
        {
            u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
            u'file': u'74791.mp4',
            u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
            u'info_dict': {
                u'title': u'Дежурный ангел - 1 серия',
                u'duration': 2490,
                u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
            },
        }
    ]

    # Sorted by quality
    _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']

    # Sorted by size
    _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']

    def _extract_description(self, html):
        """Return the content of the page's description meta tag, or None if absent."""
        m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
        return m.group('description') if m is not None else None

    def _extract_comment_count(self, html):
        """Return the comment count shown on the "view comments" button, or 0 if not found."""
        # Backslashes are doubled because this is a non-raw unicode literal;
        # the resulting pattern is the same as with single backslashes.
        m = re.search(
            u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">'
            u'\\s*Комментарии:\\s*(?P<commentcount>\\d+)\\s*</a>', html)
        return int(m.group('commentcount')) if m is not None else 0

    def _real_extract(self, url):
        """Extract the info dict for the video at *url*.

        Returns a dict with id, title, thumbnail, description, duration,
        comment_count and formats. Returns None, after emitting a warning,
        when the API lists no media file in a known format.

        Raises ExtractorError when the API response contains an error.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        api_url = 'http://api.digitalaccess.ru/api/json/'

        data = {
            u'method': u'da.content.get',
            u'params': [
                video_id,
                {
                    u'site': u's183',
                    u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
                    u'contentid': video_id,
                },
            ],
        }

        # The POST body has to be bytes on Python 3; json.dumps emits
        # ASCII-only text by default, so encoding is also safe on Python 2.
        request = compat_urllib_request.Request(api_url, json.dumps(data).encode('utf-8'))

        video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
        video_json = json.loads(video_json_page)

        if u'error' in video_json:
            error = video_json[u'error']
            # Use .get so an error object lacking a key does not mask the
            # real failure behind a KeyError.
            if error.get(u'origin') == u'NoRedisValidData':
                raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
            raise ExtractorError(
                u'Unable to download video %s: %s' % (video_id, error.get(u'message')),
                expected=True)

        result = video_json[u'result']

        # Keep only formats we know how to rank, ordered worst to best.
        formats = [{
            'url': x[u'url'],
            'format_id': x[u'content_format'],
        } for x in result[u'files'] if x[u'content_format'] in self._known_formats]
        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))

        if not formats:
            self._downloader.report_warning(u'No media links available for %s' % video_id)
            return

        duration = result[u'duration']
        compilation = result[u'compilation']
        title = result[u'title']

        # Episodes of a compilation get the compilation name as a prefix.
        title = '%s - %s' % (compilation, title) if compilation is not None else title

        # Ignore thumbnail formats missing from _known_thumbnails; calling
        # list.index() on them would raise ValueError while sorting.
        previews = [p for p in result[u'preview']
                    if p[u'content_format'] in self._known_thumbnails]
        previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt[u'content_format']))
        # The list is ordered by size, so the last entry is the largest.
        thumbnail = previews[-1][u'url'] if previews else None

        video_page_url = 'http://' + mobj.group('url')
        video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')

        description = self._extract_description(video_page)
        comment_count = self._extract_comment_count(video_page)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
            'comment_count': comment_count,
            'formats': formats,
        }
119 | ||
120 | ||
class IviCompilationIE(InfoExtractor):
    """Extractor turning an ivi.ru compilation or season URL into a playlist.

    A season URL yields the episodes linked from that season page. A
    compilation URL yields the episodes of every season linked from the
    compilation page, or the episodes linked from the compilation page
    itself when it links to no seasons.
    """
    IE_DESC = u'ivi.ru compilations'
    IE_NAME = u'ivi:compilation'
    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'

    def _extract_entries(self, html, compilation_id):
        """Build one url_result entry per episode link of the compilation found in *html*."""
        episode_re = r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id
        found = []
        for episode_id in re.findall(episode_re, html):
            episode_url = 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, episode_id)
            found.append(self.url_result(episode_url, 'Ivi'))
        return found

    def _real_extract(self, url):
        """Return a playlist result for the season or compilation at *url*."""
        match = re.match(self._VALID_URL, url)
        compilation_id = match.group('compilationid')
        season_id = match.group('seasonid')

        if season_id is None:
            # Compilation link: the page lists either seasons or episodes.
            page = self._download_webpage(
                url, compilation_id, u'Downloading compilation web page')
            title = self._html_search_meta(u'title', page, u'title')
            season_numbers = re.findall(
                r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, page)

            if season_numbers:
                # Fetch each season page and collect its episodes in order.
                episodes = []
                for number in season_numbers:
                    season_url = 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, number)
                    season_html = self._download_webpage(
                        season_url, compilation_id,
                        u'Downloading season %s web page' % number)
                    episodes += self._extract_entries(season_html, compilation_id)
            else:
                # No seasons in this compilation
                episodes = self._extract_entries(page, compilation_id)

            return self.playlist_result(episodes, compilation_id, title)

        # Season link: a single page holds all episodes of the season.
        page = self._download_webpage(
            url, compilation_id, u'Downloading season %s web page' % season_id)
        season_playlist_id = '%s/season%s' % (compilation_id, season_id)
        title = self._html_search_meta(u'title', page, u'title')
        episodes = self._extract_entries(page, compilation_id)
        return self.playlist_result(episodes, season_playlist_id, title)