]>
Commit | Line | Data |
---|---|---|
77aa6b32 | 1 | # encoding: utf-8 |
2 | ||
3 | import re | |
4 | import json | |
5 | ||
6 | from .common import InfoExtractor | |
7 | from ..utils import ( | |
8 | compat_urllib_request, | |
9 | ExtractorError, | |
10 | ) | |
11 | ||
12 | ||
class IviIE(InfoExtractor):
    """Extractor for a single ivi.ru video (a movie or one episode).

    Media links, title, duration and thumbnails come from the
    digitalaccess.ru JSON API; the description and the comment count are
    scraped from the video's web page.
    """
    IE_DESC = u'ivi.ru'
    IE_NAME = u'ivi'
    _VALID_URL = r'^https?://(?:www\.)?(?P<url>ivi\.ru/watch(?:/(?P<compilationid>[^/]+))?/(?P<videoid>\d+))'

    _TESTS = [
        # Single movie
        {
            u'url': u'http://www.ivi.ru/watch/53141',
            u'file': u'53141.mp4',
            u'md5': u'6ff5be2254e796ed346251d117196cf4',
            u'info_dict': {
                u'title': u'Иван Васильевич меняет профессию',
                u'description': u'md5:14d8eda24e9d93d29b5857012c6d6346',
                u'duration': 5498,
                u'thumbnail': u'http://thumbs.ivi.ru/f20.vcp.digitalaccess.ru/contents/d/1/c3c885163a082c29bceeb7b5a267a6.jpg',
            },
            u'skip': u'Only works from Russia',
        },
        # Episode of a series
        {
            u'url': u'http://www.ivi.ru/watch/dezhurnyi_angel/74791',
            u'file': u'74791.mp4',
            u'md5': u'3e6cc9a848c1d2ebcc6476444967baa9',
            u'info_dict': {
                u'title': u'Дежурный ангел - 1 серия',
                u'duration': 2490,
                u'thumbnail': u'http://thumbs.ivi.ru/f7.vcp.digitalaccess.ru/contents/8/e/bc2f6c2b6e5d291152fdd32c059141.jpg',
            },
            u'skip': u'Only works from Russia',
        }
    ]

    # Sorted by quality; extracted formats are ordered by their position here
    # and formats not listed here are dropped.
    _known_formats = ['MP4-low-mobile', 'MP4-mobile', 'FLV-lo', 'MP4-lo', 'FLV-hi', 'MP4-hi', 'MP4-SHQ']

    # Sorted by size; the last known entry present in the API answer is used.
    _known_thumbnails = ['Thumb-120x90', 'Thumb-160', 'Thumb-640x480']

    def _extract_description(self, html):
        """Return the content of the page's description <meta> tag, or None."""
        m = re.search(r'<meta name="description" content="(?P<description>[^"]+)"/>', html)
        return m.group('description') if m is not None else None

    def _extract_comment_count(self, html):
        """Return the number shown on the "view comments" button, or 0 if absent."""
        # Backslashes are doubled because this is a non-raw unicode literal
        # (the u'' prefix cannot be combined with r'' on Python 3); the
        # resulting pattern is unchanged.
        m = re.search(
            u'(?s)<a href="#" id="view-comments" class="action-button dim gradient">'
            u'\\s*Комментарии:\\s*(?P<commentcount>\\d+)\\s*</a>', html)
        return int(m.group('commentcount')) if m is not None else 0

    def _real_extract(self, url):
        """Build the info dict for the video at *url*.

        Returns None (after reporting a warning) when the API lists no file
        in a known format.

        Raises:
            ExtractorError: if the API answers with an error object.
        """
        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('videoid')

        api_url = 'http://api.digitalaccess.ru/api/json/'

        data = {
            u'method': u'da.content.get',
            u'params': [
                video_id,
                {
                    u'site': u's183',
                    u'referrer': u'http://www.ivi.ru/watch/%s' % video_id,
                    u'contentid': video_id,
                },
            ],
        }

        # urllib on Python 3 only accepts bytes as the POST body; json.dumps
        # emits pure ASCII by default, so the encoding step is lossless and
        # also harmless on Python 2.
        request = compat_urllib_request.Request(api_url, json.dumps(data).encode('utf-8'))

        video_json_page = self._download_webpage(request, video_id, u'Downloading video JSON')
        video_json = json.loads(video_json_page)

        if u'error' in video_json:
            error = video_json[u'error']
            # .get keeps a malformed error object from masking the API
            # failure behind a KeyError.
            if error.get(u'origin') == u'NoRedisValidData':
                raise ExtractorError(u'Video %s does not exist' % video_id, expected=True)
            raise ExtractorError(
                u'Unable to download video %s: %s' % (video_id, error.get(u'message')),
                expected=True)

        result = video_json[u'result']

        formats = [{
            'url': x[u'url'],
            'format_id': x[u'content_format'],
        } for x in result[u'files'] if x[u'content_format'] in self._known_formats]
        formats.sort(key=lambda fmt: self._known_formats.index(fmt['format_id']))

        if not formats:
            self._downloader.report_warning(u'No media links available for %s' % video_id)
            return

        duration = result[u'duration']
        compilation = result[u'compilation']
        title = result[u'title']

        # Episodes are titled "<compilation> - <episode title>".
        if compilation is not None:
            title = '%s - %s' % (compilation, title)

        # Ignore previews in formats we cannot rank instead of failing in
        # list.index() during the sort.
        previews = [
            preview for preview in (result.get(u'preview') or [])
            if preview.get(u'content_format') in self._known_thumbnails]
        previews.sort(key=lambda fmt: self._known_thumbnails.index(fmt[u'content_format']))
        thumbnail = previews[-1][u'url'] if previews else None

        video_page_url = 'http://' + mobj.group('url')
        video_page = self._download_webpage(video_page_url, video_id, u'Downloading video page')

        description = self._extract_description(video_page)
        comment_count = self._extract_comment_count(video_page)

        return {
            'id': video_id,
            'title': title,
            'thumbnail': thumbnail,
            'description': description,
            'duration': duration,
            'comment_count': comment_count,
            'formats': formats,
        }
121 | ||
122 | ||
class IviCompilationIE(InfoExtractor):
    """Playlist extractor for ivi.ru compilations and their seasons.

    A URL either points at a whole compilation or at one of its seasons
    (``.../season<N>``). Every episode link found is turned into a URL
    result handled by the 'Ivi' extractor.
    """
    IE_DESC = u'ivi.ru compilations'
    IE_NAME = u'ivi:compilation'
    _VALID_URL = r'^https?://(?:www\.)?ivi\.ru/watch/(?!\d+)(?P<compilationid>[a-z\d_-]+)(?:/season(?P<seasonid>\d+))?$'

    def _extract_entries(self, html, compilation_id):
        """Return one URL result per episode link of *compilation_id* in *html*."""
        episode_link_re = r'<strong><a href="/watch/%s/(\d+)">(?:[^<]+)</a></strong>' % compilation_id
        entries = []
        for episode_id in re.findall(episode_link_re, html):
            episode_url = 'http://www.ivi.ru/watch/%s/%s' % (compilation_id, episode_id)
            entries.append(self.url_result(episode_url, 'Ivi'))
        return entries

    def _real_extract(self, url):
        """Return a playlist result for the compilation or season at *url*."""
        match = re.match(self._VALID_URL, url)
        compilation_id = match.group('compilationid')
        season_id = match.group('seasonid')

        # Season link: the page itself lists the episodes.
        if season_id is not None:
            page = self._download_webpage(
                url, compilation_id, u'Downloading season %s web page' % season_id)
            playlist_id = '%s/season%s' % (compilation_id, season_id)
            playlist_title = self._html_search_meta(u'title', page, u'title')
            return self.playlist_result(
                self._extract_entries(page, compilation_id), playlist_id, playlist_title)

        # Compilation link: episodes are either listed directly or spread
        # over per-season pages.
        page = self._download_webpage(url, compilation_id, u'Downloading compilation web page')
        playlist_title = self._html_search_meta(u'title', page, u'title')
        season_numbers = re.findall(
            r'<a href="/watch/%s/season(\d+)">[^<]+</a>' % compilation_id, page)

        if not season_numbers:
            # No seasons in this compilation
            entries = self._extract_entries(page, compilation_id)
        else:
            entries = []
            for number in season_numbers:
                season_url = 'http://www.ivi.ru/watch/%s/season%s' % (compilation_id, number)
                season_page = self._download_webpage(
                    season_url, compilation_id, u'Downloading season %s web page' % number)
                entries += self._extract_entries(season_page, compilation_id)

        return self.playlist_result(entries, compilation_id, playlist_title)