]>
Commit | Line | Data |
---|---|---|
1 | # coding: utf-8 | |
2 | from __future__ import unicode_literals | |
3 | ||
4 | import hashlib | |
5 | import re | |
6 | ||
7 | from .common import InfoExtractor | |
8 | from ..compat import ( | |
9 | compat_parse_qs, | |
10 | compat_urlparse, | |
11 | ) | |
12 | from ..utils import ( | |
13 | ExtractorError, | |
14 | int_or_none, | |
15 | float_or_none, | |
16 | parse_iso8601, | |
17 | smuggle_url, | |
18 | strip_jsonp, | |
19 | unified_timestamp, | |
20 | unsmuggle_url, | |
21 | urlencode_postdata, | |
22 | ) | |
23 | ||
24 | ||
class BiliBiliIE(InfoExtractor):
    """Extractor for a single bilibili video page or a single bangumi episode.

    Matches ``/video/av<id>`` URLs as well as
    ``bangumi.bilibili.com/anime/<anime_id>/play#<id>`` episode URLs.
    """

    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
        'info_dict': {
            'id': '1074402',
            'ext': 'mp4',
            'title': '【金坷垃】金泡沫',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'duration': 308.315,
            'timestamp': 1398012660,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
            'uploader': '菊子桑',
            'uploader_id': '156160',
        },
    }, {
        # Tested in BiliBiliBangumiIE
        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
        'only_matching': True,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
        'md5': '3f721ad1e75030cc06faf73587cfec57',
        'info_dict': {
            'id': '100643',
            'ext': 'mp4',
            'title': 'CHAOS;CHILD',
            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
        },
        'skip': 'Geo-restricted to China',
    }]

    # Credentials used to sign requests to the playurl interface.
    _APP_KEY = '84956560bc028eb7'
    _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'

    def _report_error(self, result):
        """Raise an ExtractorError describing a failed API response.

        :param result: decoded JSON dict returned by the API.
        :raises ExtractorError: always; the text is taken from
            ``result['message']`` if present, otherwise from
            ``result['code']``, otherwise a generic message is used.
        """
        if 'message' in result:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
        elif 'code' in result:
            # %s instead of %d: the code is not guaranteed to be an int, and
            # %d would raise TypeError and hide the real error.
            raise ExtractorError('%s returns error %s' % (self.IE_NAME, result['code']), expected=True)
        else:
            raise ExtractorError('Can\'t extract Bangumi episode ID')

    def _real_extract(self, url):
        """Extract the info dict for one video or bangumi episode.

        :param url: page URL, optionally carrying smuggled data
            (``no_bangumi_tip`` suppresses the "download whole anime" hint).
        :returns: a single-video info dict when the video has exactly one
            part, otherwise a ``multi_video`` result with one entry per part.
        :raises ExtractorError: via ``_report_error`` when an API response
            lacks the expected ``result`` / ``durl`` key.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        anime_id = mobj.group('anime_id')
        webpage = self._download_webpage(url, video_id)

        if 'anime/' not in url:
            # Regular video page: the cid is embedded in the player parameters.
            cid = compat_parse_qs(self._search_regex(
                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                webpage, 'player parameters'))['cid'][0]
        else:
            # Bangumi episode: the cid has to be requested from the web API.
            if 'no_bangumi_tip' not in smuggled_data:
                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            }
            headers.update(self.geo_verification_headers())

            js = self._download_json(
                'http://bangumi.bilibili.com/web_api/get_source', video_id,
                data=urlencode_postdata({'episode_id': video_id}),
                headers=headers)
            if 'result' not in js:
                self._report_error(js)
            cid = js['result']['cid']

        # The signature is the MD5 of the query string followed by the secret key.
        payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
        sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()

        video_info = self._download_json(
            'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
            video_id, note='Downloading video info page',
            headers=self.geo_verification_headers())

        if 'durl' not in video_info:
            self._report_error(video_info)

        entries = []

        for durl in video_info['durl']:
            formats = [{
                'url': durl['url'],
                # .get(): a missing size must not abort extraction.
                'filesize': int_or_none(durl.get('size')),
            }]
            # "or []" also covers an explicit JSON null for backup_url.
            for backup_url in durl.get('backup_url') or []:
                formats.append({
                    'url': backup_url,
                    # backup URLs have lower priorities
                    'preference': -2 if 'hd.mp4' in backup_url else -3,
                })

            for a_format in formats:
                a_format.setdefault('http_headers', {}).update({
                    'Referer': url,
                })

            self._sort_formats(formats)

            entries.append({
                'duration': float_or_none(durl.get('length'), 1000),
                'formats': formats,
            })

        title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

        # TODO 'view_count' requires deobfuscating Javascript
        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'duration': float_or_none(video_info.get('timelength'), scale=1000),
        }

        uploader_mobj = re.search(
            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
            webpage)
        if uploader_mobj:
            info.update({
                'uploader': uploader_mobj.group('name'),
                'uploader_id': uploader_mobj.group('id'),
            })

        if len(entries) == 1:
            # Single part: the page-level metadata describes the whole video.
            entries[0].update(info)
            return entries[0]

        for part_number, entry in enumerate(entries, 1):
            # Keep the part's own duration: info['duration'] is the total
            # length of the video and would otherwise overwrite every part.
            part_duration = entry.get('duration')
            entry.update(info)
            entry['id'] = '%s_part%d' % (video_id, part_number)
            if part_duration is not None:
                entry['duration'] = part_duration

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': title,
            'description': description,
            'entries': entries,
        }
179 | ||
180 | ||
class BiliBiliBangumiIE(InfoExtractor):
    """Playlist extractor for a whole bangumi (anime season) on bilibili.

    Every episode is delegated to BiliBiliIE through a ``url_transparent``
    entry.
    """

    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'

    IE_NAME = 'bangumi.bilibili.com'
    IE_DESC = 'BiliBili番剧'

    _TESTS = [{
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist_count': 26,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist': [{
            'md5': '91da8621454dd58316851c27c68b0c13',
            'info_dict': {
                'id': '40062',
                'ext': 'mp4',
                'title': '混沌武士',
                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
                'timestamp': 1414538739,
                'upload_date': '20141028',
                'episode': '疾风怒涛 Tempestuous Temperaments',
                'episode_number': 1,
            },
        }],
        'params': {
            'playlist_items': '1',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor should handle ``url``.

        Episode URLs (``.../anime/<id>/play#<ep>``) also match this class's
        pattern, so anything BiliBiliIE accepts is left to BiliBiliIE.
        """
        return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist with one entry per episode of the bangumi.

        :param url: bangumi overview URL matching ``_VALID_URL``.
        :returns: playlist result whose entries are ordered by episode number;
            entries without a usable episode number come first.
        """
        bangumi_id = self._match_id(url)

        # Sometimes this API returns a JSONP response
        season_info = self._download_json(
            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
            bangumi_id, transform_source=strip_jsonp)['result']

        entries = [{
            '_type': 'url_transparent',
            # The smuggled flag silences BiliBiliIE's per-episode hint.
            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
            'ie_key': BiliBiliIE.ie_key(),
            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
            'episode': episode.get('index_title'),
            'episode_number': int_or_none(episode.get('index')),
        } for episode in season_info['episodes']]

        def episode_sort_key(entry):
            # episode_number is None when 'index' is missing or non-numeric.
            # Comparing None with int raises TypeError on Python 3, so sort on
            # a (has_number, number) tuple instead; None entries go first,
            # which matches the ordering Python 2 produced.
            number = entry.get('episode_number')
            return (number is not None, number or 0)

        entries.sort(key=episode_sort_key)

        return self.playlist_result(
            entries, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))