]>
Commit | Line | Data |
---|---|---|
28746fbd PH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
04b32c8f | 4 | import hashlib |
520e7533 | 5 | import re |
28746fbd PH |
6 | |
7 | from .common import InfoExtractor | |
bd8f48c7 YCH |
8 | from ..compat import ( |
9 | compat_parse_qs, | |
10 | compat_urlparse, | |
11 | ) | |
28746fbd | 12 | from ..utils import ( |
bd8f48c7 | 13 | ExtractorError, |
6461f2b7 YCH |
14 | int_or_none, |
15 | float_or_none, | |
bd8f48c7 YCH |
16 | parse_iso8601, |
17 | smuggle_url, | |
18 | strip_jsonp, | |
04b32c8f | 19 | unified_timestamp, |
bd8f48c7 | 20 | unsmuggle_url, |
1f85029d | 21 | urlencode_postdata, |
28746fbd PH |
22 | ) |
23 | ||
24 | ||
class BiliBiliIE(InfoExtractor):
    """Extractor for single Bilibili videos and single Bangumi episodes.

    Handles ``/video/av<id>`` pages as well as
    ``bangumi.bilibili.com/anime/<anime_id>/play#<id>`` episode pages.
    """

    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
        'info_dict': {
            'id': '1074402',
            'ext': 'flv',
            'title': '【金坷垃】金泡沫',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'duration': 308.067,
            'timestamp': 1398012678,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
            'uploader': '菊子桑',
            'uploader_id': '156160',
        },
    }, {
        # Tested in BiliBiliBangumiIE
        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
        'only_matching': True,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
        'md5': '3f721ad1e75030cc06faf73587cfec57',
        'info_dict': {
            'id': '100643',
            'ext': 'mp4',
            'title': 'CHAOS;CHILD',
            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
        },
        'skip': 'Geo-restricted to China',
    }, {
        # Title with double quotes
        'url': 'http://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': '8903802',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
        },
        'playlist': [{
            'info_dict': {
                'id': '8903802_part1',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,  # Test metadata only
            },
        }, {
            'info_dict': {
                'id': '8903802_part2',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,  # Test metadata only
            },
        }]
    }]

    # Key pair used to sign playurl API requests: the query string is
    # concatenated with _BILIBILI_KEY and MD5-hashed (see _real_extract).
    _APP_KEY = '84956560bc028eb7'
    _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'

    def _report_error(self, result):
        """Raise an ExtractorError describing a failed API response.

        ``result`` is the decoded JSON object returned by the API. Its
        'message' is preferred, then its 'code'; if neither is present a
        generic (unexpected) error is raised. This method never returns.
        """
        if 'message' in result:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
        elif 'code' in result:
            # %s rather than %d: a non-integer code must not turn the real
            # API error into a TypeError raised by string formatting.
            raise ExtractorError('%s returns error %s' % (self.IE_NAME, result['code']), expected=True)
        else:
            raise ExtractorError('Can\'t extract Bangumi episode ID')

    def _real_extract(self, url):
        """Extract a video or Bangumi episode.

        Returns a single info dict when the video consists of one segment,
        otherwise a 'multi_video' result whose entries are the segments
        (with ids ``<video_id>_part<N>``, N starting at 1).
        """
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        anime_id = mobj.group('anime_id')
        webpage = self._download_webpage(url, video_id)

        if 'anime/' not in url:
            # Regular video page: the cid is either embedded directly in the
            # page or has to be taken from the player parameters.
            cid = self._search_regex(
                r'cid(?:["\']:|=)(\d+)', webpage, 'cid',
                default=None
            ) or compat_parse_qs(self._search_regex(
                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
                 r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                webpage, 'player parameters'))['cid'][0]
        else:
            # Bangumi episode: the cid comes from the get_source web API.
            if 'no_bangumi_tip' not in smuggled_data:
                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': url
            }
            headers.update(self.geo_verification_headers())

            js = self._download_json(
                'http://bangumi.bilibili.com/web_api/get_source', video_id,
                data=urlencode_postdata({'episode_id': video_id}),
                headers=headers)
            if 'result' not in js:
                self._report_error(js)
            cid = js['result']['cid']

        headers = {
            'Referer': url
        }
        headers.update(self.geo_verification_headers())

        entries = []

        # Renditions are tried in order; only a failure of the last one is
        # fatal, earlier ones silently fall through to the next.
        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
        for num, rendition in enumerate(RENDITIONS, start=1):
            payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
            sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()

            video_info = self._download_json(
                'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
                video_id, note='Downloading video info page',
                headers=headers, fatal=num == len(RENDITIONS))

            if not video_info:
                continue

            if 'durl' not in video_info:
                if num < len(RENDITIONS):
                    continue
                self._report_error(video_info)

            for idx, durl in enumerate(video_info['durl']):
                formats = [{
                    'url': durl['url'],
                    # .get(): a segment without a size must not abort the
                    # extraction; int_or_none already maps None to None.
                    'filesize': int_or_none(durl.get('size')),
                }]
                for backup_url in durl.get('backup_url', []):
                    formats.append({
                        'url': backup_url,
                        # backup URLs have lower priorities
                        'preference': -2 if 'hd.mp4' in backup_url else -3,
                    })

                for a_format in formats:
                    a_format.setdefault('http_headers', {}).update({
                        'Referer': url,
                    })

                self._sort_formats(formats)

                # NOTE(review): 'id' and 'duration' set here are overwritten
                # by entry.update(info) further down; confirm whether the
                # per-segment duration was meant to survive.
                entries.append({
                    'id': '%s_part%s' % (video_id, idx),
                    'duration': float_or_none(durl.get('length'), 1000),
                    'formats': formats,
                })
            # The first rendition that yields segments wins.
            break

        # Raw strings are required here: in a plain literal '\b' is a
        # backspace and '\1' is chr(1), so the <h1 title=...> pattern could
        # never match.
        title = self._html_search_regex(
            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
            group='title')
        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
            default=None) or self._html_search_meta(
            'uploadDate', webpage, 'timestamp', default=None))
        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

        # TODO 'view_count' requires deobfuscating Javascript
        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'duration': float_or_none(video_info.get('timelength'), scale=1000),
        }

        uploader_mobj = re.search(
            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>(?P<name>[^<]+)',
            webpage)
        if uploader_mobj:
            info.update({
                'uploader': uploader_mobj.group('name'),
                'uploader_id': uploader_mobj.group('id'),
            })
        if not info.get('uploader'):
            info['uploader'] = self._html_search_meta(
                'author', webpage, 'uploader', default=None)

        # Every segment shares the page-level metadata.
        for entry in entries:
            entry.update(info)

        if len(entries) == 1:
            return entries[0]
        else:
            # Give each segment a distinct, 1-based id.
            for idx, entry in enumerate(entries):
                entry['id'] = '%s_part%d' % (video_id, (idx + 1))

            return {
                '_type': 'multi_video',
                'id': video_id,
                'title': title,
                'description': description,
                'entries': entries,
            }
bd8f48c7 YCH |
242 | |
243 | ||
class BiliBiliBangumiIE(InfoExtractor):
    """Extractor for a whole Bangumi (anime season) as a playlist of episodes.

    Each episode is delegated to BiliBiliIE through a 'url_transparent' entry.
    """

    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'

    IE_NAME = 'bangumi.bilibili.com'
    IE_DESC = 'BiliBili番剧'

    _TESTS = [{
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist_count': 26,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist': [{
            'md5': '91da8621454dd58316851c27c68b0c13',
            'info_dict': {
                'id': '40062',
                'ext': 'mp4',
                'title': '混沌武士',
                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
                'timestamp': 1414538739,
                'upload_date': '20141028',
                'episode': '疾风怒涛 Tempestuous Temperaments',
                'episode_number': 1,
            },
        }],
        'params': {
            'playlist_items': '1',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Return whether this extractor handles ``url``.

        Episode URLs (``/anime/<id>/play#<ep>``) also match this class's
        _VALID_URL, so anything BiliBiliIE accepts is explicitly declined.
        """
        return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)

    def _real_extract(self, url):
        """Build a playlist of all episodes of the Bangumi at ``url``.

        Episodes are ordered by episode number; episodes whose number could
        not be parsed are placed first, keeping their original relative order.
        """
        bangumi_id = self._match_id(url)

        # Sometimes this API returns a JSONP response
        season_info = self._download_json(
            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
            bangumi_id, transform_source=strip_jsonp)['result']

        entries = [{
            '_type': 'url_transparent',
            # Smuggle a flag so BiliBiliIE does not print its per-episode
            # "download the whole anime" hint.
            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
            'ie_key': BiliBiliIE.ie_key(),
            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
            'episode': episode.get('index_title'),
            'episode_number': int_or_none(episode.get('index')),
        } for episode in season_info['episodes']]

        def _episode_sort_key(entry):
            # int_or_none yields None for a non-numeric index; comparing None
            # with int raises TypeError on Python 3. The leading boolean
            # avoids that comparison and keeps the Python 2 ordering
            # (None before any number).
            number = entry.get('episode_number')
            return (number is not None, number or 0)

        entries = sorted(entries, key=_episode_sort_key)

        return self.playlist_result(
            entries, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))