]>
Commit | Line | Data |
---|---|---|
28746fbd PH |
1 | # coding: utf-8 |
2 | from __future__ import unicode_literals | |
3 | ||
04b32c8f | 4 | import hashlib |
520e7533 | 5 | import re |
28746fbd PH |
6 | |
7 | from .common import InfoExtractor | |
bd8f48c7 YCH |
8 | from ..compat import ( |
9 | compat_parse_qs, | |
10 | compat_urlparse, | |
11 | ) | |
28746fbd | 12 | from ..utils import ( |
bd8f48c7 | 13 | ExtractorError, |
6461f2b7 YCH |
14 | int_or_none, |
15 | float_or_none, | |
bd8f48c7 YCH |
16 | parse_iso8601, |
17 | smuggle_url, | |
18 | strip_jsonp, | |
04b32c8f | 19 | unified_timestamp, |
bd8f48c7 | 20 | unsmuggle_url, |
1f85029d | 21 | urlencode_postdata, |
28746fbd PH |
22 | ) |
23 | ||
24 | ||
class BiliBiliIE(InfoExtractor):
    """Extractor for a single bilibili video page or a single bangumi episode.

    Matches ``/video/av<id>`` pages as well as ``/anime/<anime_id>/play#<id>``
    episode URLs.  A video made of several segments is returned as a
    ``multi_video`` result with one entry per segment.
    """

    _VALID_URL = r'https?://(?:www\.|bangumi\.|)bilibili\.(?:tv|com)/(?:video/av|anime/(?P<anime_id>\d+)/play#)(?P<id>\d+)'

    _TESTS = [{
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'md5': '9fa226fe2b8a9a4d5a69b4c6a183417e',
        'info_dict': {
            'id': '1074402',
            'ext': 'mp4',
            'title': '【金坷垃】金泡沫',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'duration': 308.315,
            'timestamp': 1398012660,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
            'uploader': '菊子桑',
            'uploader_id': '156160',
        },
    }, {
        # Tested in BiliBiliBangumiIE
        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
        'only_matching': True,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
        'md5': '3f721ad1e75030cc06faf73587cfec57',
        'info_dict': {
            'id': '100643',
            'ext': 'mp4',
            'title': 'CHAOS;CHILD',
            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
        },
        'skip': 'Geo-restricted to China',
    }]

    # Credentials used to sign requests to the playurl interface
    # (see the md5 signature computed in _real_extract).
    _APP_KEY = '84956560bc028eb7'
    _BILIBILI_KEY = '94aba54af9065f71de72f5508f1cd42e'

    def _report_error(self, result):
        """Raise an ExtractorError describing a failed API response.

        Prefers the server-supplied ``message``, then the ``code``; falls
        back to a generic message when neither is present.  Always raises.
        """
        if 'message' in result:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
        elif 'code' in result:
            # %s rather than %d: a non-numeric code must not turn the
            # report itself into a TypeError that hides the real error.
            raise ExtractorError('%s returns error %s' % (self.IE_NAME, result['code']), expected=True)
        else:
            raise ExtractorError('Can\'t extract Bangumi episode ID')

    def _real_extract(self, url):
        """Extract the info dict (or ``multi_video`` result) for ``url``.

        Resolves the player ``cid`` (from the page for regular videos, from
        the ``get_source`` web API for bangumi episodes), queries the signed
        playurl interface and builds one entry per returned ``durl`` item.
        Raises ExtractorError via ``_report_error`` when an API response
        lacks the expected payload.
        """
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id')
        anime_id = mobj.group('anime_id')
        webpage = self._download_webpage(url, video_id)

        if 'anime/' not in url:
            # Regular video: the cid is embedded in the player parameters.
            cid = compat_parse_qs(self._search_regex(
                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                webpage, 'player parameters'))['cid'][0]
        else:
            # Bangumi episode: the tip is suppressed when the URL was
            # smuggled with 'no_bangumi_tip' (done by BiliBiliBangumiIE).
            if 'no_bangumi_tip' not in smuggled_data:
                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run youtube-dl with %s' % (
                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
            }
            headers.update(self.geo_verification_headers())

            js = self._download_json(
                'http://bangumi.bilibili.com/web_api/get_source', video_id,
                data=urlencode_postdata({'episode_id': video_id}),
                headers=headers)
            if 'result' not in js:
                self._report_error(js)
            cid = js['result']['cid']

        # The signature is the md5 of the query string followed by the key.
        payload = 'appkey=%s&cid=%s&otype=json&quality=2&type=mp4' % (self._APP_KEY, cid)
        sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()

        video_info = self._download_json(
            'http://interface.bilibili.com/playurl?%s&sign=%s' % (payload, sign),
            video_id, note='Downloading video info page',
            headers=self.geo_verification_headers())

        if 'durl' not in video_info:
            self._report_error(video_info)

        entries = []

        for durl in video_info['durl']:
            formats = [{
                'url': durl['url'],
                # 'size' is optional metadata; tolerate its absence.
                'filesize': int_or_none(durl.get('size')),
            }]
            # 'backup_url' may be missing or null; treat both as "none".
            for backup_url in durl.get('backup_url') or []:
                formats.append({
                    'url': backup_url,
                    # backup URLs have lower priorities
                    'preference': -2 if 'hd.mp4' in backup_url else -3,
                })

            for a_format in formats:
                a_format.setdefault('http_headers', {}).update({
                    'Referer': url,
                })

            self._sort_formats(formats)

            entries.append({
                'duration': float_or_none(durl.get('length'), 1000),
                'formats': formats,
            })

        title = self._html_search_regex('<h1[^>]+title="([^"]+)">', webpage, 'title')
        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time', default=None))
        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

        # TODO 'view_count' requires deobfuscating Javascript
        info = {
            'id': video_id,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'duration': float_or_none(video_info.get('timelength'), scale=1000),
        }

        uploader_mobj = re.search(
            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]+title="(?P<name>[^"]+)"',
            webpage)
        if uploader_mobj:
            info.update({
                'uploader': uploader_mobj.group('name'),
                'uploader_id': uploader_mobj.group('id'),
            })

        multi_part = len(entries) > 1
        for idx, entry in enumerate(entries):
            part_duration = entry['duration']
            entry.update(info)
            if multi_part:
                # Parts are numbered from 1 and keep their own duration;
                # info['duration'] is the length of the whole video.
                entry['id'] = '%s_part%d' % (video_id, (idx + 1))
                entry['duration'] = part_duration
            elif entry['duration'] is None:
                # Single part: fall back to the segment length when the
                # overall 'timelength' is unavailable.
                entry['duration'] = part_duration

        if not multi_part:
            return entries[0]

        return {
            '_type': 'multi_video',
            'id': video_id,
            'title': title,
            'description': description,
            'entries': entries,
        }
bd8f48c7 YCH |
179 | |
180 | ||
class BiliBiliBangumiIE(InfoExtractor):
    """Playlist extractor for a whole bangumi (anime season) page.

    Each episode is delegated to BiliBiliIE through a ``url_transparent``
    entry; URLs that BiliBiliIE itself matches are left to that extractor.
    """

    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'

    IE_NAME = 'bangumi.bilibili.com'
    IE_DESC = 'BiliBili番剧'

    _TESTS = [{
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist_count': 26,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist': [{
            'md5': '91da8621454dd58316851c27c68b0c13',
            'info_dict': {
                'id': '40062',
                'ext': 'mp4',
                'title': '混沌武士',
                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
                'timestamp': 1414538739,
                'upload_date': '20141028',
                'episode': '疾风怒涛 Tempestuous Temperaments',
                'episode_number': 1,
            },
        }],
        'params': {
            'playlist_items': '1',
        },
    }]

    @classmethod
    def suitable(cls, url):
        """Return False for URLs BiliBiliIE matches, else defer to the base check."""
        return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)

    def _real_extract(self, url):
        """Return a playlist with one ``url_transparent`` entry per episode."""
        bangumi_id = self._match_id(url)

        # Sometimes this API returns a JSONP response
        season_info = self._download_json(
            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
            bangumi_id, transform_source=strip_jsonp)['result']

        entries = [{
            '_type': 'url_transparent',
            # Smuggle a flag so BiliBiliIE skips its "download the whole
            # anime" hint for every episode of the playlist.
            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
            'ie_key': BiliBiliIE.ie_key(),
            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
            'episode': episode.get('index_title'),
            'episode_number': int_or_none(episode.get('index')),
        } for episode in season_info['episodes']]

        # 'episode_number' is None when the index is missing or not numeric.
        # Comparing None with int raises TypeError on Python 3, so sort on a
        # tuple key that places unnumbered episodes first (the order Python 2
        # produced) and numbered ones in ascending order after them.
        entries.sort(key=lambda entry: (
            entry['episode_number'] is not None,
            entry['episode_number'] or 0))

        return self.playlist_result(
            entries, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))