# coding: utf-8
from __future__ import unicode_literals

import hashlib
import itertools
import json
import re

from .common import InfoExtractor, SearchInfoExtractor
from ..compat import (
    compat_str,
    compat_parse_qs,
    compat_urlparse,
)
from ..utils import (
    ExtractorError,
    int_or_none,
    float_or_none,
    parse_iso8601,
    try_get,
    smuggle_url,
    str_or_none,
    strip_jsonp,
    unified_timestamp,
    unsmuggle_url,
    urlencode_postdata,
)


class BiliBiliIE(InfoExtractor):
    _VALID_URL = r'''(?x)
                    https?://
                        (?:(?:www|bangumi)\.)?
                        bilibili\.(?:tv|com)/
                        (?:
                            (?:
                                video/[aA][vV]|
                                anime/(?P<anime_id>\d+)/play\#
                            )(?P<id>\d+)|
                            (s/)?video/[bB][vV](?P<id_bv>[^/?#&]+)
                        )
                        (?:/?\?p=(?P<page>\d+))?
                    '''

    _TESTS = [{
        'url': 'http://www.bilibili.com/video/av1074402/',
        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
        'info_dict': {
            'id': '1074402',
            'ext': 'flv',
            'title': '【金坷垃】金泡沫',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'duration': 308.067,
            'timestamp': 1398012678,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
            'uploader': '菊子桑',
            'uploader_id': '156160',
        },
    }, {
        # Tested in BiliBiliBangumiIE
        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
        'only_matching': True,
    }, {
        # bilibili.tv
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'only_matching': True,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
        'md5': '3f721ad1e75030cc06faf73587cfec57',
        'info_dict': {
            'id': '100643',
            'ext': 'mp4',
            'title': 'CHAOS;CHILD',
            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
        },
        'skip': 'Geo-restricted to China',
    }, {
        # Title with double quotes
        'url': 'http://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': '8903802',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
        },
        'playlist': [{
            'info_dict': {
                'id': '8903802_part1',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,  # Test metadata only
            },
        }, {
            'info_dict': {
                'id': '8903802_part2',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,  # Test metadata only
            },
        }]
    }, {
        # new BV video id format
        'url': 'https://www.bilibili.com/video/BV1JE411F741',
        'only_matching': True,
    }, {
        # Anthology
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 17,
    }]

    _APP_KEY = 'iVGUTjsxvpLeuDCf'
    _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'

    def _report_error(self, result):
        if 'message' in result:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
        elif 'code' in result:
            raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
        else:
            raise ExtractorError('Can\'t extract Bangumi episode ID')

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = re.match(self._VALID_URL, url)
        video_id = mobj.group('id_bv') or mobj.group('id')

        av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
        video_id = av_id

        anime_id = mobj.group('anime_id')
        page_id = mobj.group('page')
        webpage = self._download_webpage(url, video_id)

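        # Illustrative note: for a multi-part upload such as the BV1bK411W797
        # test above, https://www.bilibili.com/video/BV1bK411W797?p=2 plays its
        # second part; that 'p' parameter is what the (?P<page>\d+) group in
        # _VALID_URL captures as page_id, and it drives the anthology handling
        # below.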
        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        # If the video has no page argument, check to see if it's an anthology
        if page_id is None:
            if not self.get_param('noplaylist'):
                r = self._extract_anthology_entries(bv_id, video_id, webpage)
                if r is not None:
                    self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
                    return r
            else:
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

        if 'anime/' not in url:
            cid = self._search_regex(
                r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + compat_str(page_id), webpage, 'cid',
                default=None
            ) or self._search_regex(
                r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
                default=None
            ) or compat_parse_qs(self._search_regex(
                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
                 r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                webpage, 'player parameters'))['cid'][0]
        else:
            if 'no_bangumi_tip' not in smuggled_data:
                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run yt-dlp with %s' % (
                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': url
            }
            headers.update(self.geo_verification_headers())

            js = self._download_json(
                'http://bangumi.bilibili.com/web_api/get_source', video_id,
                data=urlencode_postdata({'episode_id': video_id}),
                headers=headers)
            if 'result' not in js:
                self._report_error(js)
            cid = js['result']['cid']

        headers = {
            'Accept': 'application/json',
            'Referer': url
        }
        headers.update(self.geo_verification_headers())

        entries = []

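        # Each rendition below is fetched from the v2/playurl API, whose query
        # string has to carry a signature: the MD5 hex digest of the payload
        # with the app secret appended. A rough sketch of the scheme (the cid
        # value here is made up):
        #   payload = 'appkey=%s&cid=15778971&otype=json&qn=80&quality=80&type=' % self._APP_KEY
        #   sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()
        #   url = 'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign)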
        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
        for num, rendition in enumerate(RENDITIONS, start=1):
            payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
            sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()

            video_info = self._download_json(
                'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
                video_id, note='Downloading video info page',
                headers=headers, fatal=num == len(RENDITIONS))

            if not video_info:
                continue

            if 'durl' not in video_info:
                if num < len(RENDITIONS):
                    continue
                self._report_error(video_info)

            for idx, durl in enumerate(video_info['durl']):
                formats = [{
                    'url': durl['url'],
                    'filesize': int_or_none(durl['size']),
                }]
                for backup_url in durl.get('backup_url', []):
                    formats.append({
                        'url': backup_url,
                        # backup URLs have lower priorities
                        'quality': -2 if 'hd.mp4' in backup_url else -3,
                    })

                for a_format in formats:
                    a_format.setdefault('http_headers', {}).update({
                        'Referer': url,
                    })

                self._sort_formats(formats)

                entries.append({
                    'id': '%s_part%s' % (video_id, idx),
                    'duration': float_or_none(durl.get('length'), 1000),
                    'formats': formats,
                })
            break

        title = self._html_search_regex(
            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
            group='title')

        # Get part title for anthologies
        if page_id is not None:
            # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
            part_title = try_get(
                self._download_json(
                    'https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp' % bv_id,
                    video_id, note='Extracting videos in anthology'),
                lambda x: x['data'][int(page_id) - 1]['part'])
            title = part_title or title

        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
            default=None) or self._html_search_meta(
            'uploadDate', webpage, 'timestamp', default=None))
        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

        # TODO 'view_count' requires deobfuscating Javascript
        info = {
            'id': compat_str(video_id) if page_id is None else '%s_p%s' % (video_id, page_id),
            'cid': cid,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            'duration': float_or_none(video_info.get('timelength'), scale=1000),
        }

        uploader_mobj = re.search(
            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
            webpage)
        if uploader_mobj:
            info.update({
                'uploader': uploader_mobj.group('name').strip(),
                'uploader_id': uploader_mobj.group('id'),
            })

        if not info.get('uploader'):
            info['uploader'] = self._html_search_meta(
                'author', webpage, 'uploader', default=None)

        raw_danmaku = self._get_raw_danmaku(video_id, cid)

        raw_tags = self._get_tags(video_id)
        tags = list(map(lambda x: x['tag_name'], raw_tags))

        top_level_info = {
            'raw_danmaku': raw_danmaku,
            'tags': tags,
            'raw_tags': raw_tags,
        }
        if self.get_param('getcomments', False):
            def get_comments():
                comments = self._get_all_comment_pages(video_id)
                return {
                    'comments': comments,
                    'comment_count': len(comments)
                }

            top_level_info['__post_extractor'] = get_comments

        '''
        # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
        # See https://github.com/animelover1984/youtube-dl
        danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576)
        entries[0]['subtitles'] = {
            'danmaku': [{
                'ext': 'ass',
                'data': danmaku
            }]
        }
        '''

        for entry in entries:
            entry.update(info)

        if len(entries) == 1:
            entries[0].update(top_level_info)
            return entries[0]
        else:
            for idx, entry in enumerate(entries):
                entry['id'] = '%s_part%d' % (video_id, (idx + 1))

            global_info = {
                '_type': 'multi_video',
                'id': compat_str(video_id),
                'bv_id': bv_id,
                'title': title,
                'description': description,
                'entries': entries,
            }

            global_info.update(info)
            global_info.update(top_level_info)

            return global_info

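    # Note on the pagelist API (based only on the fields used below): it
    # returns {'data': [{'page': 1, 'part': '<part title>', ...}, ...]}, one
    # item per part, and an anthology is only treated as a playlist when that
    # list has more than one entry.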
    def _extract_anthology_entries(self, bv_id, video_id, webpage):
        title = self._html_search_regex(
            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
            group='title')
        json_data = self._download_json(
            'https://api.bilibili.com/x/player/pagelist?bvid=%s&jsonp=jsonp' % bv_id,
            video_id, note='Extracting videos in anthology')

        if len(json_data['data']) > 1:
            return self.playlist_from_matches(
                json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(),
                getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page']))

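    # Bilibili videos carry both a numeric av/aid ID and a newer alphanumeric
    # BV ID; the web-interface/view API accepts either ('aid' or 'bvid') and
    # reports both, which is how the extractor maps between the two forms.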
    def _get_video_id_set(self, id, is_bv):
        query = {'bvid': id} if is_bv else {'aid': id}
        response = self._download_json(
            'http://api.bilibili.cn/x/web-interface/view',
            id, query=query,
            note='Grabbing original ID via API')

        if response['code'] == -400:
            raise ExtractorError('Video ID does not exist', expected=True, video_id=id)
        elif response['code'] != 0:
            raise ExtractorError('Unknown error occurred during API check (code %s)' % response['code'], expected=True, video_id=id)
        return (response['data']['aid'], response['data']['bvid'])

    # recursive solution to getting every page of comments for the video
    # we can stop when we reach a page without any comments
    def _get_all_comment_pages(self, video_id, commentPageNumber=0):
        comment_url = 'https://api.bilibili.com/x/v2/reply?jsonp=jsonp&pn=%s&type=1&oid=%s&sort=2&_=1567227301685' % (commentPageNumber, video_id)
        json_str = self._download_webpage(
            comment_url, video_id,
            note='Extracting comments from page %s' % (commentPageNumber))
        replies = json.loads(json_str)['data']['replies']
        if replies is None:
            return []
        return self._get_all_children(replies) + self._get_all_comment_pages(video_id, commentPageNumber + 1)

    # extracts all comments in the tree
    def _get_all_children(self, replies):
        if replies is None:
            return []

        ret = []
        for reply in replies:
            author = reply['member']['uname']
            author_id = reply['member']['mid']
            id = reply['rpid']
            text = reply['content']['message']
            timestamp = reply['ctime']
            parent = reply['parent'] if reply['parent'] != 0 else 'root'

            comment = {
                'author': author,
                'author_id': author_id,
                'id': id,
                'text': text,
                'timestamp': timestamp,
                'parent': parent,
            }
            ret.append(comment)

            # from the JSON, the comment structure seems arbitrarily deep, but I could be wrong.
            # Regardless, this should work.
            ret += self._get_all_children(reply['replies'])

        return ret

    def _get_raw_danmaku(self, video_id, cid):
        # This will be useful if I decide to scrape all pages instead of doing them individually
        # cid_url = "https://www.bilibili.com/widget/getPageList?aid=%s" % (video_id)
        # cid_str = self._download_webpage(cid_url, video_id, note=False)
        # cid = json.loads(cid_str)[0]['cid']

        danmaku_url = 'https://comment.bilibili.com/%s.xml' % (cid)
        danmaku = self._download_webpage(danmaku_url, video_id, note='Downloading danmaku comments')
        return danmaku

    def _get_tags(self, video_id):
        tags_url = 'https://api.bilibili.com/x/tag/archive/tags?aid=%s' % (video_id)
        tags_json = self._download_json(tags_url, video_id, note='Downloading tags')
        return tags_json['data']


class BiliBiliBangumiIE(InfoExtractor):
    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'

    IE_NAME = 'bangumi.bilibili.com'
    IE_DESC = 'BiliBili番剧'

    _TESTS = [{
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist_count': 26,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist': [{
            'md5': '91da8621454dd58316851c27c68b0c13',
            'info_dict': {
                'id': '40062',
                'ext': 'mp4',
                'title': '混沌武士',
                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
                'timestamp': 1414538739,
                'upload_date': '20141028',
                'episode': '疾风怒涛 Tempestuous Temperaments',
                'episode_number': 1,
            },
        }],
        'params': {
            'playlist_items': '1',
        },
    }]

    @classmethod
    def suitable(cls, url):
        return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)

    def _real_extract(self, url):
        bangumi_id = self._match_id(url)

        # Sometimes this API returns a JSONP response
        season_info = self._download_json(
            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
            bangumi_id, transform_source=strip_jsonp)['result']

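        # Each episode is delegated back to BiliBiliIE through its webplay_url;
        # the smuggled 'no_bangumi_tip' flag keeps that extractor from printing
        # the "to download all videos in anime ..." hint for every episode.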
        entries = [{
            '_type': 'url_transparent',
            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
            'ie_key': BiliBiliIE.ie_key(),
            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
            'episode': episode.get('index_title'),
            'episode_number': int_or_none(episode.get('index')),
        } for episode in season_info['episodes']]

        entries = sorted(entries, key=lambda entry: entry.get('episode_number'))

        return self.playlist_result(
            entries, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))


class BilibiliChannelIE(InfoExtractor):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)'
    _API_URL = 'https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {},
        'playlist_mincount': 112,
    }]

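    # The space/arc/search API is paged: data.page.count holds the total number
    # of uploads and data.list.vlist the videos on the current page, so pages
    # are requested until vlist comes back empty or the count is reached.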
    def _entries(self, list_id):
        count, max_count = 0, None

        for page_num in itertools.count(1):
            data = self._parse_json(
                self._download_webpage(
                    self._API_URL % (list_id, page_num), list_id,
                    note='Downloading page %d' % page_num),
                list_id)['data']

            max_count = max_count or try_get(data, lambda x: x['page']['count'])

            entries = try_get(data, lambda x: x['list']['vlist'])
            if not entries:
                return
            for entry in entries:
                yield self.url_result(
                    'https://www.bilibili.com/video/%s' % entry['bvid'],
                    BiliBiliIE.ie_key(), entry['bvid'])

            count += len(entries)
            if max_count and count >= max_count:
                return

    def _real_extract(self, url):
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id)


class BiliBiliSearchIE(SearchInfoExtractor):
    IE_DESC = 'Bilibili video search, "bilisearch" keyword'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    MAX_NUMBER_OF_RESULTS = 1000

    def _get_n_results(self, query, n):
        """Get a specified number of results for a query"""

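        # Results are accumulated page by page; the loop stops when the API no
        # longer returns a 'result' list, or once we have collected n entries
        # or hit MAX_NUMBER_OF_RESULTS, which seems to be the API's own cap.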
        entries = []
        pageNumber = 0
        while True:
            pageNumber += 1
            # FIXME
            api_url = 'https://api.bilibili.com/x/web-interface/search/type?context=&page=%s&order=pubdate&keyword=%s&duration=0&tids_2=&__refresh__=true&search_type=video&tids=0&highlight=1' % (pageNumber, query)
            json_str = self._download_webpage(
                api_url, 'None', query={'Search_key': query},
                note='Extracting results from page %s' % pageNumber)
            data = json.loads(json_str)['data']

            # FIXME: this is hideous
            if 'result' not in data:
                return {
                    '_type': 'playlist',
                    'id': query,
                    'entries': entries[:n]
                }

            videos = data['result']
            for video in videos:
                e = self.url_result(video['arcurl'], 'BiliBili', compat_str(video['aid']))
                entries.append(e)

            if len(entries) >= n or len(videos) >= BiliBiliSearchIE.MAX_NUMBER_OF_RESULTS:
                return {
                    '_type': 'playlist',
                    'id': query,
                    'entries': entries[:n]
                }


class BilibiliAudioBaseIE(InfoExtractor):
    def _call_api(self, path, sid, query=None):
        if not query:
            query = {'sid': sid}
        return self._download_json(
            'https://www.bilibili.com/audio/music-service-c/web/' + path,
            sid, query=query)['data']


class BilibiliAudioIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none'
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        subtitles = None
        lyric = song.get('lyric')
        if lyric:
            subtitles = {
                'origin': [{
                    'url': lyric,
                }]
            }

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }


class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        am_id = self._match_id(url)

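        # The song/of-menu endpoint appears to be paginated via 'pn'/'ps'; a
        # single page of up to 100 songs is requested here, which covers
        # typical album sizes.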
        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        entries = []
        for song in songs:
            sid = str_or_none(song.get('id'))
            if not sid:
                continue
            entries.append(self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid))

        if entries:
            album_data = self._call_api('menu/info', am_id) or {}
            album_title = album_data.get('title')
            if album_title:
                for entry in entries:
                    entry['album'] = album_title
                return self.playlist_result(
                    entries, am_id, album_title, album_data.get('intro'))

        return self.playlist_result(entries, am_id)


class BiliBiliPlayerIE(InfoExtractor):
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self.url_result(
            'http://www.bilibili.tv/video/av%s/' % video_id,
            ie=BiliBiliIE.ie_key(), video_id=video_id)