]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/bilibili.py
[cleanup] Add keyword automatically to SearchIE descriptions
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
1 # coding: utf-8
2
3 import hashlib
4 import itertools
5 import functools
6 import re
7 import math
8
9 from .common import InfoExtractor, SearchInfoExtractor
10 from ..compat import (
11 compat_parse_qs,
12 compat_urlparse,
13 compat_urllib_parse_urlparse
14 )
15 from ..utils import (
16 ExtractorError,
17 int_or_none,
18 float_or_none,
19 parse_iso8601,
20 traverse_obj,
21 try_get,
22 smuggle_url,
23 srt_subtitles_timecode,
24 str_or_none,
25 str_to_int,
26 strip_jsonp,
27 unified_timestamp,
28 unsmuggle_url,
29 urlencode_postdata,
30 OnDemandPagedList
31 )
32
33
class BiliBiliIE(InfoExtractor):
    """Extractor for ordinary bilibili.com/.tv videos.

    Handles legacy ``av``-prefixed numeric IDs, the newer ``BV`` IDs,
    bangumi (anime) episode pages reached via ``anime/<id>/play#<ep>``,
    and multi-part "anthology" videos that share one BV ID.
    """

    _VALID_URL = r'''(?x)
                    https?://
                        (?:(?:www|bangumi)\.)?
                        bilibili\.(?:tv|com)/
                        (?:
                            (?:
                                video/[aA][vV]|
                                anime/(?P<anime_id>\d+)/play\#
                            )(?P<id>\d+)|
                            (s/)?video/[bB][vV](?P<id_bv>[^/?#&]+)
                        )
                        (?:/?\?p=(?P<page>\d+))?
                    '''

    _TESTS = [{
        'url': 'http://www.bilibili.com/video/av1074402/',
        'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
        'info_dict': {
            'id': '1074402',
            'ext': 'flv',
            'title': '【金坷垃】金泡沫',
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'duration': 308.067,
            'timestamp': 1398012678,
            'upload_date': '20140420',
            'thumbnail': r're:^https?://.+\.jpg',
            'uploader': '菊子桑',
            'uploader_id': '156160',
        },
    }, {
        # Tested in BiliBiliBangumiIE
        'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
        'only_matching': True,
    }, {
        # bilibili.tv
        'url': 'http://www.bilibili.tv/video/av1074402/',
        'only_matching': True,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
        'md5': '3f721ad1e75030cc06faf73587cfec57',
        'info_dict': {
            'id': '100643',
            'ext': 'mp4',
            'title': 'CHAOS;CHILD',
            'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
        },
        'skip': 'Geo-restricted to China',
    }, {
        # Title with double quotes
        'url': 'http://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': '8903802',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
        },
        'playlist': [{
            'info_dict': {
                'id': '8903802_part1',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,
            },
        }, {
            'info_dict': {
                'id': '8903802_part2',
                'ext': 'flv',
                'title': '阿滴英文|英文歌分享#6 "Closer',
                'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
                'uploader': '阿滴英文',
                'uploader_id': '65880958',
                'timestamp': 1488382634,
                'upload_date': '20170301',
            },
            'params': {
                'skip_download': True,
            },
        }]
    }, {
        # new BV video id format
        'url': 'https://www.bilibili.com/video/BV1JE411F741',
        'only_matching': True,
    }, {
        # Anthology
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 17,
    }]

    # App key/secret pair used to sign playurl API requests (md5 of
    # payload + secret appended as `sign`).
    _APP_KEY = 'iVGUTjsxvpLeuDCf'
    _BILIBILI_KEY = 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'

    def _report_error(self, result):
        """Raise an ExtractorError describing an API error response dict."""
        if 'message' in result:
            raise ExtractorError('%s said: %s' % (self.IE_NAME, result['message']), expected=True)
        elif 'code' in result:
            raise ExtractorError('%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
        else:
            raise ExtractorError('Can\'t extract Bangumi episode ID')

    def _real_extract(self, url):
        url, smuggled_data = unsmuggle_url(url, {})

        mobj = self._match_valid_url(url)
        video_id = mobj.group('id_bv') or mobj.group('id')

        # Resolve whichever ID form we got into the canonical (av, bv) pair.
        av_id, bv_id = self._get_video_id_set(video_id, mobj.group('id_bv') is not None)
        video_id = av_id

        anime_id = mobj.group('anime_id')
        page_id = mobj.group('page')
        webpage = self._download_webpage(url, video_id)

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        # If the video has no page argument, check to see if it's an anthology
        if page_id is None:
            if not self.get_param('noplaylist'):
                r = self._extract_anthology_entries(bv_id, video_id, webpage)
                if r is not None:
                    self.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id)
                    return r
            else:
                self.to_screen('Downloading just video %s because of --no-playlist' % video_id)

        if 'anime/' not in url:
            # Regular video page: locate the cid either inline in the page
            # markup or, as a last resort, in embedded player parameters.
            cid = self._search_regex(
                r'\bcid(?:["\']:|=)(\d+),["\']page(?:["\']:|=)' + str(page_id), webpage, 'cid',
                default=None
            ) or self._search_regex(
                r'\bcid(?:["\']:|=)(\d+)', webpage, 'cid',
                default=None
            ) or compat_parse_qs(self._search_regex(
                [r'EmbedPlayer\([^)]+,\s*"([^"]+)"\)',
                 r'EmbedPlayer\([^)]+,\s*\\"([^"]+)\\"\)',
                 r'<iframe[^>]+src="https://secure\.bilibili\.com/secure,([^"]+)"'],
                webpage, 'player parameters'))['cid'][0]
        else:
            # Bangumi episode: the cid must come from the bangumi API instead.
            if 'no_bangumi_tip' not in smuggled_data:
                self.to_screen('Downloading episode %s. To download all videos in anime %s, re-run yt-dlp with %s' % (
                    video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi.bilibili.com/anime/%s' % anime_id)))
            headers = {
                'Content-Type': 'application/x-www-form-urlencoded; charset=UTF-8',
                'Referer': url
            }
            headers.update(self.geo_verification_headers())

            js = self._download_json(
                'http://bangumi.bilibili.com/web_api/get_source', video_id,
                data=urlencode_postdata({'episode_id': video_id}),
                headers=headers)
            if 'result' not in js:
                self._report_error(js)
            cid = js['result']['cid']

        headers = {
            'Accept': 'application/json',
            'Referer': url
        }
        headers.update(self.geo_verification_headers())

        entries = []

        # Try the high-quality FLV rendition first, then fall back to the
        # lower-quality MP4 one; only the final attempt is fatal.
        RENDITIONS = ('qn=80&quality=80&type=', 'quality=2&type=mp4')
        for num, rendition in enumerate(RENDITIONS, start=1):
            payload = 'appkey=%s&cid=%s&otype=json&%s' % (self._APP_KEY, cid, rendition)
            sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf-8')).hexdigest()

            video_info = self._download_json(
                'http://interface.bilibili.com/v2/playurl?%s&sign=%s' % (payload, sign),
                video_id, note='Downloading video info page',
                headers=headers, fatal=num == len(RENDITIONS))

            if not video_info:
                continue

            if 'durl' not in video_info:
                if num < len(RENDITIONS):
                    continue
                self._report_error(video_info)

            # Each `durl` entry is one physical part of the video.
            for idx, durl in enumerate(video_info['durl']):
                formats = [{
                    'url': durl['url'],
                    'filesize': int_or_none(durl['size']),
                }]
                for backup_url in durl.get('backup_url', []):
                    formats.append({
                        'url': backup_url,
                        # backup URLs have lower priorities
                        'quality': -2 if 'hd.mp4' in backup_url else -3,
                    })

                for a_format in formats:
                    a_format.setdefault('http_headers', {}).update({
                        'Referer': url,
                    })

                self._sort_formats(formats)

                entries.append({
                    'id': '%s_part%s' % (video_id, idx),
                    'duration': float_or_none(durl.get('length'), 1000),
                    'formats': formats,
                })
            break

        title = self._html_search_regex(
            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
            group='title')

        # Get part title for anthologies
        if page_id is not None:
            # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload for each video
            part_title = try_get(
                self._download_json(
                    f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
                    video_id, note='Extracting videos in anthology'),
                lambda x: x['data'][int(page_id) - 1]['part'])
            title = part_title or title

        description = self._html_search_meta('description', webpage)
        timestamp = unified_timestamp(self._html_search_regex(
            r'<time[^>]+datetime="([^"]+)"', webpage, 'upload time',
            default=None) or self._html_search_meta(
            'uploadDate', webpage, 'timestamp', default=None))
        thumbnail = self._html_search_meta(['og:image', 'thumbnailUrl'], webpage)

        # TODO 'view_count' requires deobfuscating Javascript
        info = {
            'id': str(video_id) if page_id is None else '%s_part%s' % (video_id, page_id),
            'cid': cid,
            'title': title,
            'description': description,
            'timestamp': timestamp,
            'thumbnail': thumbnail,
            # NOTE: `video_info` is the last rendition fetched by the loop above
            'duration': float_or_none(video_info.get('timelength'), scale=1000),
        }

        uploader_mobj = re.search(
            r'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
            webpage)
        if uploader_mobj:
            info.update({
                'uploader': uploader_mobj.group('name').strip(),
                'uploader_id': uploader_mobj.group('id'),
            })

        if not info.get('uploader'):
            info['uploader'] = self._html_search_meta(
                'author', webpage, 'uploader', default=None)

        top_level_info = {
            'tags': traverse_obj(self._download_json(
                f'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}',
                video_id, fatal=False, note='Downloading tags'), ('data', ..., 'tag_name')),
        }

        # Danmaku (scrolling comments) are exposed as an XML "subtitle" track.
        entries[0]['subtitles'] = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }]
        }

        r'''
        # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
        # See https://github.com/animelover1984/youtube-dl

        raw_danmaku = self._download_webpage(
            f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments')
        danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576)
        entries[0]['subtitles'] = {
            'danmaku': [{
                'ext': 'ass',
                'data': danmaku
            }]
        }
        '''

        top_level_info['__post_extractor'] = self.extract_comments(video_id)

        for entry in entries:
            entry.update(info)

        if len(entries) == 1:
            entries[0].update(top_level_info)
            return entries[0]

        # Multi-part video: renumber the parts 1-based and return as one
        # multi_video result.
        for idx, entry in enumerate(entries):
            entry['id'] = '%s_part%d' % (video_id, (idx + 1))

        return {
            '_type': 'multi_video',
            'id': str(video_id),
            'bv_id': bv_id,
            'title': title,
            'description': description,
            'entries': entries,
            **info, **top_level_info
        }

    def _extract_anthology_entries(self, bv_id, video_id, webpage):
        """Return a playlist of per-page URLs when the BV ID is a
        multi-page anthology, or None when the pagelist is empty."""
        title = self._html_search_regex(
            (r'<h1[^>]+\btitle=(["\'])(?P<title>(?:(?!\1).)+)\1',
             r'(?s)<h1[^>]*>(?P<title>.+?)</h1>'), webpage, 'title',
            group='title')
        json_data = self._download_json(
            f'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
            video_id, note='Extracting videos in anthology')

        if json_data['data']:
            return self.playlist_from_matches(
                json_data['data'], bv_id, title, ie=BiliBiliIE.ie_key(),
                getter=lambda entry: 'https://www.bilibili.com/video/%s?p=%d' % (bv_id, entry['page']))

    def _get_video_id_set(self, id, is_bv):
        """Resolve either ID form via the view API into an (aid, bvid) pair."""
        query = {'bvid': id} if is_bv else {'aid': id}
        response = self._download_json(
            "http://api.bilibili.cn/x/web-interface/view",
            id, query=query,
            note='Grabbing original ID via API')

        if response['code'] == -400:
            raise ExtractorError('Video ID does not exist', expected=True, video_id=id)
        elif response['code'] != 0:
            raise ExtractorError(f'Unknown error occurred during API check (code {response["code"]})',
                                 expected=True, video_id=id)
        return response['data']['aid'], response['data']['bvid']

    def _get_comments(self, video_id, commentPageNumber=0):
        """Yield comment dicts page by page, flattening nested replies.

        NOTE(review): there is no stop condition when a page returns no
        replies, so this generator relies on the caller limiting
        consumption — confirm before iterating it exhaustively.
        """
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={video_id}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    video_id, note=f'Extracting comments from page {idx}'),
                ('data', 'replies')) or []
            for children in map(self._get_all_children, replies):
                yield from children

    def _get_all_children(self, reply):
        """Yield this reply as a comment dict, then recurse into its replies."""
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for children in map(self._get_all_children, reply.get('replies') or []):
            yield from children
395
396
class BiliBiliBangumiIE(InfoExtractor):
    """Extractor for bangumi (anime) season pages on bangumi.bilibili.com,
    yielding one playlist entry per episode."""

    _VALID_URL = r'https?://bangumi\.bilibili\.com/anime/(?P<id>\d+)'

    IE_NAME = 'bangumi.bilibili.com'
    IE_DESC = 'BiliBili番剧'

    _TESTS = [{
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist_count': 26,
    }, {
        'url': 'http://bangumi.bilibili.com/anime/1869',
        'info_dict': {
            'id': '1869',
            'title': '混沌武士',
            'description': 'md5:6a9622b911565794c11f25f81d6a97d2',
        },
        'playlist': [{
            'md5': '91da8621454dd58316851c27c68b0c13',
            'info_dict': {
                'id': '40062',
                'ext': 'mp4',
                'title': '混沌武士',
                'description': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子...',
                'timestamp': 1414538739,
                'upload_date': '20141028',
                'episode': '疾风怒涛 Tempestuous Temperaments',
                'episode_number': 1,
            },
        }],
        'params': {
            'playlist_items': '1',
        },
    }]

    @classmethod
    def suitable(cls, url):
        # Direct episode URLs are handled by BiliBiliIE; only take season pages.
        return False if BiliBiliIE.suitable(url) else super(BiliBiliBangumiIE, cls).suitable(url)

    def _real_extract(self, url):
        bangumi_id = self._match_id(url)

        # Sometimes this API returns a JSONP response
        season_info = self._download_json(
            'http://bangumi.bilibili.com/jsonp/seasoninfo/%s.ver' % bangumi_id,
            bangumi_id, transform_source=strip_jsonp)['result']

        entries = [{
            '_type': 'url_transparent',
            'url': smuggle_url(episode['webplay_url'], {'no_bangumi_tip': 1}),
            'ie_key': BiliBiliIE.ie_key(),
            'timestamp': parse_iso8601(episode.get('update_time'), delimiter=' '),
            'episode': episode.get('index_title'),
            'episode_number': int_or_none(episode.get('index')),
        } for episode in season_info['episodes']]

        # `episode_number` may be None (non-numeric/missing `index`); comparing
        # None with int raises TypeError in Python 3, so sort those first via 0.
        entries = sorted(entries, key=lambda entry: entry.get('episode_number') or 0)

        return self.playlist_result(
            entries, bangumi_id,
            season_info.get('bangumi_title'), season_info.get('evaluate'))
462
463
class BilibiliChannelIE(InfoExtractor):
    """Extractor for a user's channel page (space.bilibili.com/<mid>),
    paging through the space API and yielding each uploaded video."""

    # Fixed: the dot in `space.bilibili` was unescaped, so the pattern also
    # matched hosts such as `spaceXbilibili.com`.
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)'
    _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {},
        'playlist_mincount': 112,
    }]

    def _entries(self, list_id):
        """Yield url_results for every video in the channel, page by page."""
        count, max_count = 0, None

        for page_num in itertools.count(1):
            data = self._download_json(
                self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}')['data']

            # The total count is only read from the first page that reports it.
            max_count = max_count or try_get(data, lambda x: x['page']['count'])

            entries = try_get(data, lambda x: x['list']['vlist'])
            if not entries:
                return
            for entry in entries:
                yield self.url_result(
                    'https://www.bilibili.com/video/%s' % entry['bvid'],
                    BiliBiliIE.ie_key(), entry['bvid'])

            count += len(entries)
            if max_count and count >= max_count:
                return

    def _real_extract(self, url):
        list_id = self._match_id(url)
        return self.playlist_result(self._entries(list_id), list_id)
497
498
class BilibiliCategoryIE(InfoExtractor):
    """Extractor for category/subcategory listing pages
    (https://www.bilibili.com/v/<category>/<subcategory>)."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad'
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45
        }
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url_results for one page of the category listing."""
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note='Extracting results from page %s of %s' % (page_num, num_pages))

        video_list = try_get(parsed_json, lambda x: x['data']['archives'], list)
        if not video_list:
            raise ExtractorError('Failed to retrieve video list for page %d' % page_num)

        for video in video_list:
            yield self.url_result(
                'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # Fixed: when the response lacks data.page, try_get returns None and
        # the .get() calls below raised AttributeError instead of the intended
        # clean ExtractorError; default to {} so the explicit check fires.
        page_data = try_get(page_json, lambda x: x['data']['page'], dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        u = compat_urllib_parse_urlparse(url)
        category, subcategory = u.path.split('/')[2:4]
        query = '%s: %s' % (category, subcategory)

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
566
567
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor using the bilibili web search API
    (prefix: `bilisearch`)."""

    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'

    def _search_results(self, query):
        """Yield url_results for each hit, paging until the API is exhausted."""
        for page_num in itertools.count(1):
            videos = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query={
                    'Search_key': query,
                    'keyword': query,
                    'page': page_num,
                    'context': '',
                    'order': 'pubdate',
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                })['data'].get('result') or []
            # Fixed: past the last page the API returns no results, but the
            # loop previously kept requesting pages forever; stop instead.
            if not videos:
                break
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
592
593
class BilibiliAudioBaseIE(InfoExtractor):
    """Shared helper for the bilibili audio (music) extractors."""

    def _call_api(self, path, sid, query=None):
        """Call the audio web API at *path* and return its 'data' payload.

        When no explicit query is supplied, the song/album id is sent as
        the `sid` parameter.
        """
        api_query = query if query else {'sid': sid}
        endpoint = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        return self._download_json(endpoint, sid, query=api_query)['data']
601
602
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extractor for single audio tracks (bilibili.com/audio/au<id>)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        au_id = self._match_id(url)

        # Playback info: a single audio-only CDN URL.
        playback = self._call_api('url', au_id)
        formats = [{
            'url': playback['cdns'][0],
            'filesize': int_or_none(playback.get('size')),
            'vcodec': 'none'
        }]

        # Song metadata (title is mandatory, everything else best-effort).
        song_info = self._call_api('song/info', au_id)
        title = song_info['title']
        stats = song_info.get('statistic') or {}

        # Lyrics, when present, are exposed as an LRC "subtitle" track.
        subtitles = None
        lyric_url = song_info.get('lyric')
        if lyric_url:
            subtitles = {'origin': [{'url': lyric_url}]}

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song_info.get('author'),
            'comment_count': int_or_none(stats.get('comment')),
            'description': song_info.get('intro'),
            'duration': int_or_none(song_info.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song_info.get('cover'),
            'timestamp': int_or_none(song_info.get('passtime')),
            'uploader': song_info.get('uname'),
            'view_count': int_or_none(stats.get('play')),
        }
666
667
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extractor for audio albums (bilibili.com/audio/am<id>), yielding one
    BilibiliAudioIE entry per track."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        am_id = self._match_id(url)

        track_list = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # One url_result per track that carries a usable numeric id.
        entries = []
        for track in track_list:
            track_id = str_or_none(track.get('id'))
            if track_id:
                entries.append(self.url_result(
                    'https://www.bilibili.com/audio/au' + track_id,
                    BilibiliAudioIE.ie_key(), track_id))

        if entries:
            album_data = self._call_api('menu/info', am_id) or {}
            album_title = album_data.get('title')
            if album_title:
                # Tag every track with the album title for metadata purposes.
                for entry in entries:
                    entry['album'] = album_title
                return self.playlist_result(
                    entries, am_id, album_title, album_data.get('intro'))

        return self.playlist_result(entries, am_id)
705
706
class BiliBiliPlayerIE(InfoExtractor):
    """Extractor for embedded player URLs (player.bilibili.com); delegates
    to BiliBiliIE via the equivalent av-ID watch page."""

    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        watch_url = f'http://www.bilibili.tv/video/av{video_id}/'
        return self.url_result(watch_url, ie=BiliBiliIE.ie_key(), video_id=video_id)
719
720
class BiliIntlBaseIE(InfoExtractor):
    """Shared helpers for the international Bilibili sites
    (bilibili.tv / biliintl.com)."""

    # `type` is the host suffix ('bili.tv' or 'intl.com'), `endpoint` the
    # gateway path.
    _API_URL = 'https://api.bili{}/intl/gateway{}'

    def _call_api(self, type, endpoint, id):
        """Call the intl gateway API and return its 'data' payload."""
        return self._download_json(self._API_URL.format(type, endpoint), id)['data']

    def json2srt(self, json):
        """Convert the API's JSON subtitle format ('body' list of
        from/to/content cues) into SRT text."""
        data = '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(json['body']))
        return data

    def _get_subtitles(self, type, ep_id):
        """Download all subtitle tracks for an episode, converted to SRT,
        keyed by language (defaults to 'en' when the API omits the key)."""
        sub_json = self._call_api(type, f'/m/subtitle?ep_id={ep_id}&platform=web', ep_id)
        subtitles = {}
        for sub in sub_json.get('subtitles', []):
            sub_url = sub.get('url')
            if not sub_url:
                continue
            sub_data = self._download_json(sub_url, ep_id, fatal=False)
            if not sub_data:
                continue
            subtitles.setdefault(sub.get('key', 'en'), []).append({
                'ext': 'srt',
                'data': self.json2srt(sub_data)
            })
        return subtitles

    def _get_formats(self, type, ep_id):
        """Build the format list (separate video-only and audio-only MP4
        streams) for an episode; raises login-required when playurl is empty."""
        video_json = self._call_api(type, f'/web/playurl?ep_id={ep_id}&platform=web', ep_id)
        if not video_json:
            self.raise_login_required(method='cookies')
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video', []):
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource', []):
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        self._sort_formats(formats)
        return formats

    def _extract_ep_info(self, type, episode_data, ep_id):
        """Build the info dict for one episode from its collection entry.

        NOTE: the episode number is parsed from the short numeric `title`
        field; `long_title` is the human-readable episode name.
        """
        return {
            'id': ep_id,
            'title': episode_data.get('long_title') or episode_data['title'],
            'thumbnail': episode_data.get('cover'),
            'episode_number': str_to_int(episode_data.get('title')),
            'formats': self._get_formats(type, ep_id),
            'subtitles': self._get_subtitles(type, ep_id),
            'extractor_key': BiliIntlIE.ie_key(),
        }
796
797
class BiliIntlIE(BiliIntlBaseIE):
    """Extractor for single episodes on the international sites
    (bilibili.tv / biliintl.com play pages)."""

    # Fixed: the dot in `intl.com` was unescaped, so the pattern matched any
    # character there (e.g. `biliintlXcom`).
    _VALID_URL = r'https?://(?:www\.)?bili(?P<type>bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<season_id>\d+)/(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'The First Night',
            'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png',
            'episode_number': 2,
        },
        'params': {
            'format': 'bv',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'The First Night',
            'thumbnail': 'https://i0.hdslb.com/bfs/intl/management/91e30e5521235d9b163339a26a0b030ebda54310.png',
            'episode_number': 2,
        },
        'params': {
            'format': 'bv',
        },
    }]

    def _real_extract(self, url):
        # `site` is the host suffix captured by the `type` group
        # ('bili.tv' or 'intl.com'), used to pick the API host.
        site, season_id, ep_id = self._match_valid_url(url).groups()
        data_json = self._call_api(site, f'/web/view/ogv_collection?season_id={season_id}', ep_id)
        # Locate this episode inside the season's collection; raises
        # StopIteration (wrapped by yt-dlp) if the ep_id is not listed.
        episode_data = next(
            episode for episode in data_json.get('episodes', [])
            if str(episode.get('ep_id')) == ep_id)
        return self._extract_ep_info(site, episode_data, ep_id)
833
834
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Extractor for whole series on the international sites, yielding every
    episode of the season as a playlist."""

    # Fixed: the dot in `intl.com` was unescaped, so the pattern matched any
    # character there (e.g. `biliintlXcom`).
    _VALID_URL = r'https?://(?:www\.)?bili(?P<type>bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<id>\d+)$'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
        },
        'params': {
            'skip_download': True,
            'format': 'bv',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
        },
        'params': {
            'skip_download': True,
            'format': 'bv',
        },
    }]

    def _entries(self, series_id, site):
        """Yield the full info dict for every episode in the season."""
        data_json = self._call_api(site, f'/web/view/ogv_collection?season_id={series_id}', series_id)
        for episode in data_json.get('episodes', []):
            episode_id = str(episode.get('ep_id'))
            yield self._extract_ep_info(site, episode, episode_id)

    def _real_extract(self, url):
        site, series_id = self._match_valid_url(url).groups()
        return self.playlist_result(self._entries(series_id, site), playlist_id=series_id)