8 from .common
import InfoExtractor
, SearchInfoExtractor
12 compat_urllib_parse_urlparse
25 srt_subtitles_timecode
,
36 class BiliBiliIE(InfoExtractor
):
39 (?:(?:www|bangumi)\.)?
44 anime/(?P<anime_id>\d+)/play\#
46 (s/)?video/[bB][vV](?P<id_bv>[^/?#&]+)
48 (?:/?\?p=(?P<page>\d+))?
52 'url': 'http://www.bilibili.com/video/av1074402/',
53 'md5': '7ac275ec84a99a6552c5d229659a0fe1',
55 'id': '1074402_part1',
58 'uploader_id': '156160',
60 'upload_date': '20140420',
61 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
62 'timestamp': 1398012678,
63 'tags': ['顶上去报复社会', '该来的总会来的', '金克拉是检验歌曲的唯一标准', '坷垃教主', '金坷垃', '邓紫棋', '治愈系坷垃'],
64 'bv_id': 'BV11x411K7CN',
66 'thumbnail': 'http://i2.hdslb.com/bfs/archive/c79a8cf0347cd7a897c53a2f756e96aead128e8c.jpg',
70 # Tested in BiliBiliBangumiIE
71 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
72 'only_matching': True,
75 'url': 'http://www.bilibili.tv/video/av1074402/',
76 'only_matching': True,
78 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
79 'md5': '3f721ad1e75030cc06faf73587cfec57',
83 'title': 'CHAOS;CHILD',
84 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
86 'skip': 'Geo-restricted to China',
88 'url': 'http://www.bilibili.com/video/av8903802/',
90 'id': '8903802_part1',
92 'title': '阿滴英文|英文歌分享#6 "Closer',
93 'upload_date': '20170301',
94 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
95 'timestamp': 1488382634,
96 'uploader_id': '65880958',
98 'thumbnail': 'http://i2.hdslb.com/bfs/archive/49267ce20bc246be6304bf369a3ded0256854c23.jpg',
101 'bv_id': 'BV13x41117TL',
102 'tags': ['人文', '英语', '文化', '公开课', '阿滴英文'],
105 'skip_download': True,
108 # new BV video id format
109 'url': 'https://www.bilibili.com/video/BV1JE411F741',
110 'only_matching': True,
113 'url': 'https://www.bilibili.com/video/BV1bK411W797',
115 'id': 'BV1bK411W797',
116 'title': '物语中的人物是如何吐槽自己的OP的'
118 'playlist_count': 17,
120 # Correct matching of single and double quotes in title
121 'url': 'https://www.bilibili.com/video/BV1NY411E7Rx/',
123 'id': '255513412_part1',
125 'title': 'Vid"eo" Te\'st',
127 'thumbnail': 'http://i2.hdslb.com/bfs/archive/0c0de5a90b6d5b991b8dcc6cde0afbf71d564791.jpg',
128 'upload_date': '20220408',
129 'timestamp': 1649436552,
130 'description': 'Vid"eo" Te\'st',
131 'uploader_id': '1630758804',
132 'bv_id': 'BV1NY411E7Rx',
134 'uploader': 'bili_31244483705',
138 'skip_download': True,
142 _APP_KEY
= 'iVGUTjsxvpLeuDCf'
143 _BILIBILI_KEY
= 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
def _report_error(self, result):
    """Turn an API error payload into an ExtractorError (always raises).

    Prefers the server-supplied 'message', falls back to the numeric
    'code', and finally to a generic episode-ID failure.
    """
    if 'message' in result:
        raise ExtractorError(
            '%s said: %s' % (self.IE_NAME, result['message']), expected=True)
    if 'code' in result:
        raise ExtractorError(
            '%s returns error %d' % (self.IE_NAME, result['code']), expected=True)
    raise ExtractorError('Can\'t extract Bangumi episode ID')
153 def _real_extract(self
, url
):
154 url
, smuggled_data
= unsmuggle_url(url
, {})
156 mobj
= self
._match
_valid
_url
(url
)
157 video_id
= mobj
.group('id_bv') or mobj
.group('id')
159 av_id
, bv_id
= self
._get
_video
_id
_set
(video_id
, mobj
.group('id_bv') is not None)
163 anime_id
= mobj
.group('anime_id')
164 page_id
= mobj
.group('page')
165 webpage
= self
._download
_webpage
(url
, video_id
)
167 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
168 # If the video has no page argument, check to see if it's an anthology
170 if not self
.get_param('noplaylist'):
171 r
= self
._extract
_anthology
_entries
(bv_id
, video_id
, webpage
)
173 self
.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id
)
176 self
.to_screen('Downloading just video %s because of --no-playlist' % video_id
)
178 if 'anime/' not in url
:
179 cid
= self
._search
_regex
(
180 r
'\bcid(?:["\']:|
=)(\d
+),["\']page(?:["\']:|
=)' + str(page_id), webpage, 'cid
',
182 ) or self._search_regex(
183 r'\bcid
(?
:["\']:|=)(\d+)', webpage, 'cid',
185 ) or compat_parse_qs(self._search_regex(
186 [r'EmbedPlayer\([^)]+,\s*"([^
"]+)"\
)',
187 r'EmbedPlayer\
([^
)]+,\s
*\\"([^"]+)\\"\)',
188 r'<iframe[^>]+src="https
://secure\
.bilibili\
.com
/secure
,([^
"]+)"'],
189 webpage, 'player parameters
'))['cid
'][0]
191 if 'no_bangumi_tip
' not in smuggled_data:
192 self.to_screen('Downloading episode
%s. To download all videos
in anime
%s, re
-run yt
-dlp
with %s' % (
193 video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi
.bilibili
.com
/anime
/%s' % anime_id)))
195 'Content
-Type
': 'application
/x
-www
-form
-urlencoded
; charset
=UTF
-8',
198 headers.update(self.geo_verification_headers())
200 js = self._download_json(
201 'http
://bangumi
.bilibili
.com
/web_api
/get_source
', video_id,
202 data=urlencode_postdata({'episode_id': video_id}),
204 if 'result
' not in js:
205 self._report_error(js)
206 cid = js['result
']['cid
']
209 'Accept
': 'application
/json
',
212 headers.update(self.geo_verification_headers())
214 video_info = self._parse_json(
215 self._search_regex(r'window
.__playinfo
__\s
*=\s
*({.+?}
)</script
>', webpage, 'video info
', default=None) or '{}',
216 video_id, fatal=False)
217 video_info = video_info.get('data
') or {}
219 durl = traverse_obj(video_info, ('dash
', 'video
'))
220 audios = traverse_obj(video_info, ('dash
', 'audio
')) or []
223 RENDITIONS = ('qn
=80&quality
=80&type=', 'quality
=2&type=mp4
')
224 for num, rendition in enumerate(RENDITIONS, start=1):
225 payload = 'appkey
=%s&cid
=%s&otype
=json
&%s' % (self._APP_KEY, cid, rendition)
226 sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf
-8')).hexdigest()
228 video_info = self._download_json(
229 'http
://interface
.bilibili
.com
/v2
/playurl?
%s&sign
=%s' % (payload, sign),
230 video_id, note='Downloading video info page
',
231 headers=headers, fatal=num == len(RENDITIONS))
235 if not durl and 'durl
' not in video_info:
236 if num < len(RENDITIONS):
238 self._report_error(video_info)
241 for idx, durl in enumerate(durl or video_info['durl
']):
243 'url
': durl.get('baseUrl
') or durl.get('base_url
') or durl.get('url
'),
244 'ext
': mimetype2ext(durl.get('mimeType
') or durl.get('mime_type
')),
245 'fps
': int_or_none(durl.get('frameRate
') or durl.get('frame_rate
')),
246 'width
': int_or_none(durl.get('width
')),
247 'height
': int_or_none(durl.get('height
')),
248 'vcodec
': durl.get('codecs
'),
249 'acodec
': 'none
' if audios else None,
250 'tbr
': float_or_none(durl.get('bandwidth
'), scale=1000),
251 'filesize
': int_or_none(durl.get('size
')),
253 for backup_url in traverse_obj(durl, 'backup_url
', expected_type=list) or []:
256 'quality
': -2 if 'hd
.mp4
' in backup_url else -3,
261 'url
': audio.get('baseUrl
') or audio.get('base_url
') or audio.get('url
'),
262 'ext
': mimetype2ext(audio.get('mimeType
') or audio.get('mime_type
')),
263 'fps
': int_or_none(audio.get('frameRate
') or audio.get('frame_rate
')),
264 'width
': int_or_none(audio.get('width
')),
265 'height
': int_or_none(audio.get('height
')),
266 'acodec
': audio.get('codecs
'),
268 'tbr
': float_or_none(audio.get('bandwidth
'), scale=1000),
269 'filesize
': int_or_none(audio.get('size
'))
271 for backup_url in traverse_obj(audio, 'backup_url
', expected_type=list) or []:
274 # backup URLs have lower priorities
280 'duration
': float_or_none(durl.get('length
'), 1000),
288 self._sort_formats(formats)
290 title = self._html_search_regex((
291 r'<h1
[^
>]+title
=(["])(?P<content>[^"]+)',
292 r'<h1
[^
>]+title
=([\'])(?P
<content
>[^
\']+)',
293 r'(?s
)<h1
[^
>]*>(?P
<content
>.+?
)</h1
>',
294 self._meta_regex('title
')
295 ), webpage, 'title
', group='content
', fatal=False)
297 # Get part title for anthologies
298 if page_id is not None:
299 # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload
for each video
.
300 part_info
= traverse_obj(self
._download
_json
(
301 f
'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
302 video_id
, note
='Extracting videos in anthology'), 'data', expected_type
=list)
303 title
= title
if len(part_info
) == 1 else traverse_obj(part_info
, (int(page_id
) - 1, 'part')) or title
305 description
= self
._html
_search
_meta
('description', webpage
)
306 timestamp
= unified_timestamp(self
._html
_search
_regex
(
307 r
'<time[^>]+datetime="([^"]+)"', webpage
, 'upload time',
308 default
=None) or self
._html
_search
_meta
(
309 'uploadDate', webpage
, 'timestamp', default
=None))
310 thumbnail
= self
._html
_search
_meta
(['og:image', 'thumbnailUrl'], webpage
)
312 # TODO 'view_count' requires deobfuscating Javascript
314 'id': f
'{video_id}_part{page_id or 1}',
317 'description': description
,
318 'timestamp': timestamp
,
319 'thumbnail': thumbnail
,
320 'duration': float_or_none(video_info
.get('timelength'), scale
=1000),
323 uploader_mobj
= re
.search(
324 r
'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
328 'uploader': uploader_mobj
.group('name').strip(),
329 'uploader_id': uploader_mobj
.group('id'),
332 if not info
.get('uploader'):
333 info
['uploader'] = self
._html
_search
_meta
(
334 'author', webpage
, 'uploader', default
=None)
337 'tags': traverse_obj(self
._download
_json
(
338 f
'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}',
339 video_id
, fatal
=False, note
='Downloading tags'), ('data', ..., 'tag_name')),
342 info
['subtitles'] = {
345 'url': f
'https://comment.bilibili.com/{cid}.xml',
350 # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
351 # See https://github.com/animelover1984/youtube-dl
353 raw_danmaku = self._download_webpage(
354 f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments')
355 danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576)
356 entries[0]['subtitles'] = {
364 top_level_info
['__post_extractor'] = self
.extract_comments(video_id
)
366 for entry
in entries
:
369 if len(entries
) == 1:
370 entries
[0].update(top_level_info
)
373 for idx
, entry
in enumerate(entries
):
374 entry
['id'] = '%s_part%d' % (video_id
, (idx
+ 1))
380 'description': description
,
381 **info
, **top_level_info
384 def _extract_anthology_entries(self
, bv_id
, video_id
, webpage
):
385 title
= self
._html
_search
_regex
(
386 (r
'<h1[^>]+\btitle=(["\'])(?P
<title
>(?
:(?
!\
1).)+)\
1',
387 r'(?s
)<h1
[^
>]*>(?P
<title
>.+?
)</h1
>',
388 r'<title
>(?P
<title
>.+?
)</title
>'), webpage, 'title
',
390 json_data = self._download_json(
391 f'https
://api
.bilibili
.com
/x
/player
/pagelist?bvid
={bv_id}
&jsonp
=jsonp
',
392 video_id, note='Extracting videos
in anthology
')
394 if json_data['data
']:
395 return self.playlist_from_matches(
396 json_data['data
'], bv_id, title, ie=BiliBiliIE.ie_key(),
397 getter=lambda entry: 'https
://www
.bilibili
.com
/video
/%s?p
=%d' % (bv_id, entry['page
']))
399 def _get_video_id_set(self, id, is_bv):
400 query = {'bvid': id} if is_bv else {'aid': id}
401 response = self._download_json(
402 "http://api.bilibili.cn/x/web-interface/view",
404 note='Grabbing original ID via API
')
406 if response['code
'] == -400:
407 raise ExtractorError('Video ID does
not exist
', expected=True, video_id=id)
408 elif response['code
'] != 0:
409 raise ExtractorError(f'Unknown error occurred during API
check (code {response["code"]}
)',
410 expected=True, video_id=id)
411 return response['data
']['aid
'], response['data
']['bvid
']
413 def _get_comments(self, video_id, commentPageNumber=0):
414 for idx in itertools.count(1):
415 replies = traverse_obj(
417 f'https
://api
.bilibili
.com
/x
/v2
/reply?pn
={idx}
&oid
={video_id}
&type=1&jsonp
=jsonp
&sort
=2&_
=1567227301685',
418 video_id, note=f'Extracting comments
from page {idx}
', fatal=False),
422 for children in map(self._get_all_children, replies):
425 def _get_all_children(self, reply):
427 'author
': traverse_obj(reply, ('member
', 'uname
')),
428 'author_id
': traverse_obj(reply, ('member
', 'mid
')),
429 'id': reply.get('rpid
'),
430 'text
': traverse_obj(reply, ('content
', 'message
')),
431 'timestamp
': reply.get('ctime
'),
432 'parent
': reply.get('parent
') or 'root
',
434 for children in map(self._get_all_children, reply.get('replies
') or []):
438 class BiliBiliBangumiIE(InfoExtractor):
439 _VALID_URL = r'https?
://bangumi\
.bilibili\
.com
/anime
/(?P
<id>\d
+)'
441 IE_NAME = 'bangumi
.bilibili
.com
'
442 IE_DESC = 'BiliBili番剧
'
445 'url
': 'http
://bangumi
.bilibili
.com
/anime
/1869',
449 'description
': 'md5
:6a9622b911565794c11f25f81d6a97d2
',
451 'playlist_count
': 26,
453 'url
': 'http
://bangumi
.bilibili
.com
/anime
/1869',
457 'description
': 'md5
:6a9622b911565794c11f25f81d6a97d2
',
460 'md5
': '91da8621454dd58316851c27c68b0c13
',
465 'description
': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子
...',
466 'timestamp
': 1414538739,
467 'upload_date
': '20141028',
468 'episode
': '疾风怒涛 Tempestuous Temperaments
',
473 'playlist_items
': '1',
def suitable(cls, url):
    """Decline URLs that the main BiliBili extractor already handles."""
    if BiliBiliIE.suitable(url):
        return False
    return super(BiliBiliBangumiIE, cls).suitable(url)
481 def _real_extract(self, url):
482 bangumi_id = self._match_id(url)
484 # Sometimes this API returns a JSONP response
485 season_info = self._download_json(
486 'http
://bangumi
.bilibili
.com
/jsonp
/seasoninfo
/%s.ver
' % bangumi_id,
487 bangumi_id, transform_source=strip_jsonp)['result
']
490 '_type
': 'url_transparent
',
491 'url
': smuggle_url(episode['webplay_url
'], {'no_bangumi_tip': 1}),
492 'ie_key
': BiliBiliIE.ie_key(),
493 'timestamp
': parse_iso8601(episode.get('update_time
'), delimiter=' '),
494 'episode
': episode.get('index_title
'),
495 'episode_number
': int_or_none(episode.get('index
')),
496 } for episode in season_info['episodes
']]
498 entries = sorted(entries, key=lambda entry: entry.get('episode_number
'))
500 return self.playlist_result(
502 season_info.get('bangumi_title
'), season_info.get('evaluate
'))
505 class BilibiliChannelIE(InfoExtractor):
# Fix: the first '.' in the hostname was unescaped and therefore matched
# any character (e.g. 'spaceXbilibili.com'); escape it so only real
# space.bilibili.com URLs are accepted.
_VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)'
_API_URL = 'https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp'
509 'url
': 'https
://space
.bilibili
.com
/3985676/video
',
511 'playlist_mincount
': 112,
514 def _entries(self, list_id):
515 count, max_count = 0, None
517 for page_num in itertools.count(1):
518 data = self._download_json(
519 self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}
')['data
']
521 max_count = max_count or traverse_obj(data, ('page
', 'count
'))
523 entries = traverse_obj(data, ('list', 'vlist
'))
526 for entry in entries:
527 yield self.url_result(
528 'https
://www
.bilibili
.com
/video
/%s' % entry['bvid
'],
529 BiliBiliIE.ie_key(), entry['bvid
'])
531 count += len(entries)
532 if max_count and count >= max_count:
def _real_extract(self, url):
    """Return a playlist of every video uploaded to this channel."""
    channel_id = self._match_id(url)
    video_entries = self._entries(channel_id)
    return self.playlist_result(video_entries, channel_id)
540 class BilibiliCategoryIE(InfoExtractor):
541 IE_NAME = 'Bilibili category extractor
'
542 _MAX_RESULTS = 1000000
543 _VALID_URL = r'https?
://www\
.bilibili\
.com
/v
/[a
-zA
-Z
]+\
/[a
-zA
-Z
]+'
545 'url
': 'https
://www
.bilibili
.com
/v
/kichiku
/mad
',
547 'id': 'kichiku
: mad
',
548 'title
': 'kichiku
: mad
'
550 'playlist_mincount
': 45,
556 def _fetch_page(self, api_url, num_pages, query, page_num):
557 parsed_json = self._download_json(
558 api_url, query, query={'Search_key': query, 'pn': page_num},
559 note='Extracting results
from page
%s of
%s' % (page_num, num_pages))
561 video_list = traverse_obj(parsed_json, ('data
', 'archives
'), expected_type=list)
563 raise ExtractorError('Failed to retrieve video
list for page
%d' % page_num)
565 for video in video_list:
566 yield self.url_result(
567 'https
://www
.bilibili
.com
/video
/%s' % video['bvid
'], 'BiliBili
', video['bvid
'])
569 def _entries(self, category, subcategory, query):
570 # map of categories : subcategories : RIDs
574 'manual_vocaloid
': 126,
581 if category not in rid_map:
582 raise ExtractorError(
583 f'The category {category} isn
\'t supported
. Supported categories
: {list(rid_map.keys())}
')
584 if subcategory not in rid_map[category]:
585 raise ExtractorError(
586 f'The subcategory {subcategory} isn
\'t supported
for this category
. Supported subcategories
: {list(rid_map[category].keys())}
')
587 rid_value = rid_map[category][subcategory]
589 api_url = 'https
://api
.bilibili
.com
/x
/web
-interface
/newlist?rid
=%d&type=1&ps
=20&jsonp
=jsonp
' % rid_value
590 page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
591 page_data = traverse_obj(page_json, ('data
', 'page
'), expected_type=dict)
592 count, size = int_or_none(page_data.get('count
')), int_or_none(page_data.get('size
'))
593 if count is None or not size:
594 raise ExtractorError('Failed to calculate either page count
or size
')
596 num_pages = math.ceil(count / size)
598 return OnDemandPagedList(functools.partial(
599 self._fetch_page, api_url, num_pages, query), size)
def _real_extract(self, url):
    """Extract a bilibili category/subcategory listing page as a playlist."""
    parsed_url = compat_urllib_parse_urlparse(url)
    # URL path looks like /v/<category>/<subcategory>; take those two segments.
    category, subcategory = parsed_url.path.split('/')[2:4]
    query = '%s: %s' % (category, subcategory)
    return self.playlist_result(
        self._entries(category, subcategory, query), query, query)
609 class BiliBiliSearchIE(SearchInfoExtractor):
610 IE_DESC = 'Bilibili video search
'
611 _MAX_RESULTS = 100000
612 _SEARCH_KEY = 'bilisearch
'
614 def _search_results(self, query):
615 for page_num in itertools.count(1):
616 videos = self._download_json(
617 'https
://api
.bilibili
.com
/x
/web
-interface
/search
/type', query,
618 note=f'Extracting results
from page {page_num}
', query={
625 '__refresh__
': 'true
',
626 'search_type
': 'video
',
629 })['data
'].get('result
')
633 yield self.url_result(video['arcurl
'], 'BiliBili
', str(video['aid
']))
636 class BilibiliAudioBaseIE(InfoExtractor):
637 def _call_api(self, path, sid, query=None):
640 return self._download_json(
641 'https
://www
.bilibili
.com
/audio
/music
-service
-c
/web
/' + path,
642 sid, query=query)['data
']
645 class BilibiliAudioIE(BilibiliAudioBaseIE):
646 _VALID_URL = r'https?
://(?
:www\
.)?bilibili\
.com
/audio
/au(?P
<id>\d
+)'
648 'url
': 'https
://www
.bilibili
.com
/audio
/au1003142
',
649 'md5
': 'fec4987014ec94ef9e666d4d158ad03b
',
653 'title
': '【tsukimi】YELLOW
/ 神山羊
',
655 'comment_count
': int,
656 'description
': 'YELLOW的mp3版!
',
663 'thumbnail
': r're
:^https?
://.+\
.jpg
',
664 'timestamp
': 1564836614,
665 'upload_date
': '20190803',
666 'uploader
': 'tsukimi
-つきみぐー
',
671 def _real_extract(self, url):
672 au_id = self._match_id(url)
674 play_data = self._call_api('url
', au_id)
676 'url
': play_data['cdns
'][0],
677 'filesize
': int_or_none(play_data.get('size
')),
681 for a_format in formats:
682 a_format.setdefault('http_headers
', {}).update({
686 song = self._call_api('song
/info
', au_id)
687 title = song['title
']
688 statistic = song.get('statistic
') or {}
691 lyric = song.get('lyric
')
703 'artist
': song.get('author
'),
704 'comment_count
': int_or_none(statistic.get('comment
')),
705 'description
': song.get('intro
'),
706 'duration
': int_or_none(song.get('duration
')),
707 'subtitles
': subtitles,
708 'thumbnail
': song.get('cover
'),
709 'timestamp
': int_or_none(song.get('passtime
')),
710 'uploader
': song.get('uname
'),
711 'view_count
': int_or_none(statistic.get('play
')),
715 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
716 _VALID_URL = r'https?
://(?
:www\
.)?bilibili\
.com
/audio
/am(?P
<id>\d
+)'
718 'url
': 'https
://www
.bilibili
.com
/audio
/am10624
',
721 'title
': '每日新曲推荐(每日
11:00更新)
',
722 'description
': '每天
11:00更新,为你推送最新音乐
',
724 'playlist_count
': 19,
727 def _real_extract(self, url):
728 am_id = self._match_id(url)
730 songs = self._call_api(
731 'song
/of
-menu
', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data
']
735 sid = str_or_none(song.get('id'))
738 entries.append(self.url_result(
739 'https
://www
.bilibili
.com
/audio
/au
' + sid,
740 BilibiliAudioIE.ie_key(), sid))
743 album_data = self._call_api('menu
/info
', am_id) or {}
744 album_title = album_data.get('title
')
746 for entry in entries:
747 entry['album
'] = album_title
748 return self.playlist_result(
749 entries, am_id, album_title, album_data.get('intro
'))
751 return self.playlist_result(entries, am_id)
754 class BiliBiliPlayerIE(InfoExtractor):
755 _VALID_URL = r'https?
://player\
.bilibili\
.com
/player\
.html
\?.*?
\baid
=(?P
<id>\d
+)'
757 'url
': 'http
://player
.bilibili
.com
/player
.html?aid
=92494333&cid
=157926707&page
=1',
758 'only_matching
': True,
def _real_extract(self, url):
    """Redirect player.bilibili.com embed pages to the main BiliBili extractor."""
    video_id = self._match_id(url)
    watch_url = 'http://www.bilibili.tv/video/av%s/' % video_id
    return self.url_result(watch_url, ie=BiliBiliIE.ie_key(), video_id=video_id)
768 class BiliIntlBaseIE(InfoExtractor):
769 _API_URL = 'https
://api
.bilibili
.tv
/intl
/gateway
'
770 _NETRC_MACHINE = 'biliintl
'
772 def _call_api(self, endpoint, *args, **kwargs):
773 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
775 if json['code
'] in (10004004, 10004005, 10023006):
776 self.raise_login_required()
777 elif json['code
'] == 10004001:
778 self.raise_geo_restricted()
780 if json.get('message
') and str(json['code
']) != json['message
']:
781 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}
: {self.IE_NAME} said
: {json["message"]}
'
783 errmsg = kwargs.get('errnote
', 'Unable to download JSON metadata
')
784 if kwargs.get('fatal
'):
785 raise ExtractorError(errmsg)
787 self.report_warning(errmsg)
788 return json.get('data
')
790 def json2srt(self, json):
792 f'{i + 1}
\n{srt_subtitles_timecode(line["from"])}
--> {srt_subtitles_timecode(line["to"])}
\n{line["content"]}
'
793 for i, line in enumerate(traverse_obj(json, (
794 'body
', lambda _, l: l['content
'] and l['from'] and l['to
']))))
797 def _get_subtitles(self, *, ep_id=None, aid=None):
798 sub_json = self._call_api(
799 '/web
/v2
/subtitle
', ep_id or aid, fatal=False,
800 note='Downloading subtitles
list', errnote='Unable to download subtitles
list',
808 for sub in sub_json.get('subtitles
') or []:
809 sub_url = sub.get('url
')
812 sub_data = self._download_json(
813 sub_url, ep_id or aid, errnote='Unable to download subtitles
', fatal=False,
814 note='Downloading subtitles
%s' % f' for {sub["lang"]}
' if sub.get('lang
') else '')
817 subtitles.setdefault(sub.get('lang_key
', 'en
'), []).append({
819 'data
': self.json2srt(sub_data)
823 def _get_formats(self, *, ep_id=None, aid=None):
824 video_json = self._call_api(
825 '/web
/playurl
', ep_id or aid, note='Downloading video formats
',
826 errnote='Unable to download video formats
', query=filter_dict({
831 video_json = video_json['playurl
']
833 for vid in video_json.get('video
') or []:
834 video_res = vid.get('video_resource
') or {}
835 video_info = vid.get('stream_info
') or {}
836 if not video_res.get('url
'):
839 'url
': video_res['url
'],
841 'format_note
': video_info.get('desc_words
'),
842 'width
': video_res.get('width
'),
843 'height
': video_res.get('height
'),
844 'vbr
': video_res.get('bandwidth
'),
846 'vcodec
': video_res.get('codecs
'),
847 'filesize
': video_res.get('size
'),
849 for aud in video_json.get('audio_resource
') or []:
850 if not aud.get('url
'):
855 'abr
': aud.get('bandwidth
'),
856 'acodec
': aud.get('codecs
'),
858 'filesize
': aud.get('size
'),
861 self._sort_formats(formats)
864 def _extract_video_info(self, video_data, *, ep_id=None, aid=None):
867 'title
': video_data.get('title_display
') or video_data.get('title
'),
868 'thumbnail
': video_data.get('cover
'),
869 'episode_number
': int_or_none(self._search_regex(
870 r'^
E(\d
+)(?
:$|
- )', video_data.get('title_display
') or '', 'episode number
', default=None)),
871 'formats
': self._get_formats(ep_id=ep_id, aid=aid),
872 'subtitles
': self._get_subtitles(ep_id=ep_id, aid=aid),
873 'extractor_key
': BiliIntlIE.ie_key(),
876 def _perform_login(self, username, password):
878 from Cryptodome.PublicKey import RSA
879 from Cryptodome.Cipher import PKCS1_v1_5
882 from Crypto.PublicKey import RSA
883 from Crypto.Cipher import PKCS1_v1_5
885 raise ExtractorError('pycryptodomex
not found
. Please install
', expected=True)
887 key_data = self._download_json(
888 'https
://passport
.bilibili
.tv
/x
/intl
/passport
-login
/web
/key?lang
=en
-US
', None,
889 note='Downloading login key
', errnote='Unable to download login key
')['data
']
891 public_key = RSA.importKey(key_data['key
'])
892 password_hash = PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf
-8'))
893 login_post = self._download_json(
894 'https
://passport
.bilibili
.tv
/x
/intl
/passport
-login
/web
/login
/password?lang
=en
-US
', None, data=urlencode_postdata({
895 'username
': username,
896 'password
': base64.b64encode(password_hash).decode('ascii
'),
900 }), note='Logging
in', errnote='Unable to log
in')
901 if login_post.get('code
'):
902 if login_post.get('message
'):
903 raise ExtractorError(f'Unable to log
in: {self.IE_NAME} said
: {login_post["message"]}
', expected=True)
905 raise ExtractorError('Unable to log
in')
908 class BiliIntlIE(BiliIntlBaseIE):
909 _VALID_URL = r'https?
://(?
:www\
.)?
bili(?
:bili\
.tv|intl\
.com
)/(?
:[a
-z
]{2}
/)?
(play
/(?P
<season_id
>\d
+)/(?P
<ep_id
>\d
+)|video
/(?P
<aid
>\d
+))'
912 'url
': 'https
://www
.bilibili
.tv
/en
/play
/34613/341736',
916 'title
': 'E2
- The First Night
',
917 'thumbnail
': r're
:^https
://pic\
.bstarstatic\
.com
/ogv
/.+\
.png$
',
922 'url
': 'https
://www
.bilibili
.tv
/en
/play
/1033760/11005006',
926 'title
': 'E3
- Who?
',
927 'thumbnail
': r're
:^https
://pic\
.bstarstatic\
.com
/ogv
/.+\
.png$
',
931 # Subtitle with empty content
932 'url
': 'https
://www
.bilibili
.tv
/en
/play
/1005144/10131790',
936 'title
': 'E140
- Two Heartbeats
: Kabuto
\'s Trap
',
937 'thumbnail
': r're
:^https
://pic\
.bstarstatic\
.com
/ogv
/.+\
.png$
',
938 'episode_number
': 140,
940 'skip
': 'According to the copyright owner
\'s request
, you may only watch the video after you log
in.'
942 'url
': 'https
://www
.biliintl
.com
/en
/play
/34613/341736',
943 'only_matching
': True,
945 # User-generated content (as opposed to a series licensed from a studio)
946 'url
': 'https
://bilibili
.tv
/en
/video
/2019955076',
947 'only_matching
': True,
950 'url
': 'https
://www
.bilibili
.tv
/video
/2019955076',
951 'only_matching
': True,
954 def _real_extract(self, url):
955 season_id, ep_id, aid = self._match_valid_url(url).group('season_id
', 'ep_id
', 'aid
')
956 video_id = ep_id or aid
957 webpage = self._download_webpage(url, video_id)
960 self._search_json(r'window\
.__INITIAL
_(?
:DATA|STATE
)__\s
*=', webpage, 'preload state
', video_id, default={})
961 or self._search_nuxt_data(webpage, video_id, '__initialState
', fatal=False, traverse=None))
962 video_data = traverse_obj(
963 initial_data, ('OgvVideo
', 'epDetail
'), ('UgcVideo
', 'videoData
'), ('ugc
', 'archive
'), expected_type=dict)
965 if season_id and not video_data:
966 # Non-Bstation layout, read through episode list
967 season_json = self._call_api(f'/web
/v2
/ogv
/play
/episodes?season_id
={season_id}
&platform
=web
', video_id)
968 video_data = traverse_obj(season_json,
969 ('sections
', ..., 'episodes
', lambda _, v: str(v['episode_id
']) == ep_id),
970 expected_type=dict, get_all=False)
971 return self._extract_video_info(video_data or {}, ep_id=ep_id, aid=aid)
974 class BiliIntlSeriesIE(BiliIntlBaseIE):
975 _VALID_URL = r'https?
://(?
:www\
.)?
bili(?
:bili\
.tv|intl\
.com
)/(?
:[a
-z
]{2}
/)?play
/(?P
<id>\d
+)$
'
977 'url
': 'https
://www
.bilibili
.tv
/en
/play
/34613',
978 'playlist_mincount
': 15,
981 'title
': 'Fly Me to the Moon
',
982 'description
': 'md5
:a861ee1c4dc0acfad85f557cc42ac627
',
983 'categories
': ['Romance
', 'Comedy
', 'Slice of life
'],
984 'thumbnail
': r're
:^https
://pic\
.bstarstatic\
.com
/ogv
/.+\
.png$
',
988 'skip_download
': True,
991 'url
': 'https
://www
.biliintl
.com
/en
/play
/34613',
992 'only_matching
': True,
def _entries(self, series_id):
    """Yield extracted episode info for every episode of the series."""
    series_json = self._call_api(
        f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
    episodes = traverse_obj(
        series_json, ('sections', ..., 'episodes', ...), expected_type=dict, default=[])
    for episode in episodes:
        episode_id = str(episode.get('episode_id'))
        yield self._extract_video_info(episode, ep_id=episode_id)
def _real_extract(self, url):
    """Extract a Bilibili International series page as a playlist."""
    series_id = self._match_id(url)
    season_json = self._call_api(
        f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id)
    series_info = season_json.get('season') or {}
    return self.playlist_result(
        self._entries(series_id), series_id,
        series_info.get('title'), series_info.get('description'),
        categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
        thumbnail=url_or_none(series_info.get('horizontal_cover')),
        view_count=parse_count(series_info.get('view')))
1010 class BiliLiveIE(InfoExtractor):
# Fix: both hostname dots were unescaped and matched any character
# (e.g. 'liveXbilibiliYcom/196' would have matched); escape them so only
# live.bilibili.com URLs are accepted.
_VALID_URL = r'https?://live\.bilibili\.com/(?P<id>\d+)'
1014 'url
': 'https
://live
.bilibili
.com
/196',
1017 'description
': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
1019 'title
': "太空狼人杀联动,不被爆杀就算赢",
1020 'thumbnail
': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
1021 'timestamp
': 1650802769,
1025 'url
': 'https
://live
.bilibili
.com
/196?broadcast_type
=0&is_room_feed
=1?spm_id_from
=333.999.space_home
.strengthen_live_card
.click
',
1026 'only_matching
': True
1030 80: {'format_id': 'low', 'format_note': '流畅'},
1031 150: {'format_id': 'high_res', 'format_note': '高清'},
1032 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
1033 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
1034 10000: {'format_id': 'source', 'format_note': '原画'},
1035 20000: {'format_id': '4K', 'format_note': '4K'},
1036 30000: {'format_id': 'dolby', 'format_note': '杜比'},
1039 _quality = staticmethod(qualities(list(_FORMATS)))
def _call_api(self, path, room_id, query):
    """Call the live-streaming API and return its 'data' payload.

    Raises ExtractorError when the API reports a non-zero 'code'.
    """
    response = self._download_json(
        f'https://api.live.bilibili.com/{path}', room_id, query=query)
    if response.get('code') != 0:
        raise ExtractorError(
            response.get('message') or 'Unable to download JSON metadata')
    return response.get('data') or {}
1047 def _parse_formats(self, qn, fmt):
1048 for codec in fmt.get('codec
') or []:
1049 if codec.get('current_qn
') != qn:
1051 for url_info in codec['url_info
']:
1053 'url
': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}
',
1054 'ext
': fmt.get('format_name
'),
1055 'vcodec
': codec.get('codec_name
'),
1056 'quality
': self._quality(qn),
1057 **self._FORMATS[qn],
1060 def _real_extract(self, url):
1061 room_id = self._match_id(url)
1062 room_data = self._call_api('room
/v1
/Room
/get_info
', room_id, {'id': room_id})
1063 if room_data.get('live_status
') == 0:
1064 raise ExtractorError('Streamer
is not live
', expected=True)
1067 for qn in self._FORMATS.keys():
1068 stream_data = self._call_api('xlive
/web
-room
/v2
/index
/getRoomPlayInfo
', room_id, {
1078 for fmt in traverse_obj(stream_data, ('playurl_info
', 'playurl
', 'stream
', ..., 'format
', ...)) or []:
1079 formats.extend(self._parse_formats(qn, fmt))
1080 self._sort_formats(formats)
1084 'title
': room_data.get('title
'),
1085 'description
': room_data.get('description
'),
1086 'thumbnail
': room_data.get('user_cover
'),
1087 'timestamp
': stream_data.get('live_time
'),