10 from .common
import InfoExtractor
, SearchInfoExtractor
11 from ..dependencies
import Cryptodome
12 from ..networking
.exceptions
import HTTPError
31 srt_subtitles_timecode
,
43 class BilibiliBaseIE(InfoExtractor
):
44 _FORMAT_ID_RE
= re
.compile(r
'-(\d+)\.m4s\?')
46 def extract_formats(self
, play_info
):
48 r
['quality']: traverse_obj(r
, 'new_description', 'display_desc')
49 for r
in traverse_obj(play_info
, ('support_formats', lambda _
, v
: v
['quality']))
52 audios
= traverse_obj(play_info
, ('dash', (None, 'dolby'), 'audio', ..., {dict}
))
53 flac_audio
= traverse_obj(play_info
, ('dash', 'flac', 'audio'))
55 audios
.append(flac_audio
)
57 'url': traverse_obj(audio
, 'baseUrl', 'base_url', 'url'),
58 'ext': mimetype2ext(traverse_obj(audio
, 'mimeType', 'mime_type')),
59 'acodec': traverse_obj(audio
, ('codecs', {str.lower}
)),
61 'tbr': float_or_none(audio
.get('bandwidth'), scale
=1000),
62 'filesize': int_or_none(audio
.get('size')),
63 'format_id': str_or_none(audio
.get('id')),
64 } for audio
in audios
]
67 'url': traverse_obj(video
, 'baseUrl', 'base_url', 'url'),
68 'ext': mimetype2ext(traverse_obj(video
, 'mimeType', 'mime_type')),
69 'fps': float_or_none(traverse_obj(video
, 'frameRate', 'frame_rate')),
70 'width': int_or_none(video
.get('width')),
71 'height': int_or_none(video
.get('height')),
72 'vcodec': video
.get('codecs'),
73 'acodec': 'none' if audios
else None,
74 'dynamic_range': {126: 'DV', 125: 'HDR10'}
.get(int_or_none(video
.get('id'))),
75 'tbr': float_or_none(video
.get('bandwidth'), scale
=1000),
76 'filesize': int_or_none(video
.get('size')),
77 'quality': int_or_none(video
.get('id')),
78 'format_id': traverse_obj(
79 video
, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}
, 1),
80 ('id', {str_or_none}
), get_all
=False),
81 'format': format_names
.get(video
.get('id')),
82 } for video
in traverse_obj(play_info
, ('dash', 'video', ...)))
84 missing_formats
= format_names
.keys() - set(traverse_obj(formats
, (..., 'quality')))
86 self
.to_screen(f
'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
87 f
'you have to login or become premium member to download them. {self._login_hint()}')
91 def json2srt(self
, json_data
):
93 for idx
, line
in enumerate(json_data
.get('body') or []):
94 srt_data
+= (f
'{idx + 1}\n'
95 f
'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
96 f
'{line["content"]}\n\n')
99 def _get_subtitles(self
, video_id
, aid
, cid
):
103 'url': f
'https://comment.bilibili.com/{cid}.xml',
107 video_info_json
= self
._download
_json
(f
'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id
)
108 for s
in traverse_obj(video_info_json
, ('data', 'subtitle', 'subtitles', ...)):
109 subtitles
.setdefault(s
['lan'], []).append({
111 'data': self
.json2srt(self
._download
_json
(s
['subtitle_url'], video_id
))
115 def _get_chapters(self
, aid
, cid
):
116 chapters
= aid
and cid
and self
._download
_json
(
117 'https://api.bilibili.com/x/player/v2', aid
, query
={'aid': aid, 'cid': cid}
,
118 note
='Extracting chapters', fatal
=False)
119 return traverse_obj(chapters
, ('data', 'view_points', ..., {
121 'start_time': 'from',
125 def _get_comments(self
, aid
):
126 for idx
in itertools
.count(1):
127 replies
= traverse_obj(
129 f
'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
130 aid
, note
=f
'Extracting comments from page {idx}', fatal
=False),
134 for children
in map(self
._get
_all
_children
, replies
):
137 def _get_all_children(self
, reply
):
139 'author': traverse_obj(reply
, ('member', 'uname')),
140 'author_id': traverse_obj(reply
, ('member', 'mid')),
141 'id': reply
.get('rpid'),
142 'text': traverse_obj(reply
, ('content', 'message')),
143 'timestamp': reply
.get('ctime'),
144 'parent': reply
.get('parent') or 'root',
146 for children
in map(self
._get
_all
_children
, traverse_obj(reply
, ('replies', ...))):
    def _get_episodes_from_season(self, ss_id, url):
        """Yield url_result entries for every episode of a bangumi season.

        ss_id: numeric season id for the pgc season-section API.
        url: originating page URL, sent as Referer together with the
        geo-verification headers.
        """
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        # Keep only episodes that have both a valid share_url and an id;
        # each is delegated to BiliBiliBangumiIE under the 'ep<id>' video id.
        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
161 class BiliBiliIE(BilibiliBaseIE
):
162 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
165 'url': 'https://www.bilibili.com/video/BV13x41117TL',
167 'id': 'BV13x41117TL',
168 'title': '阿滴英文|英文歌分享#6 "Closer',
170 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
171 'uploader_id': '65880958',
173 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
176 'comment_count': int,
177 'upload_date': '20170301',
178 'timestamp': 1488353834,
184 'url': 'http://www.bilibili.com/video/av1074402/',
186 'thumbnail': r
're:^https?://.*\.(jpg|jpeg)$',
189 'uploader_id': '156160',
190 'id': 'BV11x411K7CN',
193 'upload_date': '20140420',
194 'timestamp': 1397983878,
195 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
197 'comment_count': int,
201 'params': {'skip_download': True}
,
204 'url': 'https://www.bilibili.com/video/BV1bK411W797',
206 'id': 'BV1bK411W797',
207 'title': '物语中的人物是如何吐槽自己的OP的'
209 'playlist_count': 18,
212 'id': 'BV1bK411W797_p1',
214 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
216 'timestamp': 1589601697,
217 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
218 'uploader': '打牌还是打桩',
219 'uploader_id': '150259984',
221 'comment_count': int,
222 'upload_date': '20200516',
224 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
229 'note': 'Specific page of Anthology',
230 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
232 'id': 'BV1bK411W797_p1',
234 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
236 'timestamp': 1589601697,
237 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
238 'uploader': '打牌还是打桩',
239 'uploader_id': '150259984',
241 'comment_count': int,
242 'upload_date': '20200516',
244 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
248 'note': 'video has subtitles',
249 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
251 'id': 'BV12N4y1M7rh',
253 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
255 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
257 'upload_date': '20220709',
259 'timestamp': 1657347907,
260 'uploader_id': '1326814124',
261 'comment_count': int,
264 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
265 'subtitles': 'count:2'
267 'params': {'listsubtitles': True}
,
269 'url': 'https://www.bilibili.com/video/av8903802/',
271 'id': 'BV13x41117TL',
273 'title': '阿滴英文|英文歌分享#6 "Closer',
274 'upload_date': '20170301',
275 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
276 'timestamp': 1488353834,
277 'uploader_id': '65880958',
279 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
282 'comment_count': int,
287 'skip_download': True,
290 'note': 'video has chapter',
291 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
293 'id': 'BV1vL411G7N7',
295 'title': '如何为你的B站视频添加进度条分段',
296 'timestamp': 1634554558,
297 'upload_date': '20211018',
298 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
300 'uploader': '爱喝咖啡的当麻',
302 'uploader_id': '1680903',
303 'chapters': 'count:6',
304 'comment_count': int,
307 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
309 'params': {'skip_download': True}
,
311 'note': 'video redirects to festival page',
312 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
314 'id': 'BV1wP4y1P72h',
316 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
317 'timestamp': 1643947497,
318 'upload_date': '20220204',
319 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
320 'uploader': '叨叨冯聊音乐',
322 'uploader_id': '528182630',
325 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
327 'params': {'skip_download': True}
,
329 'note': 'newer festival video',
330 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
332 'id': 'BV1ay4y1d77f',
334 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
335 'timestamp': 1674273600,
336 'upload_date': '20230121',
337 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
339 'duration': 1111.722,
340 'uploader_id': '8469526',
343 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
345 'params': {'skip_download': True}
,
348 def _real_extract(self
, url
):
349 video_id
= self
._match
_id
(url
)
350 webpage
= self
._download
_webpage
(url
, video_id
)
351 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)
353 is_festival
= 'videoData' not in initial_state
355 video_data
= initial_state
['videoInfo']
357 play_info
= self
._search
_json
(r
'window\.__playinfo__\s*=', webpage
, 'play info', video_id
)['data']
358 video_data
= initial_state
['videoData']
360 video_id
, title
= video_data
['bvid'], video_data
.get('title')
362 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
363 page_list_json
= not is_festival
and traverse_obj(
365 'https://api.bilibili.com/x/player/pagelist', video_id
,
366 fatal
=False, query
={'bvid': video_id, 'jsonp': 'jsonp'}
,
367 note
='Extracting videos in anthology'),
368 'data', expected_type
=list) or []
369 is_anthology
= len(page_list_json
) > 1
371 part_id
= int_or_none(parse_qs(url
).get('p', [None])[-1])
372 if is_anthology
and not part_id
and self
._yes
_playlist
(video_id
, video_id
):
373 return self
.playlist_from_matches(
374 page_list_json
, video_id
, title
, ie
=BiliBiliIE
,
375 getter
=lambda entry
: f
'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
378 part_id
= part_id
or 1
379 title
+= f
' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
381 aid
= video_data
.get('aid')
382 old_video_id
= format_field(aid
, None, f
'%s_part{part_id or 1}')
384 cid
= traverse_obj(video_data
, ('pages', part_id
- 1, 'cid')) if part_id
else video_data
.get('cid')
388 play_info
= self
._download
_json
(
389 'https://api.bilibili.com/x/player/playurl', video_id
,
390 query
={'bvid': video_id, 'cid': cid, 'fnval': 4048}
,
391 note
='Extracting festival video formats')['data']
393 festival_info
= traverse_obj(initial_state
, {
394 'uploader': ('videoInfo', 'upName'),
395 'uploader_id': ('videoInfo', 'upMid', {str_or_none}
),
396 'like_count': ('videoStatus', 'like', {int_or_none}
),
397 'thumbnail': ('sectionEpisodes', lambda _
, v
: v
['bvid'] == video_id
, 'cover'),
401 **traverse_obj(initial_state
, {
402 'uploader': ('upData', 'name'),
403 'uploader_id': ('upData', 'mid', {str_or_none}
),
404 'like_count': ('videoData', 'stat', 'like', {int_or_none}
),
405 'tags': ('tags', ..., 'tag_name'),
406 'thumbnail': ('videoData', 'pic', {url_or_none}
),
409 **traverse_obj(video_data
, {
410 'description': 'desc',
411 'timestamp': ('pubdate', {int_or_none}
),
412 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}
),
413 'comment_count': ('stat', 'reply', {int_or_none}
),
415 'id': f
'{video_id}{format_field(part_id, None, "_p%d")}',
416 'formats': self
.extract_formats(play_info
),
417 '_old_archive_ids': [make_archive_id(self
, old_video_id
)] if old_video_id
else None,
419 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
420 'chapters': self
._get
_chapters
(aid
, cid
),
421 'subtitles': self
.extract_subtitles(video_id
, aid
, cid
),
422 '__post_extractor': self
.extract_comments(aid
),
423 'http_headers': {'Referer': url}
,
427 class BiliBiliBangumiIE(BilibiliBaseIE
):
428 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
431 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
438 'season_id': '26801',
441 'episode_id': '267851',
444 'duration': 1425.256,
445 'timestamp': 1554566400,
446 'upload_date': '20190406',
447 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
449 'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
452 def _real_extract(self
, url
):
453 video_id
= self
._match
_id
(url
)
454 episode_id
= video_id
[2:]
455 webpage
= self
._download
_webpage
(url
, video_id
)
457 if '您所在的地区无法观看本片' in webpage
:
458 raise GeoRestrictedError('This video is restricted')
459 elif '正在观看预览,大会员免费看全片' in webpage
:
460 self
.raise_login_required('This video is for premium members only')
462 headers
= {'Referer': url, **self.geo_verification_headers()}
463 play_info
= self
._download
_json
(
464 'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id
,
465 'Extracting episode', query
={'fnval': '4048', 'ep_id': episode_id}
,
467 premium_only
= play_info
.get('code') == -10403
468 play_info
= traverse_obj(play_info
, ('result', 'video_info', {dict}
)) or {}
470 formats
= self
.extract_formats(play_info
)
471 if not formats
and (premium_only
or '成为大会员抢先看' in webpage
or '开通大会员观看' in webpage
):
472 self
.raise_login_required('This video is for premium members only')
474 bangumi_info
= self
._download
_json
(
475 'https://api.bilibili.com/pgc/view/web/season', video_id
, 'Get episode details',
476 query
={'ep_id': episode_id}
, headers
=headers
)['result']
478 episode_number
, episode_info
= next((
479 (idx
, ep
) for idx
, ep
in enumerate(traverse_obj(
480 bangumi_info
, ('episodes', ..., {dict}
)), 1)
481 if str_or_none(ep
.get('id')) == episode_id
), (1, {}))
483 season_id
= bangumi_info
.get('season_id')
484 season_number
= season_id
and next((
485 idx
+ 1 for idx
, e
in enumerate(
486 traverse_obj(bangumi_info
, ('seasons', ...)))
487 if e
.get('season_id') == season_id
490 aid
= episode_info
.get('aid')
495 **traverse_obj(bangumi_info
, {
496 'series': ('series', 'series_title', {str}
),
497 'series_id': ('series', 'series_id', {str_or_none}
),
498 'thumbnail': ('square_cover', {url_or_none}
),
500 'title': join_nonempty('title', 'long_title', delim
=' ', from_dict
=episode_info
),
501 'episode': episode_info
.get('long_title'),
502 'episode_id': episode_id
,
503 'episode_number': int_or_none(episode_info
.get('title')) or episode_number
,
504 'season_id': str_or_none(season_id
),
505 'season_number': season_number
,
506 'timestamp': int_or_none(episode_info
.get('pub_time')),
507 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
508 'subtitles': self
.extract_subtitles(video_id
, aid
, episode_info
.get('cid')),
509 '__post_extractor': self
.extract_comments(aid
),
510 'http_headers': headers
,
514 class BiliBiliBangumiMediaIE(BilibiliBaseIE
):
515 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
517 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
521 'playlist_mincount': 25,
524 def _real_extract(self
, url
):
525 media_id
= self
._match
_id
(url
)
526 webpage
= self
._download
_webpage
(url
, media_id
)
527 ss_id
= self
._search
_json
(
528 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial_state', media_id
)['mediaInfo']['season_id']
530 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), media_id
)
533 class BiliBiliBangumiSeasonIE(BilibiliBaseIE
):
534 _VALID_URL
= r
'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
536 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
540 'playlist_mincount': 26
543 def _real_extract(self
, url
):
544 ss_id
= self
._match
_id
(url
)
546 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), ss_id
)
class BilibiliSpaceBaseIE(InfoExtractor):
    """Shared pagination helper for the space.bilibili.com extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Fetch page 0 eagerly, then expose all pages lazily.

        Returns (metadata, paged_list): metadata is derived from the first
        page; paged_list is an InAdvancePagedList over every page.
        """
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        def entries_for(page_idx):
            # Reuse the eagerly fetched first page instead of refetching it.
            page = first_page if page_idx == 0 else fetch_page(page_idx)
            return get_entries(page)

        return metadata, InAdvancePagedList(
            entries_for, metadata['page_count'], metadata['page_size'])
561 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE
):
562 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
564 'url': 'https://space.bilibili.com/3985676/video',
568 'playlist_mincount': 178,
570 'url': 'https://space.bilibili.com/313580179/video',
574 'playlist_mincount': 92,
577 def _extract_signature(self
, playlist_id
):
578 session_data
= self
._download
_json
('https://api.bilibili.com/x/web-interface/nav', playlist_id
, fatal
=False)
580 key_from_url
= lambda x
: x
[x
.rfind('/') + 1:].split('.')[0]
581 img_key
= traverse_obj(
582 session_data
, ('data', 'wbi_img', 'img_url', {key_from_url}
)) or '34478ba821254d9d93542680e3b86100'
583 sub_key
= traverse_obj(
584 session_data
, ('data', 'wbi_img', 'sub_url', {key_from_url}
)) or '7e16a90d190a4355a78fd00b32a38de6'
586 session_key
= img_key
+ sub_key
588 signature_values
= []
590 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
591 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
592 57, 62, 11, 36, 20, 34, 44, 52
594 char_at_position
= try_call(lambda: session_key
[position
])
596 signature_values
.append(char_at_position
)
598 return ''.join(signature_values
)[:32]
600 def _real_extract(self
, url
):
601 playlist_id
, is_video_url
= self
._match
_valid
_url
(url
).group('id', 'video')
603 self
.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
604 'To download audios, add a "/audio" to the URL')
606 signature
= self
._extract
_signature
(playlist_id
)
608 def fetch_page(page_idx
):
613 'order_avoided': 'true',
618 'web_location': 1550101,
619 'wts': int(time
.time()),
621 query
['w_rid'] = hashlib
.md5(f
'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
624 response
= self
._download
_json
('https://api.bilibili.com/x/space/wbi/arc/search',
625 playlist_id
, note
=f
'Downloading page {page_idx}', query
=query
)
626 except ExtractorError
as e
:
627 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 412:
628 raise ExtractorError(
629 'Request is blocked by server (412), please add cookies, wait and try later.', expected
=True)
631 if response
['code'] == -401:
632 raise ExtractorError(
633 'Request is blocked by server (401), please add cookies, wait and try later.', expected
=True)
634 return response
['data']
636 def get_metadata(page_data
):
637 page_size
= page_data
['page']['ps']
638 entry_count
= page_data
['page']['count']
640 'page_count': math
.ceil(entry_count
/ page_size
),
641 'page_size': page_size
,
644 def get_entries(page_data
):
645 for entry
in traverse_obj(page_data
, ('list', 'vlist')) or []:
646 yield self
.url_result(f
'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE
, entry
['bvid'])
648 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
649 return self
.playlist_result(paged_list
, playlist_id
)
652 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE
):
653 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
655 'url': 'https://space.bilibili.com/313580179/audio',
659 'playlist_mincount': 1,
    def _real_extract(self, url):
        """Extract a user's uploaded audios (space .../audio) as a playlist."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # API pages are 1-based; page_idx from the paged list is 0-based.
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            # Pagination counts come straight from the API response.
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            # Each song is delegated to BilibiliAudioIE via its au<id> URL.
            for entry in page_data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Base for list-style space extractors (collections, series, favlists)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        # Collect every BV id found under bvid_keys/.../ending_key and
        # delegate each video to BiliBiliIE.
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        # Best-effort scrape of the uploader name from the <title> of the
        # user's space page (both steps are fatal=False).
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        # Pagination bookkeeping is internal; strip it from the metadata
        # that gets merged into the playlist result.
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
701 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE
):
702 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
704 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
706 'id': '2142762_57445',
707 'title': '【完结】《底特律 变人》全结局流程解说',
710 'uploader_id': '2142762',
713 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
715 'playlist_mincount': 31,
    def _real_extract(self, url):
        """Extract a space collection (seasons_archives_list) as a playlist."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            # API pages are 1-based; page_idx from the paged list is 0-based.
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            # BV ids live under the 'archives' key of each page.
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
751 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE
):
752 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
754 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
756 'id': '1958703906_547718',
758 'description': '直播回放',
759 'uploader': '靡烟miya',
760 'uploader_id': '1958703906',
761 'timestamp': 1637985853,
762 'upload_date': '20211127',
763 'modified_timestamp': int,
764 'modified_date': str,
766 'playlist_mincount': 513,
769 def _real_extract(self
, url
):
770 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
771 playlist_id
= f
'{mid}_{sid}'
772 playlist_meta
= traverse_obj(self
._download
_json
(
773 f
'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id
, fatal
=False
775 'title': ('data', 'meta', 'name', {str}
),
776 'description': ('data', 'meta', 'description', {str}
),
777 'uploader_id': ('data', 'meta', 'mid', {str_or_none}
),
778 'timestamp': ('data', 'meta', 'ctime', {int_or_none}
),
779 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}
),
782 def fetch_page(page_idx
):
783 return self
._download
_json
(
784 'https://api.bilibili.com/x/series/archives',
785 playlist_id
, note
=f
'Downloading page {page_idx}',
786 query
={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
)['data']
788 def get_metadata(page_data
):
789 page_size
= page_data
['page']['size']
790 entry_count
= page_data
['page']['total']
792 'page_count': math
.ceil(entry_count
/ page_size
),
793 'page_size': page_size
,
794 'uploader': self
._get
_uploader
(mid
, playlist_id
),
798 def get_entries(page_data
):
799 return self
._get
_entries
(page_data
, 'archives')
801 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
802 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
805 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE
):
806 _VALID_URL
= r
'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
808 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
814 'uploader_id': '84912',
815 'timestamp': 1604905176,
816 'upload_date': '20201109',
817 'modified_timestamp': int,
818 'modified_date': str,
819 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
823 'playlist_mincount': 22,
825 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
826 'only_matching': True,
    def _real_extract(self, url):
        """Extract a favorites list (favlist) as a playlist."""
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        # -403: the list is private and the current session has no access.
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The /ids endpoint returns the full id list in one response.
        entries = self._get_entries(self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries'), 'data')

        return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        })))
855 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE
):
856 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
858 'url': 'https://www.bilibili.com/watchlater/#/list',
859 'info_dict': {'id': 'watchlater'}
,
860 'playlist_mincount': 0,
861 'skip': 'login required',
864 def _real_extract(self
, url
):
865 list_id
= getattr(self
._get
_cookies
(url
).get('DedeUserID'), 'value', 'watchlater')
866 watchlater_info
= self
._download
_json
(
867 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id
)
868 if watchlater_info
['code'] == -101:
869 self
.raise_login_required(msg
='You need to login to access your watchlater list')
870 entries
= self
._get
_entries
(watchlater_info
, ('data', 'list'))
871 return self
.playlist_result(entries
, id=list_id
, title
='稍后再看')
874 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE
):
875 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
877 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
881 'uploader': '靡烟miya',
882 'uploader_id': '1958703906',
883 'timestamp': 1637985853,
884 'upload_date': '20211127',
886 'playlist_mincount': 513,
888 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
892 'playlist_mincount': 513,
893 'skip': 'redirect url',
895 'url': 'https://www.bilibili.com/list/ml1103407912',
897 'id': '3_1103407912',
900 'uploader_id': '84912',
901 'timestamp': 1604905176,
902 'upload_date': '20201109',
903 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
905 'playlist_mincount': 22,
907 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
909 'id': '3_1103407912',
911 'playlist_mincount': 22,
912 'skip': 'redirect url',
914 'url': 'https://www.bilibili.com/list/watchlater',
915 'info_dict': {'id': 'watchlater'}
,
916 'playlist_mincount': 0,
917 'skip': 'login required',
919 'url': 'https://www.bilibili.com/medialist/play/watchlater',
920 'info_dict': {'id': 'watchlater'}
,
921 'playlist_mincount': 0,
922 'skip': 'login required',
    def _extract_medialist(self, query, list_id):
        """Yield entries from the medialist API, following cursor pagination.

        query is mutated in place: 'oid' is advanced to the id of the last
        item of each page so the next request resumes after it.
        """
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            # Cursor for the next request: id of the last returned item.
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break
936 def _real_extract(self
, url
):
937 list_id
= self
._match
_id
(url
)
938 webpage
= self
._download
_webpage
(url
, list_id
)
939 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', list_id
)
940 if traverse_obj(initial_state
, ('error', 'code', {int_or_none}
)) != 200:
941 error_code
= traverse_obj(initial_state
, ('error', 'trueCode', {int_or_none}
))
942 error_message
= traverse_obj(initial_state
, ('error', 'message', {str_or_none}
))
943 if error_code
== -400 and list_id
== 'watchlater':
944 self
.raise_login_required('You need to login to access your watchlater playlist')
945 elif error_code
== -403:
946 self
.raise_login_required('This is a private playlist. You need to login as its owner')
947 elif error_code
== 11010:
948 raise ExtractorError('Playlist is no longer available', expected
=True)
949 raise ExtractorError(f
'Could not access playlist: {error_code} {error_message}')
953 'with_current': False,
954 **traverse_obj(initial_state
, {
955 'type': ('playlist', 'type', {int_or_none}
),
956 'biz_id': ('playlist', 'id', {int_or_none}
),
957 'tid': ('tid', {int_or_none}
),
958 'sort_field': ('sortFiled', {int_or_none}
),
959 'desc': ('desc', {bool_or_none}
, {str_or_none}
, {str.lower}
),
963 'id': f
'{query["type"]}_{query["biz_id"]}',
964 **traverse_obj(initial_state
, ('mediaListInfo', {
965 'title': ('title', {str}
),
966 'uploader': ('upper', 'name', {str}
),
967 'uploader_id': ('upper', 'mid', {str_or_none}
),
968 'timestamp': ('ctime', {int_or_none}
),
969 'thumbnail': ('cover', {url_or_none}
),
972 return self
.playlist_result(self
._extract
_medialist
(query
, list_id
), **metadata
)
975 class BilibiliCategoryIE(InfoExtractor
):
976 IE_NAME
= 'Bilibili category extractor'
977 _MAX_RESULTS
= 1000000
978 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
980 'url': 'https://www.bilibili.com/v/kichiku/mad',
982 'id': 'kichiku: mad',
983 'title': 'kichiku: mad'
985 'playlist_mincount': 45,
991 def _fetch_page(self
, api_url
, num_pages
, query
, page_num
):
992 parsed_json
= self
._download
_json
(
993 api_url
, query
, query
={'Search_key': query, 'pn': page_num}
,
994 note
='Extracting results from page %s of %s' % (page_num
, num_pages
))
996 video_list
= traverse_obj(parsed_json
, ('data', 'archives'), expected_type
=list)
998 raise ExtractorError('Failed to retrieve video list for page %d' % page_num
)
1000 for video
in video_list
:
1001 yield self
.url_result(
1002 'https://www.bilibili.com/video/%s' % video
['bvid'], 'BiliBili', video
['bvid'])
def _entries(self, category, subcategory, query):
    """Return an OnDemandPagedList over all videos in the given category.

    Validates category/subcategory against the known RID map, probes page 1
    to learn the total count and page size, then pages lazily via _fetch_page.
    Raises ExtractorError for unknown categories or an unusable paging reply.
    """
    # map of categories : subcategories : RIDs
    # NOTE(review): entries other than 'manual_vocaloid' reconstructed — verify
    # against upstream before relying on them
    rid_map = {
        'kichiku': {
            'mad': 26,
            'manual_vocaloid': 126,
            'guide': 22,
            'theatre': 216,
            'course': 127,
        },
    }

    if category not in rid_map:
        raise ExtractorError(
            f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
    if subcategory not in rid_map[category]:
        raise ExtractorError(
            f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
    rid_value = rid_map[category][subcategory]

    api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
    page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
    # BUGFIX: traverse_obj may return None when 'data.page' is missing; fall
    # back to {} so we raise the clear ExtractorError below instead of an
    # AttributeError on .get()
    page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
    count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
    if count is None or not size:
        raise ExtractorError('Failed to calculate either page count or size')

    num_pages = math.ceil(count / size)

    return OnDemandPagedList(functools.partial(
        self._fetch_page, api_url, num_pages, query), size)
def _real_extract(self, url):
    """Build a paged playlist for a /v/<category>/<subcategory> URL."""
    # path components: ['', 'v', category, subcategory, ...]
    category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
    query = f'{category}: {subcategory}'

    return self.playlist_result(self._entries(category, subcategory, query), query, query)
1043 class BiliBiliSearchIE(SearchInfoExtractor
):
1044 IE_DESC
= 'Bilibili video search'
1045 _MAX_RESULTS
= 100000
1046 _SEARCH_KEY
= 'bilisearch'
1048 def _search_results(self
, query
):
1049 for page_num
in itertools
.count(1):
1050 videos
= self
._download
_json
(
1051 'https://api.bilibili.com/x/web-interface/search/type', query
,
1052 note
=f
'Extracting results from page {page_num}', query
={
1053 'Search_key': query
,
1059 '__refresh__': 'true',
1060 'search_type': 'video',
1063 })['data'].get('result')
1066 for video
in videos
:
1067 yield self
.url_result(video
['arcurl'], 'BiliBili', str(video
['aid']))
1070 class BilibiliAudioBaseIE(InfoExtractor
):
def _call_api(self, path, sid, query=None):
    """Fetch JSON from the audio music-service API and return its 'data' field.

    When no query is supplied, defaults to {'sid': sid}.
    """
    result = self._download_json(
        'https://www.bilibili.com/audio/music-service-c/web/' + path,
        sid, query=query or {'sid': sid})
    return result['data']
1079 class BilibiliAudioIE(BilibiliAudioBaseIE
):
1080 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1082 'url': 'https://www.bilibili.com/audio/au1003142',
1083 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1087 'title': '【tsukimi】YELLOW / 神山羊',
1088 'artist': 'tsukimi',
1089 'comment_count': int,
1090 'description': 'YELLOW的mp3版!',
1097 'thumbnail': r
're:^https?://.+\.jpg',
1098 'timestamp': 1564836614,
1099 'upload_date': '20190803',
1100 'uploader': 'tsukimi-つきみぐー',
1105 def _real_extract(self
, url
):
1106 au_id
= self
._match
_id
(url
)
1108 play_data
= self
._call
_api
('url', au_id
)
1110 'url': play_data
['cdns'][0],
1111 'filesize': int_or_none(play_data
.get('size')),
1115 for a_format
in formats
:
1116 a_format
.setdefault('http_headers', {}).update({
1120 song
= self
._call
_api
('song/info', au_id
)
1121 title
= song
['title']
1122 statistic
= song
.get('statistic') or {}
1125 lyric
= song
.get('lyric')
1137 'artist': song
.get('author'),
1138 'comment_count': int_or_none(statistic
.get('comment')),
1139 'description': song
.get('intro'),
1140 'duration': int_or_none(song
.get('duration')),
1141 'subtitles': subtitles
,
1142 'thumbnail': song
.get('cover'),
1143 'timestamp': int_or_none(song
.get('passtime')),
1144 'uploader': song
.get('uname'),
1145 'view_count': int_or_none(statistic
.get('play')),
1149 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE
):
1150 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1152 'url': 'https://www.bilibili.com/audio/am10624',
1155 'title': '每日新曲推荐(每日11:00更新)',
1156 'description': '每天11:00更新,为你推送最新音乐',
1158 'playlist_count': 19,
1161 def _real_extract(self
, url
):
1162 am_id
= self
._match
_id
(url
)
1164 songs
= self
._call
_api
(
1165 'song/of-menu', am_id
, {'sid': am_id, 'pn': 1, 'ps': 100}
)['data']
1169 sid
= str_or_none(song
.get('id'))
1172 entries
.append(self
.url_result(
1173 'https://www.bilibili.com/audio/au' + sid
,
1174 BilibiliAudioIE
.ie_key(), sid
))
1177 album_data
= self
._call
_api
('menu/info', am_id
) or {}
1178 album_title
= album_data
.get('title')
1180 for entry
in entries
:
1181 entry
['album'] = album_title
1182 return self
.playlist_result(
1183 entries
, am_id
, album_title
, album_data
.get('intro'))
1185 return self
.playlist_result(entries
, am_id
)
1188 class BiliBiliPlayerIE(InfoExtractor
):
1189 _VALID_URL
= r
'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1191 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1192 'only_matching': True,
def _real_extract(self, url):
    """Redirect an embedded-player URL to the canonical video page."""
    aid = self._match_id(url)
    target = 'http://www.bilibili.tv/video/av%s/' % aid
    return self.url_result(target, ie=BiliBiliIE.ie_key(), video_id=aid)
1202 class BiliIntlBaseIE(InfoExtractor
):
1203 _API_URL
= 'https://api.bilibili.tv/intl/gateway'
1204 _NETRC_MACHINE
= 'biliintl'
def _call_api(self, endpoint, *args, **kwargs):
    """Call the Bili Intl gateway API and return the 'data' payload.

    Known error codes trigger login/geo-restriction errors; any other
    non-zero code either raises ExtractorError (kwargs['fatal']) or emits
    a warning, using the API's own message when it adds information.
    """
    # NOTE: local renamed from 'json' to stop shadowing the builtin module name
    response = self._download_json(self._API_URL + endpoint, *args, **kwargs)
    if response.get('code'):
        if response['code'] in (10004004, 10004005, 10023006):
            self.raise_login_required()
        elif response['code'] == 10004001:
            self.raise_geo_restricted()

        # only append the server message when it isn't just the code echoed back
        if response.get('message') and str(response['code']) != response['message']:
            errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {response["message"]}'
        else:
            errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
        if kwargs.get('fatal'):
            raise ExtractorError(errmsg)
        else:
            self.report_warning(errmsg)
    return response.get('data')
def json2srt(self, json):
    """Convert a Bili Intl subtitle JSON document to SRT-formatted text.

    Entries with empty content or missing from/to timestamps are dropped.
    """
    valid_lines = traverse_obj(json, (
        'body', lambda _, l: l['content'] and l['from'] and l['to']))
    return '\n\n'.join(
        f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
        for i, line in enumerate(valid_lines))
def _get_subtitles(self, *, ep_id=None, aid=None):
    """Download the subtitle list and each track, converted to SRT.

    Returns a dict mapping language keys to subtitle entries; tracks that
    fail to download are skipped (all downloads are non-fatal).
    """
    sub_json = self._call_api(
        '/web/v2/subtitle', ep_id or aid, fatal=False,
        note='Downloading subtitles list', errnote='Unable to download subtitles list',
        query=filter_dict({
            'platform': 'web',
            's_locale': 'en_US',
            'episode_id': ep_id,
            'aid': aid,
        })) or {}
    subtitles = {}
    for sub in sub_json.get('subtitles') or []:
        sub_url = sub.get('url')
        if not sub_url:
            continue
        sub_data = self._download_json(
            sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
            # BUGFIX: parenthesize so the conditional selects only the ' for ...'
            # suffix; previously the whole note collapsed to '' when 'lang' was
            # missing (conditional expressions bind looser than %)
            note='Downloading subtitles%s' % (f' for {sub["lang"]}' if sub.get('lang') else ''))
        if not sub_data:
            continue
        subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
            'ext': 'srt',
            'data': self.json2srt(sub_data),
        })
    return subtitles
1257 def _get_formats(self
, *, ep_id
=None, aid
=None):
1258 video_json
= self
._call
_api
(
1259 '/web/playurl', ep_id
or aid
, note
='Downloading video formats',
1260 errnote
='Unable to download video formats', query
=filter_dict({
1265 video_json
= video_json
['playurl']
1267 for vid
in video_json
.get('video') or []:
1268 video_res
= vid
.get('video_resource') or {}
1269 video_info
= vid
.get('stream_info') or {}
1270 if not video_res
.get('url'):
1273 'url': video_res
['url'],
1275 'format_note': video_info
.get('desc_words'),
1276 'width': video_res
.get('width'),
1277 'height': video_res
.get('height'),
1278 'vbr': video_res
.get('bandwidth'),
1280 'vcodec': video_res
.get('codecs'),
1281 'filesize': video_res
.get('size'),
1283 for aud
in video_json
.get('audio_resource') or []:
1284 if not aud
.get('url'):
1289 'abr': aud
.get('bandwidth'),
1290 'acodec': aud
.get('codecs'),
1292 'filesize': aud
.get('size'),
def _parse_video_metadata(self, video_data):
    """Extract common metadata fields from an episode/video JSON object.

    The episode number is parsed from display titles shaped like
    'E<N>' or 'E<N> - ...'.
    """
    display_title = video_data.get('title_display')
    return {
        'title': display_title or video_data.get('title'),
        'thumbnail': video_data.get('cover'),
        'episode_number': int_or_none(self._search_regex(
            r'^E(\d+)(?:$| - )', display_title or '', 'episode number', default=None)),
    }
1305 def _perform_login(self
, username
, password
):
1306 if not Cryptodome
.RSA
:
1307 raise ExtractorError('pycryptodomex not found. Please install', expected
=True)
1309 key_data
= self
._download
_json
(
1310 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1311 note
='Downloading login key', errnote
='Unable to download login key')['data']
1313 public_key
= Cryptodome
.RSA
.importKey(key_data
['key'])
1314 password_hash
= Cryptodome
.PKCS1_v1_5
.new(public_key
).encrypt((key_data
['hash'] + password
).encode('utf-8'))
1315 login_post
= self
._download
_json
(
1316 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data
=urlencode_postdata({
1317 'username': username
,
1318 'password': base64
.b64encode(password_hash
).decode('ascii'),
1320 's_locale': 'en_US',
1322 }), note
='Logging in', errnote
='Unable to log in')
1323 if login_post
.get('code'):
1324 if login_post
.get('message'):
1325 raise ExtractorError(f
'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected
=True)
1327 raise ExtractorError('Unable to log in')
1330 class BiliIntlIE(BiliIntlBaseIE
):
1331 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1334 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1338 'title': 'E2 - The First Night',
1339 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1340 'episode_number': 2,
1341 'upload_date': '20201009',
1342 'episode': 'Episode 2',
1343 'timestamp': 1602259500,
1344 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1348 'title': '<Untitled Chapter 1>'
1350 'start_time': 76.242,
1351 'end_time': 161.161,
1354 'start_time': 1325.742,
1355 'end_time': 1403.903,
1361 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1365 'title': 'E3 - Who?',
1366 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1367 'episode_number': 3,
1368 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1369 'episode': 'Episode 3',
1370 'upload_date': '20211219',
1371 'timestamp': 1639928700,
1375 'title': '<Untitled Chapter 1>'
1381 'start_time': 1173.0,
1382 'end_time': 1259.535,
1387 # Subtitle with empty content
1388 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1392 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1393 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1394 'episode_number': 140,
1396 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1398 'url': 'https://www.bilibili.tv/en/video/2041863208',
1402 'timestamp': 1670874843,
1403 'description': 'Scheduled for April 2023.\nStudio: ufotable',
1404 'thumbnail': r
're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
1405 'upload_date': '20221212',
1406 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
1409 # episode comment extraction
1410 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1414 'timestamp': 1604057820,
1415 'upload_date': '20201030',
1416 'episode_number': 5,
1417 'title': 'E5 - My Own Steel',
1418 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1419 'thumbnail': r
're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1420 'episode': 'Episode 5',
1421 'comment_count': int,
1425 'title': '<Untitled Chapter 1>'
1431 'start_time': 1290.0,
1440 # user generated content comment extraction
1441 'url': 'https://www.bilibili.tv/en/video/2045730385',
1445 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1446 'timestamp': 1667891924,
1447 'upload_date': '20221108',
1448 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
1449 'comment_count': int,
1450 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
1456 # episode id without intro and outro
1457 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1461 'title': 'E1 - Operation \'Strix\' <Owl>',
1462 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1463 'timestamp': 1649516400,
1464 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1465 'episode': 'Episode 1',
1466 'episode_number': 1,
1467 'upload_date': '20220409',
1470 'url': 'https://www.biliintl.com/en/play/34613/341736',
1471 'only_matching': True,
1473 # User-generated content (as opposed to a series licensed from a studio)
1474 'url': 'https://bilibili.tv/en/video/2019955076',
1475 'only_matching': True,
1477 # No language in URL
1478 'url': 'https://www.bilibili.tv/video/2019955076',
1479 'only_matching': True,
1481 # Uppercase language in URL
1482 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1483 'only_matching': True,
1486 def _make_url(video_id
, series_id
=None):
1488 return f
'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1489 return f
'https://www.bilibili.tv/en/video/{video_id}'
1491 def _extract_video_metadata(self
, url
, video_id
, season_id
):
1492 url
, smuggled_data
= unsmuggle_url(url
, {})
1493 if smuggled_data
.get('title'):
1494 return smuggled_data
1496 webpage
= self
._download
_webpage
(url
, video_id
)
1499 self
._search
_json
(r
'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage
, 'preload state', video_id
, default
={})
1500 or self
._search
_nuxt
_data
(webpage
, video_id
, '__initialState', fatal
=False, traverse
=None))
1501 video_data
= traverse_obj(
1502 initial_data
, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type
=dict) or {}
1504 if season_id
and not video_data
:
1505 # Non-Bstation layout, read through episode list
1506 season_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id
)
1507 video_data
= traverse_obj(season_json
, (
1508 'sections', ..., 'episodes', lambda _
, v
: str(v
['episode_id']) == video_id
1509 ), expected_type
=dict, get_all
=False)
1511 # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
1513 self
._parse
_video
_metadata
(video_data
), self
._search
_json
_ld
(webpage
, video_id
, fatal
=False), {
1514 'title': self
._html
_search
_meta
('og:title', webpage
),
1515 'description': self
._html
_search
_meta
('og:description', webpage
)
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
    """Recursively yield reply comments under the comment *root_id*.

    Follows the API cursor until 'is_end' is set.
    """
    # NOTE(review): query keys besides 'ps' reconstructed — confirm against API
    raw_reply_data = self._download_json(
        'https://api.bilibili.tv/reply/web/detail', display_id,
        note=f'Downloading reply comment of {root_id} - {next_id}',
        query={
            'platform': 'web',
            'ps': 20,  # comment's reply per page (default: 3)
            'root': root_id,
            'next': next_id,
        })

    for reply in traverse_obj(raw_reply_data, ('data', 'replies', ...)):
        yield {
            'author': traverse_obj(reply, ('member', 'name')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'author_thumbnail': traverse_obj(reply, ('member', 'face')),
            'text': traverse_obj(reply, ('content', 'message')),
            'id': reply.get('rpid'),
            'like_count': int_or_none(reply.get('like_count')),
            'parent': reply.get('parent'),
            'timestamp': unified_timestamp(reply.get('ctime_text')),
        }

    if not traverse_obj(raw_reply_data, ('data', 'cursor', 'is_end')):
        yield from self._get_comments_reply(
            root_id, raw_reply_data['data']['cursor']['next'], display_id)
1545 def _get_comments(self
, video_id
, ep_id
):
1546 for i
in itertools
.count(0):
1547 comment_api_raw_data
= self
._download
_json
(
1548 'https://api.bilibili.tv/reply/web/root', video_id
,
1549 note
=f
'Downloading comment page {i + 1}',
1552 'pn': i
, # page number
1553 'ps': 20, # comment per page (default: 20)
1555 'type': 3 if ep_id
else 1, # 1: user generated content, 3: series content
1556 'sort_type': 1, # 1: best, 2: recent
1559 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
1561 'author': traverse_obj(replies
, ('member', 'name')),
1562 'author_id': traverse_obj(replies
, ('member', 'mid')),
1563 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
1564 'text': traverse_obj(replies
, ('content', 'message')),
1565 'id': replies
.get('rpid'),
1566 'like_count': int_or_none(replies
.get('like_count')),
1567 'timestamp': unified_timestamp(replies
.get('ctime_text')),
1568 'author_is_uploader': bool(traverse_obj(replies
, ('member', 'type'))),
1570 if replies
.get('count'):
1571 yield from self
._get
_comments
_reply
(replies
.get('rpid'), display_id
=video_id
)
1573 if traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
1576 def _real_extract(self
, url
):
1577 season_id
, ep_id
, aid
= self
._match
_valid
_url
(url
).group('season_id', 'ep_id', 'aid')
1578 video_id
= ep_id
or aid
1582 intro_ending_json
= self
._call
_api
(
1583 f
'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
1584 video_id
, fatal
=False) or {}
1585 if intro_ending_json
.get('skip'):
1586 # FIXME: start and end times seem a few seconds off even though correct per ogv.*.js
1587 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
1589 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_start_time')), 1000),
1590 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_end_time')), 1000),
1593 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_start_time')), 1000),
1594 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_end_time')), 1000),
1600 **self
._extract
_video
_metadata
(url
, video_id
, season_id
),
1601 'formats': self
._get
_formats
(ep_id
=ep_id
, aid
=aid
),
1602 'subtitles': self
.extract_subtitles(ep_id
=ep_id
, aid
=aid
),
1603 'chapters': chapters
,
1604 '__post_extractor': self
.extract_comments(video_id
, ep_id
)
1608 class BiliIntlSeriesIE(BiliIntlBaseIE
):
1609 IE_NAME
= 'biliIntl:series'
1610 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
1612 'url': 'https://www.bilibili.tv/en/play/34613',
1613 'playlist_mincount': 15,
1616 'title': 'TONIKAWA: Over the Moon For You',
1617 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1618 'categories': ['Slice of life', 'Comedy', 'Romance'],
1619 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1623 'skip_download': True,
1626 'url': 'https://www.bilibili.tv/en/media/1048837',
1629 'title': 'SPY×FAMILY',
1630 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1631 'categories': ['Adventure', 'Action', 'Comedy'],
1632 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
1635 'playlist_mincount': 25,
1637 'url': 'https://www.biliintl.com/en/play/34613',
1638 'only_matching': True,
1640 'url': 'https://www.biliintl.com/EN/play/34613',
1641 'only_matching': True,
def _entries(self, series_id):
    """Yield url_results for every episode of the season *series_id*.

    Metadata already present in the season listing is smuggled into the
    episode URL so the episode extractor can reuse it.
    """
    season_json = self._call_api(
        f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
    episodes = traverse_obj(
        season_json, ('sections', ..., 'episodes', ...), expected_type=dict)
    for episode in episodes:
        episode_id = str(episode['episode_id'])
        smuggled = smuggle_url(
            BiliIntlIE._make_url(episode_id, series_id),
            self._parse_video_metadata(episode))
        yield self.url_result(smuggled, BiliIntlIE, episode_id)
def _real_extract(self, url):
    """Return a playlist of all episodes with season-level metadata."""
    series_id = self._match_id(url)
    season_info = self._call_api(
        f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web',
        series_id).get('season') or {}
    return self.playlist_result(
        self._entries(series_id), series_id,
        season_info.get('title'), season_info.get('description'),
        categories=traverse_obj(season_info, ('styles', ..., 'title'), expected_type=str_or_none),
        thumbnail=url_or_none(season_info.get('horizontal_cover')),
        view_count=parse_count(season_info.get('view')))
1662 class BiliLiveIE(InfoExtractor
):
1663 _VALID_URL
= r
'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
1666 'url': 'https://live.bilibili.com/196',
1669 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
1671 'title': "太空狼人杀联动,不被爆杀就算赢",
1672 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
1673 'timestamp': 1650802769,
1677 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
1678 'only_matching': True
1680 'url': 'https://live.bilibili.com/blanc/196',
1681 'only_matching': True
1685 80: {'format_id': 'low', 'format_note': '流畅'}
,
1686 150: {'format_id': 'high_res', 'format_note': '高清'}
,
1687 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}
,
1688 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}
,
1689 10000: {'format_id': 'source', 'format_note': '原画'}
,
1690 20000: {'format_id': '4K', 'format_note': '4K'}
,
1691 30000: {'format_id': 'dolby', 'format_note': '杜比'}
,
1694 _quality
= staticmethod(qualities(list(_FORMATS
)))
def _call_api(self, path, room_id, query):
    """Call the live API; raise on a non-zero response code, return 'data'."""
    result = self._download_json(
        f'https://api.live.bilibili.com/{path}', room_id, query=query)
    if result.get('code') != 0:
        raise ExtractorError(result.get('message') or 'Unable to download JSON metadata')
    return result.get('data') or {}
def _parse_formats(self, qn, fmt):
    """Yield format dicts for each codec entry that matches quality *qn*."""
    for codec in fmt.get('codec') or []:
        if codec.get('current_qn') != qn:
            continue
        stream_base = codec['base_url']
        for url_info in codec['url_info']:
            yield {
                'url': f'{url_info["host"]}{stream_base}{url_info["extra"]}',
                'ext': fmt.get('format_name'),
                'vcodec': codec.get('codec_name'),
                'quality': self._quality(qn),
                **self._FORMATS[qn],
            }
1715 def _real_extract(self
, url
):
1716 room_id
= self
._match
_id
(url
)
1717 room_data
= self
._call
_api
('room/v1/Room/get_info', room_id
, {'id': room_id}
)
1718 if room_data
.get('live_status') == 0:
1719 raise ExtractorError('Streamer is not live', expected
=True)
1722 for qn
in self
._FORMATS
.keys():
1723 stream_data
= self
._call
_api
('xlive/web-room/v2/index/getRoomPlayInfo', room_id
, {
1733 for fmt
in traverse_obj(stream_data
, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
1734 formats
.extend(self
._parse
_formats
(qn
, fmt
))
1738 'title': room_data
.get('title'),
1739 'description': room_data
.get('description'),
1740 'thumbnail': room_data
.get('user_cover'),
1741 'timestamp': stream_data
.get('live_time'),