srt_subtitles_timecode,
str_or_none,
traverse_obj,
+ unified_timestamp,
unsmuggle_url,
url_or_none,
urlencode_postdata,
'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
'upload_date': '20221212',
'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
+ },
+ }, {
+ # episode comment extraction
+ 'url': 'https://www.bilibili.tv/en/play/34580/340317',
+ 'info_dict': {
+ 'id': '340317',
+ 'ext': 'mp4',
+ 'timestamp': 1604057820,
+ 'upload_date': '20201030',
+ 'episode_number': 5,
+ 'title': 'E5 - My Own Steel',
+ 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
+ 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
+ 'episode': 'Episode 5',
+ 'comment_count': int,
+ 'chapters': [{
+ 'start_time': 0,
+ 'end_time': 61.0,
+ 'title': '<Untitled Chapter 1>'
+ }, {
+ 'start_time': 61.0,
+ 'end_time': 134.0,
+ 'title': 'Intro'
+ }, {
+ 'start_time': 1290.0,
+ 'end_time': 1379.0,
+ 'title': 'Outro'
+ }],
+ },
+ 'params': {
+ 'getcomments': True
+ }
+ }, {
+ # user generated content comment extraction
+ 'url': 'https://www.bilibili.tv/en/video/2045730385',
+ 'info_dict': {
+ 'id': '2045730385',
+ 'ext': 'mp4',
+ 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
+ 'timestamp': 1667891924,
+ 'upload_date': '20221108',
+ 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
+ 'comment_count': int,
+ 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
+ },
+ 'params': {
+ 'getcomments': True
}
}, {
# episode id without intro and outro
# XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
return merge_dicts(
- self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
+ self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
'title': self._html_search_meta('og:title', webpage),
'description': self._html_search_meta('og:description', webpage)
})
+ def _get_comments_reply(self, root_id, next_id=0, display_id=None):
+ comment_api_raw_data = self._download_json(
+ 'https://api.bilibili.tv/reply/web/detail', display_id,
+ note=f'Downloading reply comment of {root_id} - {next_id}',
+ query={
+ 'platform': 'web',
+ 'ps': 20, # comment's reply per page (default: 3)
+ 'root': root_id,
+ 'next': next_id,
+ })
+
+ for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+ yield {
+ 'author': traverse_obj(replies, ('member', 'name')),
+ 'author_id': traverse_obj(replies, ('member', 'mid')),
+ 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+ 'text': traverse_obj(replies, ('content', 'message')),
+ 'id': replies.get('rpid'),
+ 'like_count': int_or_none(replies.get('like_count')),
+ 'parent': replies.get('parent'),
+ 'timestamp': unified_timestamp(replies.get('ctime_text'))
+ }
+
+ if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+ yield from self._get_comments_reply(
+ root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
+
+ def _get_comments(self, video_id, ep_id):
+ for i in itertools.count(0):
+ comment_api_raw_data = self._download_json(
+ 'https://api.bilibili.tv/reply/web/root', video_id,
+ note=f'Downloading comment page {i + 1}',
+ query={
+ 'platform': 'web',
+ 'pn': i, # page number
+ 'ps': 20, # comment per page (default: 20)
+ 'oid': video_id,
+ 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
+ 'sort_type': 1, # 1: best, 2: recent
+ })
+
+ for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
+ yield {
+ 'author': traverse_obj(replies, ('member', 'name')),
+ 'author_id': traverse_obj(replies, ('member', 'mid')),
+ 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
+ 'text': traverse_obj(replies, ('content', 'message')),
+ 'id': replies.get('rpid'),
+ 'like_count': int_or_none(replies.get('like_count')),
+ 'timestamp': unified_timestamp(replies.get('ctime_text')),
+ 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
+ }
+ if replies.get('count'):
+ yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
+
+ if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
+ break
+
def _real_extract(self, url):
season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
video_id = ep_id or aid
**self._extract_video_metadata(url, video_id, season_id),
'formats': self._get_formats(ep_id=ep_id, aid=aid),
'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
- 'chapters': chapters
+ 'chapters': chapters,
+ '__post_extractor': self.extract_comments(video_id, ep_id)
}