yt_dlp/extractor/bilibili.py
1 import base64
2 import functools
3 import hashlib
4 import itertools
5 import math
6 import time
7 import urllib.parse
8
9 from .common import InfoExtractor, SearchInfoExtractor
10 from ..dependencies import Cryptodome
11 from ..networking.exceptions import HTTPError
12 from ..utils import (
13 ExtractorError,
14 GeoRestrictedError,
15 InAdvancePagedList,
16 OnDemandPagedList,
17 filter_dict,
18 float_or_none,
19 format_field,
20 int_or_none,
21 join_nonempty,
22 make_archive_id,
23 merge_dicts,
24 mimetype2ext,
25 parse_count,
26 parse_qs,
27 qualities,
28 smuggle_url,
29 srt_subtitles_timecode,
30 str_or_none,
31 traverse_obj,
32 try_call,
33 unified_timestamp,
34 unsmuggle_url,
35 url_or_none,
36 urlencode_postdata,
37 )
38
39
40 class BilibiliBaseIE(InfoExtractor):
41 def extract_formats(self, play_info):
42 format_names = {
43 r['quality']: traverse_obj(r, 'new_description', 'display_desc')
44 for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
45 }
46
47 audios = traverse_obj(play_info, ('dash', 'audio', ...))
48 flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
49 if flac_audio:
50 audios.append(flac_audio)
51 formats = [{
52 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
53 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
54 'acodec': audio.get('codecs'),
55 'vcodec': 'none',
56 'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
57 'filesize': int_or_none(audio.get('size'))
58 } for audio in audios]
59
60 formats.extend({
61 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
62 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
63 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
64 'width': int_or_none(video.get('width')),
65 'height': int_or_none(video.get('height')),
66 'vcodec': video.get('codecs'),
67 'acodec': 'none' if audios else None,
68 'tbr': float_or_none(video.get('bandwidth'), scale=1000),
69 'filesize': int_or_none(video.get('size')),
70 'quality': int_or_none(video.get('id')),
71 'format': format_names.get(video.get('id')),
72 } for video in traverse_obj(play_info, ('dash', 'video', ...)))
73
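# Qualities advertised in support_formats but absent from the DASH stream lists are typically login- or premium-gated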
74 missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
75 if missing_formats:
76 self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
77 f'you have to log in or become a premium member to download them. {self._login_hint()}')
78
79 return formats
80
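# Convert Bilibili's JSON subtitle payload into SRT-formatted text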
81 def json2srt(self, json_data):
82 srt_data = ''
83 for idx, line in enumerate(json_data.get('body') or []):
84 srt_data += (f'{idx + 1}\n'
85 f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
86 f'{line["content"]}\n\n')
87 return srt_data
88
89 def _get_subtitles(self, video_id, aid, cid):
90 subtitles = {
91 'danmaku': [{
92 'ext': 'xml',
93 'url': f'https://comment.bilibili.com/{cid}.xml',
94 }]
95 }
96
97 video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id)
98 for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)):
99 subtitles.setdefault(s['lan'], []).append({
100 'ext': 'srt',
101 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
102 })
103 return subtitles
104
105 def _get_chapters(self, aid, cid):
106 chapters = aid and cid and self._download_json(
107 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
108 note='Extracting chapters', fatal=False)
109 return traverse_obj(chapters, ('data', 'view_points', ..., {
110 'title': 'content',
111 'start_time': 'from',
112 'end_time': 'to',
113 })) or None
114
115 def _get_comments(self, aid):
116 for idx in itertools.count(1):
117 replies = traverse_obj(
118 self._download_json(
119 f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
120 aid, note=f'Extracting comments from page {idx}', fatal=False),
121 ('data', 'replies'))
122 if not replies:
123 return
124 for children in map(self._get_all_children, replies):
125 yield from children
126
127 def _get_all_children(self, reply):
128 yield {
129 'author': traverse_obj(reply, ('member', 'uname')),
130 'author_id': traverse_obj(reply, ('member', 'mid')),
131 'id': reply.get('rpid'),
132 'text': traverse_obj(reply, ('content', 'message')),
133 'timestamp': reply.get('ctime'),
134 'parent': reply.get('parent') or 'root',
135 }
136 for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
137 yield from children
138
139 def _get_episodes_from_season(self, ss_id, url):
140 season_info = self._download_json(
141 'https://api.bilibili.com/pgc/web/season/section', ss_id,
142 note='Downloading season info', query={'season_id': ss_id},
143 headers={'Referer': url, **self.geo_verification_headers()})
144
145 for entry in traverse_obj(season_info, (
146 'result', 'main_section', 'episodes',
147 lambda _, v: url_or_none(v['share_url']) and v['id'])):
148 yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
149
150
151 class BiliBiliIE(BilibiliBaseIE):
152 _VALID_URL = r'https?://www\.bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
153
154 _TESTS = [{
155 'url': 'https://www.bilibili.com/video/BV13x41117TL',
156 'info_dict': {
157 'id': 'BV13x41117TL',
158 'title': '阿滴英文|英文歌分享#6 "Closer',
159 'ext': 'mp4',
160 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
161 'uploader_id': '65880958',
162 'uploader': '阿滴英文',
163 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
164 'duration': 554.117,
165 'tags': list,
166 'comment_count': int,
167 'upload_date': '20170301',
168 'timestamp': 1488353834,
169 'like_count': int,
170 'view_count': int,
171 },
172 }, {
173 # old av URL version
174 'url': 'http://www.bilibili.com/video/av1074402/',
175 'info_dict': {
176 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
177 'ext': 'mp4',
178 'uploader': '菊子桑',
179 'uploader_id': '156160',
180 'id': 'BV11x411K7CN',
181 'title': '【金坷垃】金泡沫',
182 'duration': 308.36,
183 'upload_date': '20140420',
184 'timestamp': 1397983878,
185 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
186 'like_count': int,
187 'comment_count': int,
188 'view_count': int,
189 'tags': list,
190 },
191 'params': {'skip_download': True},
192 }, {
193 'note': 'Anthology',
194 'url': 'https://www.bilibili.com/video/BV1bK411W797',
195 'info_dict': {
196 'id': 'BV1bK411W797',
197 'title': '物语中的人物是如何吐槽自己的OP的'
198 },
199 'playlist_count': 18,
200 'playlist': [{
201 'info_dict': {
202 'id': 'BV1bK411W797_p1',
203 'ext': 'mp4',
204 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
205 'tags': 'count:11',
206 'timestamp': 1589601697,
207 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
208 'uploader': '打牌还是打桩',
209 'uploader_id': '150259984',
210 'like_count': int,
211 'comment_count': int,
212 'upload_date': '20200516',
213 'view_count': int,
214 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
215 'duration': 90.314,
216 }
217 }]
218 }, {
219 'note': 'Specific page of Anthology',
220 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
221 'info_dict': {
222 'id': 'BV1bK411W797_p1',
223 'ext': 'mp4',
224 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
225 'tags': 'count:11',
226 'timestamp': 1589601697,
227 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
228 'uploader': '打牌还是打桩',
229 'uploader_id': '150259984',
230 'like_count': int,
231 'comment_count': int,
232 'upload_date': '20200516',
233 'view_count': int,
234 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
235 'duration': 90.314,
236 }
237 }, {
238 'note': 'video has subtitles',
239 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
240 'info_dict': {
241 'id': 'BV12N4y1M7rh',
242 'ext': 'mp4',
243 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
244 'tags': list,
245 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
246 'duration': 313.557,
247 'upload_date': '20220709',
248 'uploader': '小夫Tech',
249 'timestamp': 1657347907,
250 'uploader_id': '1326814124',
251 'comment_count': int,
252 'view_count': int,
253 'like_count': int,
254 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
255 'subtitles': 'count:2'
256 },
257 'params': {'listsubtitles': True},
258 }, {
259 'url': 'https://www.bilibili.com/video/av8903802/',
260 'info_dict': {
261 'id': 'BV13x41117TL',
262 'ext': 'mp4',
263 'title': '阿滴英文|英文歌分享#6 "Closer',
264 'upload_date': '20170301',
265 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
266 'timestamp': 1488353834,
267 'uploader_id': '65880958',
268 'uploader': '阿滴英文',
269 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
270 'duration': 554.117,
271 'tags': list,
272 'comment_count': int,
273 'view_count': int,
274 'like_count': int,
275 },
276 'params': {
277 'skip_download': True,
278 },
279 }, {
280 'note': 'video has chapter',
281 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
282 'info_dict': {
283 'id': 'BV1vL411G7N7',
284 'ext': 'mp4',
285 'title': '如何为你的B站视频添加进度条分段',
286 'timestamp': 1634554558,
287 'upload_date': '20211018',
288 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
289 'tags': list,
290 'uploader': '爱喝咖啡的当麻',
291 'duration': 669.482,
292 'uploader_id': '1680903',
293 'chapters': 'count:6',
294 'comment_count': int,
295 'view_count': int,
296 'like_count': int,
297 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
298 },
299 'params': {'skip_download': True},
300 }, {
301 'note': 'video redirects to festival page',
302 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
303 'info_dict': {
304 'id': 'BV1wP4y1P72h',
305 'ext': 'mp4',
306 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
307 'timestamp': 1643947497,
308 'upload_date': '20220204',
309 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
310 'uploader': '叨叨冯聊音乐',
311 'duration': 246.719,
312 'uploader_id': '528182630',
313 'view_count': int,
314 'like_count': int,
315 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
316 },
317 'params': {'skip_download': True},
318 }, {
319 'note': 'newer festival video',
320 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
321 'info_dict': {
322 'id': 'BV1ay4y1d77f',
323 'ext': 'mp4',
324 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
325 'timestamp': 1674273600,
326 'upload_date': '20230121',
327 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
328 'uploader': '果蝇轰',
329 'duration': 1111.722,
330 'uploader_id': '8469526',
331 'view_count': int,
332 'like_count': int,
333 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
334 },
335 'params': {'skip_download': True},
336 }]
337
338 def _real_extract(self, url):
339 video_id = self._match_id(url)
340 webpage = self._download_webpage(url, video_id)
341 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
342
343 is_festival = 'videoData' not in initial_state
344 if is_festival:
345 video_data = initial_state['videoInfo']
346 else:
347 play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
348 video_data = initial_state['videoData']
349
350 video_id, title = video_data['bvid'], video_data.get('title')
351
352 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
353 page_list_json = not is_festival and traverse_obj(
354 self._download_json(
355 'https://api.bilibili.com/x/player/pagelist', video_id,
356 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
357 note='Extracting videos in anthology'),
358 'data', expected_type=list) or []
359 is_anthology = len(page_list_json) > 1
360
361 part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
362 if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
363 return self.playlist_from_matches(
364 page_list_json, video_id, title, ie=BiliBiliIE,
365 getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
366
367 if is_anthology:
368 part_id = part_id or 1
369 title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
370
371 aid = video_data.get('aid')
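# Keep an archive id in the legacy aid-based form (<aid>_part<N>) so download archives written with the old ids still match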
372 old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
373
374 cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
375
376 festival_info = {}
377 if is_festival:
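# Festival pages do not embed window.__playinfo__, so fetch the formats from the playurl API instead
# (fnval=4048 is assumed here to request the full DASH stream set)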
378 play_info = self._download_json(
379 'https://api.bilibili.com/x/player/playurl', video_id,
380 query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
381 note='Extracting festival video formats')['data']
382
383 festival_info = traverse_obj(initial_state, {
384 'uploader': ('videoInfo', 'upName'),
385 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
386 'like_count': ('videoStatus', 'like', {int_or_none}),
387 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
388 }, get_all=False)
389
390 return {
391 **traverse_obj(initial_state, {
392 'uploader': ('upData', 'name'),
393 'uploader_id': ('upData', 'mid', {str_or_none}),
394 'like_count': ('videoData', 'stat', 'like', {int_or_none}),
395 'tags': ('tags', ..., 'tag_name'),
396 'thumbnail': ('videoData', 'pic', {url_or_none}),
397 }),
398 **festival_info,
399 **traverse_obj(video_data, {
400 'description': 'desc',
401 'timestamp': ('pubdate', {int_or_none}),
402 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
403 'comment_count': ('stat', 'reply', {int_or_none}),
404 }, get_all=False),
405 'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
406 'formats': self.extract_formats(play_info),
407 '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
408 'title': title,
409 'duration': float_or_none(play_info.get('timelength'), scale=1000),
410 'chapters': self._get_chapters(aid, cid),
411 'subtitles': self.extract_subtitles(video_id, aid, cid),
412 '__post_extractor': self.extract_comments(aid),
413 'http_headers': {'Referer': url},
414 }
415
416
417 class BiliBiliBangumiIE(BilibiliBaseIE):
418 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
419
420 _TESTS = [{
421 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
422 'info_dict': {
423 'id': '267851',
424 'ext': 'mp4',
425 'series': '鬼灭之刃',
426 'series_id': '4358',
427 'season': '鬼灭之刃',
428 'season_id': '26801',
429 'season_number': 1,
430 'episode': '残酷',
431 'episode_id': '267851',
432 'episode_number': 1,
433 'title': '1 残酷',
434 'duration': 1425.256,
435 'timestamp': 1554566400,
436 'upload_date': '20190406',
437 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
438 },
439 'skip': 'According to the copyright owner\'s request, you may only watch the video after you are a premium member.'
440 }]
441
442 def _real_extract(self, url):
443 video_id = self._match_id(url)
444 episode_id = video_id[2:]
445 webpage = self._download_webpage(url, video_id)
446
447 if '您所在的地区无法观看本片' in webpage:
448 raise GeoRestrictedError('This video is restricted')
449 elif '正在观看预览,大会员免费看全片' in webpage:
450 self.raise_login_required('This video is for premium members only')
451
452 headers = {'Referer': url, **self.geo_verification_headers()}
453 play_info = self._download_json(
454 'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
455 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
456 headers=headers)
457 premium_only = play_info.get('code') == -10403
458 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
459
460 formats = self.extract_formats(play_info)
461 if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
462 self.raise_login_required('This video is for premium members only')
463
464 bangumi_info = self._download_json(
465 'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
466 query={'ep_id': episode_id}, headers=headers)['result']
467
468 episode_number, episode_info = next((
469 (idx, ep) for idx, ep in enumerate(traverse_obj(
470 bangumi_info, ('episodes', ..., {dict})), 1)
471 if str_or_none(ep.get('id')) == episode_id), (1, {}))
472
473 season_id = bangumi_info.get('season_id')
474 season_number = season_id and next((
475 idx + 1 for idx, e in enumerate(
476 traverse_obj(bangumi_info, ('seasons', ...)))
477 if e.get('season_id') == season_id
478 ), None)
479
480 aid = episode_info.get('aid')
481
482 return {
483 'id': video_id,
484 'formats': formats,
485 **traverse_obj(bangumi_info, {
486 'series': ('series', 'series_title', {str}),
487 'series_id': ('series', 'series_id', {str_or_none}),
488 'thumbnail': ('square_cover', {url_or_none}),
489 }),
490 'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
491 'episode': episode_info.get('long_title'),
492 'episode_id': episode_id,
493 'episode_number': int_or_none(episode_info.get('title')) or episode_number,
494 'season_id': str_or_none(season_id),
495 'season_number': season_number,
496 'timestamp': int_or_none(episode_info.get('pub_time')),
497 'duration': float_or_none(play_info.get('timelength'), scale=1000),
498 'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
499 '__post_extractor': self.extract_comments(aid),
500 'http_headers': headers,
501 }
502
503
504 class BiliBiliBangumiMediaIE(BilibiliBaseIE):
505 _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
506 _TESTS = [{
507 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
508 'info_dict': {
509 'id': '24097891',
510 },
511 'playlist_mincount': 25,
512 }]
513
514 def _real_extract(self, url):
515 media_id = self._match_id(url)
516 webpage = self._download_webpage(url, media_id)
517 ss_id = self._search_json(
518 r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)['mediaInfo']['season_id']
519
520 return self.playlist_result(self._get_episodes_from_season(ss_id, url), media_id)
521
522
523 class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
524 _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/ss(?P<id>\d+)'
525 _TESTS = [{
526 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
527 'info_dict': {
528 'id': '26801'
529 },
530 'playlist_mincount': 26
531 }]
532
533 def _real_extract(self, url):
534 ss_id = self._match_id(url)
535
536 return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id)
537
538
539 class BilibiliSpaceBaseIE(InfoExtractor):
540 def _extract_playlist(self, fetch_page, get_metadata, get_entries):
541 first_page = fetch_page(0)
542 metadata = get_metadata(first_page)
543
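# Reuse the already-fetched first page and fetch the remaining pages lazily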
544 paged_list = InAdvancePagedList(
545 lambda idx: get_entries(fetch_page(idx) if idx else first_page),
546 metadata['page_count'], metadata['page_size'])
547
548 return metadata, paged_list
549
550
551 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
552 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
553 _TESTS = [{
554 'url': 'https://space.bilibili.com/3985676/video',
555 'info_dict': {
556 'id': '3985676',
557 },
558 'playlist_mincount': 178,
559 }, {
560 'url': 'https://space.bilibili.com/313580179/video',
561 'info_dict': {
562 'id': '313580179',
563 },
564 'playlist_mincount': 92,
565 }]
566
567 def _extract_signature(self, playlist_id):
568 session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
569
570 key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
571 img_key = traverse_obj(
572 session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
573 sub_key = traverse_obj(
574 session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
575
576 session_key = img_key + sub_key
577
578 signature_values = []
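# Derive the WBI 'mixed key': pick characters of img_key + sub_key in the fixed order below, keeping the first 32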
579 for position in (
580 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
581 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
582 57, 62, 11, 36, 20, 34, 44, 52
583 ):
584 char_at_position = try_call(lambda: session_key[position])
585 if char_at_position:
586 signature_values.append(char_at_position)
587
588 return ''.join(signature_values)[:32]
589
590 def _real_extract(self, url):
591 playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
592 if not is_video_url:
593 self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
594 'To download audio, append "/audio" to the URL')
595
596 signature = self._extract_signature(playlist_id)
597
598 def fetch_page(page_idx):
599 query = {
600 'keyword': '',
601 'mid': playlist_id,
602 'order': 'pubdate',
603 'order_avoided': 'true',
604 'platform': 'web',
605 'pn': page_idx + 1,
606 'ps': 30,
607 'tid': 0,
608 'web_location': 1550101,
609 'wts': int(time.time()),
610 }
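# Sign the request: w_rid is the MD5 of the url-encoded query (the keys above are already in the sorted order the WBI scheme expects) followed by the mixed key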
611 query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
612
613 try:
614 response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
615 playlist_id, note=f'Downloading page {page_idx}', query=query)
616 except ExtractorError as e:
617 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
618 raise ExtractorError(
619 'Request was blocked by the server (412); please add cookies, wait, and try again later.', expected=True)
620 raise
621 if response['code'] == -401:
622 raise ExtractorError(
623 'Request was blocked by the server (401); please add cookies, wait, and try again later.', expected=True)
624 return response['data']
625
626 def get_metadata(page_data):
627 page_size = page_data['page']['ps']
628 entry_count = page_data['page']['count']
629 return {
630 'page_count': math.ceil(entry_count / page_size),
631 'page_size': page_size,
632 }
633
634 def get_entries(page_data):
635 for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
636 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
637
638 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
639 return self.playlist_result(paged_list, playlist_id)
640
641
642 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
643 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
644 _TESTS = [{
645 'url': 'https://space.bilibili.com/313580179/audio',
646 'info_dict': {
647 'id': '313580179',
648 },
649 'playlist_mincount': 1,
650 }]
651
652 def _real_extract(self, url):
653 playlist_id = self._match_id(url)
654
655 def fetch_page(page_idx):
656 return self._download_json(
657 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
658 note=f'Downloading page {page_idx}',
659 query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
660
661 def get_metadata(page_data):
662 return {
663 'page_count': page_data['pageCount'],
664 'page_size': page_data['pageSize'],
665 }
666
667 def get_entries(page_data):
668 for entry in page_data.get('data', []):
669 yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
670
671 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
672 return self.playlist_result(paged_list, playlist_id)
673
674
675 class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
676 _VALID_URL = r'https?://space.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
677 _TESTS = [{
678 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
679 'info_dict': {
680 'id': '2142762_57445',
681 'title': '《底特律 变人》'
682 },
683 'playlist_mincount': 31,
684 }]
685
686 def _real_extract(self, url):
687 mid, sid = self._match_valid_url(url).group('mid', 'sid')
688 playlist_id = f'{mid}_{sid}'
689
690 def fetch_page(page_idx):
691 return self._download_json(
692 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
693 playlist_id, note=f'Downloading page {page_idx}',
694 query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
695
696 def get_metadata(page_data):
697 page_size = page_data['page']['page_size']
698 entry_count = page_data['page']['total']
699 return {
700 'page_count': math.ceil(entry_count / page_size),
701 'page_size': page_size,
702 'title': traverse_obj(page_data, ('meta', 'name'))
703 }
704
705 def get_entries(page_data):
706 for entry in page_data.get('archives', []):
707 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
708 BiliBiliIE, entry['bvid'])
709
710 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
711 return self.playlist_result(paged_list, playlist_id, metadata['title'])
712
713
714 class BilibiliCategoryIE(InfoExtractor):
715 IE_NAME = 'Bilibili category extractor'
716 _MAX_RESULTS = 1000000
717 _VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
718 _TESTS = [{
719 'url': 'https://www.bilibili.com/v/kichiku/mad',
720 'info_dict': {
721 'id': 'kichiku: mad',
722 'title': 'kichiku: mad'
723 },
724 'playlist_mincount': 45,
725 'params': {
726 'playlistend': 45
727 }
728 }]
729
730 def _fetch_page(self, api_url, num_pages, query, page_num):
731 parsed_json = self._download_json(
732 api_url, query, query={'Search_key': query, 'pn': page_num},
733 note='Extracting results from page %s of %s' % (page_num, num_pages))
734
735 video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
736 if not video_list:
737 raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
738
739 for video in video_list:
740 yield self.url_result(
741 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
742
743 def _entries(self, category, subcategory, query):
744 # map of categories : subcategories : RIDs
745 rid_map = {
746 'kichiku': {
747 'mad': 26,
748 'manual_vocaloid': 126,
749 'guide': 22,
750 'theatre': 216,
751 'course': 127
752 },
753 }
754
755 if category not in rid_map:
756 raise ExtractorError(
757 f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
758 if subcategory not in rid_map[category]:
759 raise ExtractorError(
760 f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
761 rid_value = rid_map[category][subcategory]
762
763 api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
764 page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
765 page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
766 count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
767 if count is None or not size:
768 raise ExtractorError('Failed to calculate either page count or size')
769
770 num_pages = math.ceil(count / size)
771
772 return OnDemandPagedList(functools.partial(
773 self._fetch_page, api_url, num_pages, query), size)
774
775 def _real_extract(self, url):
776 category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
777 query = '%s: %s' % (category, subcategory)
778
779 return self.playlist_result(self._entries(category, subcategory, query), query, query)
780
781
782 class BiliBiliSearchIE(SearchInfoExtractor):
783 IE_DESC = 'Bilibili video search'
784 _MAX_RESULTS = 100000
785 _SEARCH_KEY = 'bilisearch'
786
787 def _search_results(self, query):
788 for page_num in itertools.count(1):
789 videos = self._download_json(
790 'https://api.bilibili.com/x/web-interface/search/type', query,
791 note=f'Extracting results from page {page_num}', query={
792 'Search_key': query,
793 'keyword': query,
794 'page': page_num,
795 'context': '',
796 'duration': 0,
797 'tids_2': '',
798 '__refresh__': 'true',
799 'search_type': 'video',
800 'tids': 0,
801 'highlight': 1,
802 })['data'].get('result')
803 if not videos:
804 break
805 for video in videos:
806 yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
807
808
809 class BilibiliAudioBaseIE(InfoExtractor):
810 def _call_api(self, path, sid, query=None):
811 if not query:
812 query = {'sid': sid}
813 return self._download_json(
814 'https://www.bilibili.com/audio/music-service-c/web/' + path,
815 sid, query=query)['data']
816
817
818 class BilibiliAudioIE(BilibiliAudioBaseIE):
819 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
820 _TEST = {
821 'url': 'https://www.bilibili.com/audio/au1003142',
822 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
823 'info_dict': {
824 'id': '1003142',
825 'ext': 'm4a',
826 'title': '【tsukimi】YELLOW / 神山羊',
827 'artist': 'tsukimi',
828 'comment_count': int,
829 'description': 'YELLOW的mp3版!',
830 'duration': 183,
831 'subtitles': {
832 'origin': [{
833 'ext': 'lrc',
834 }],
835 },
836 'thumbnail': r're:^https?://.+\.jpg',
837 'timestamp': 1564836614,
838 'upload_date': '20190803',
839 'uploader': 'tsukimi-つきみぐー',
840 'view_count': int,
841 },
842 }
843
844 def _real_extract(self, url):
845 au_id = self._match_id(url)
846
847 play_data = self._call_api('url', au_id)
848 formats = [{
849 'url': play_data['cdns'][0],
850 'filesize': int_or_none(play_data.get('size')),
851 'vcodec': 'none'
852 }]
853
854 for a_format in formats:
855 a_format.setdefault('http_headers', {}).update({
856 'Referer': url,
857 })
858
859 song = self._call_api('song/info', au_id)
860 title = song['title']
861 statistic = song.get('statistic') or {}
862
863 subtitles = None
864 lyric = song.get('lyric')
865 if lyric:
866 subtitles = {
867 'origin': [{
868 'url': lyric,
869 }]
870 }
871
872 return {
873 'id': au_id,
874 'title': title,
875 'formats': formats,
876 'artist': song.get('author'),
877 'comment_count': int_or_none(statistic.get('comment')),
878 'description': song.get('intro'),
879 'duration': int_or_none(song.get('duration')),
880 'subtitles': subtitles,
881 'thumbnail': song.get('cover'),
882 'timestamp': int_or_none(song.get('passtime')),
883 'uploader': song.get('uname'),
884 'view_count': int_or_none(statistic.get('play')),
885 }
886
887
888 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
889 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
890 _TEST = {
891 'url': 'https://www.bilibili.com/audio/am10624',
892 'info_dict': {
893 'id': '10624',
894 'title': '每日新曲推荐(每日11:00更新)',
895 'description': '每天11:00更新,为你推送最新音乐',
896 },
897 'playlist_count': 19,
898 }
899
900 def _real_extract(self, url):
901 am_id = self._match_id(url)
902
903 songs = self._call_api(
904 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
905
906 entries = []
907 for song in songs:
908 sid = str_or_none(song.get('id'))
909 if not sid:
910 continue
911 entries.append(self.url_result(
912 'https://www.bilibili.com/audio/au' + sid,
913 BilibiliAudioIE.ie_key(), sid))
914
915 if entries:
916 album_data = self._call_api('menu/info', am_id) or {}
917 album_title = album_data.get('title')
918 if album_title:
919 for entry in entries:
920 entry['album'] = album_title
921 return self.playlist_result(
922 entries, am_id, album_title, album_data.get('intro'))
923
924 return self.playlist_result(entries, am_id)
925
926
927 class BiliBiliPlayerIE(InfoExtractor):
928 _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
929 _TEST = {
930 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
931 'only_matching': True,
932 }
933
934 def _real_extract(self, url):
935 video_id = self._match_id(url)
936 return self.url_result(
937 'http://www.bilibili.tv/video/av%s/' % video_id,
938 ie=BiliBiliIE.ie_key(), video_id=video_id)
939
940
941 class BiliIntlBaseIE(InfoExtractor):
942 _API_URL = 'https://api.bilibili.tv/intl/gateway'
943 _NETRC_MACHINE = 'biliintl'
944
945 def _call_api(self, endpoint, *args, **kwargs):
946 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
947 if json.get('code'):
948 if json['code'] in (10004004, 10004005, 10023006):
949 self.raise_login_required()
950 elif json['code'] == 10004001:
951 self.raise_geo_restricted()
952 else:
953 if json.get('message') and str(json['code']) != json['message']:
954 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
955 else:
956 errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
957 if kwargs.get('fatal'):
958 raise ExtractorError(errmsg)
959 else:
960 self.report_warning(errmsg)
961 return json.get('data')
962
963 def json2srt(self, json):
964 data = '\n\n'.join(
965 f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
966 for i, line in enumerate(traverse_obj(json, (
967 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
968 return data
969
970 def _get_subtitles(self, *, ep_id=None, aid=None):
971 sub_json = self._call_api(
972 '/web/v2/subtitle', ep_id or aid, fatal=False,
973 note='Downloading subtitles list', errnote='Unable to download subtitles list',
974 query=filter_dict({
975 'platform': 'web',
976 's_locale': 'en_US',
977 'episode_id': ep_id,
978 'aid': aid,
979 })) or {}
980 subtitles = {}
981 for sub in sub_json.get('subtitles') or []:
982 sub_url = sub.get('url')
983 if not sub_url:
984 continue
985 sub_data = self._download_json(
986 sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
987 note='Downloading subtitles%s' % (f' for {sub["lang"]}' if sub.get('lang') else ''))
988 if not sub_data:
989 continue
990 subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
991 'ext': 'srt',
992 'data': self.json2srt(sub_data)
993 })
994 return subtitles
995
996 def _get_formats(self, *, ep_id=None, aid=None):
997 video_json = self._call_api(
998 '/web/playurl', ep_id or aid, note='Downloading video formats',
999 errnote='Unable to download video formats', query=filter_dict({
1000 'platform': 'web',
1001 'ep_id': ep_id,
1002 'aid': aid,
1003 }))
1004 video_json = video_json['playurl']
1005 formats = []
1006 for vid in video_json.get('video') or []:
1007 video_res = vid.get('video_resource') or {}
1008 video_info = vid.get('stream_info') or {}
1009 if not video_res.get('url'):
1010 continue
1011 formats.append({
1012 'url': video_res['url'],
1013 'ext': 'mp4',
1014 'format_note': video_info.get('desc_words'),
1015 'width': video_res.get('width'),
1016 'height': video_res.get('height'),
1017 'vbr': video_res.get('bandwidth'),
1018 'acodec': 'none',
1019 'vcodec': video_res.get('codecs'),
1020 'filesize': video_res.get('size'),
1021 })
1022 for aud in video_json.get('audio_resource') or []:
1023 if not aud.get('url'):
1024 continue
1025 formats.append({
1026 'url': aud['url'],
1027 'ext': 'mp4',
1028 'abr': aud.get('bandwidth'),
1029 'acodec': aud.get('codecs'),
1030 'vcodec': 'none',
1031 'filesize': aud.get('size'),
1032 })
1033
1034 return formats
1035
1036 def _parse_video_metadata(self, video_data):
1037 return {
1038 'title': video_data.get('title_display') or video_data.get('title'),
1039 'thumbnail': video_data.get('cover'),
1040 'episode_number': int_or_none(self._search_regex(
1041 r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
1042 }
1043
1044 def _perform_login(self, username, password):
1045 if not Cryptodome.RSA:
1046 raise ExtractorError('pycryptodomex not found. Please install', expected=True)
1047
1048 key_data = self._download_json(
1049 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1050 note='Downloading login key', errnote='Unable to download login key')['data']
1051
1052 public_key = Cryptodome.RSA.importKey(key_data['key'])
1053 password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
1054 login_post = self._download_json(
1055 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
1056 'username': username,
1057 'password': base64.b64encode(password_hash).decode('ascii'),
1058 'keep_me': 'true',
1059 's_locale': 'en_US',
1060 'isTrusted': 'true'
1061 }), note='Logging in', errnote='Unable to log in')
1062 if login_post.get('code'):
1063 if login_post.get('message'):
1064 raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
1065 else:
1066 raise ExtractorError('Unable to log in')
1067
1068
1069 class BiliIntlIE(BiliIntlBaseIE):
1070 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1071 _TESTS = [{
1072 # Bstation page
1073 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1074 'info_dict': {
1075 'id': '341736',
1076 'ext': 'mp4',
1077 'title': 'E2 - The First Night',
1078 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1079 'episode_number': 2,
1080 'upload_date': '20201009',
1081 'episode': 'Episode 2',
1082 'timestamp': 1602259500,
1083 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1084 'chapters': [{
1085 'start_time': 0,
1086 'end_time': 76.242,
1087 'title': '<Untitled Chapter 1>'
1088 }, {
1089 'start_time': 76.242,
1090 'end_time': 161.161,
1091 'title': 'Intro'
1092 }, {
1093 'start_time': 1325.742,
1094 'end_time': 1403.903,
1095 'title': 'Outro'
1096 }],
1097 }
1098 }, {
1099 # Non-Bstation page
1100 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1101 'info_dict': {
1102 'id': '11005006',
1103 'ext': 'mp4',
1104 'title': 'E3 - Who?',
1105 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1106 'episode_number': 3,
1107 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1108 'episode': 'Episode 3',
1109 'upload_date': '20211219',
1110 'timestamp': 1639928700,
1111 'chapters': [{
1112 'start_time': 0,
1113 'end_time': 88.0,
1114 'title': '<Untitled Chapter 1>'
1115 }, {
1116 'start_time': 88.0,
1117 'end_time': 156.0,
1118 'title': 'Intro'
1119 }, {
1120 'start_time': 1173.0,
1121 'end_time': 1259.535,
1122 'title': 'Outro'
1123 }],
1124 }
1125 }, {
1126 # Subtitle with empty content
1127 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1128 'info_dict': {
1129 'id': '10131790',
1130 'ext': 'mp4',
1131 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1132 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1133 'episode_number': 140,
1134 },
1135 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1136 }, {
1137 'url': 'https://www.bilibili.tv/en/video/2041863208',
1138 'info_dict': {
1139 'id': '2041863208',
1140 'ext': 'mp4',
1141 'timestamp': 1670874843,
1142 'description': 'Scheduled for April 2023.\nStudio: ufotable',
1143 'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
1144 'upload_date': '20221212',
1145 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
1146 },
1147 }, {
1148 # episode comment extraction
1149 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1150 'info_dict': {
1151 'id': '340317',
1152 'ext': 'mp4',
1153 'timestamp': 1604057820,
1154 'upload_date': '20201030',
1155 'episode_number': 5,
1156 'title': 'E5 - My Own Steel',
1157 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1158 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1159 'episode': 'Episode 5',
1160 'comment_count': int,
1161 'chapters': [{
1162 'start_time': 0,
1163 'end_time': 61.0,
1164 'title': '<Untitled Chapter 1>'
1165 }, {
1166 'start_time': 61.0,
1167 'end_time': 134.0,
1168 'title': 'Intro'
1169 }, {
1170 'start_time': 1290.0,
1171 'end_time': 1379.0,
1172 'title': 'Outro'
1173 }],
1174 },
1175 'params': {
1176 'getcomments': True
1177 }
1178 }, {
1179 # user generated content comment extraction
1180 'url': 'https://www.bilibili.tv/en/video/2045730385',
1181 'info_dict': {
1182 'id': '2045730385',
1183 'ext': 'mp4',
1184 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1185 'timestamp': 1667891924,
1186 'upload_date': '20221108',
1187 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
1188 'comment_count': int,
1189 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
1190 },
1191 'params': {
1192 'getcomments': True
1193 }
1194 }, {
1195 # episode id without intro and outro
1196 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1197 'info_dict': {
1198 'id': '11246489',
1199 'ext': 'mp4',
1200 'title': 'E1 - Operation \'Strix\' <Owl>',
1201 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1202 'timestamp': 1649516400,
1203 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1204 'episode': 'Episode 1',
1205 'episode_number': 1,
1206 'upload_date': '20220409',
1207 },
1208 }, {
1209 'url': 'https://www.biliintl.com/en/play/34613/341736',
1210 'only_matching': True,
1211 }, {
1212 # User-generated content (as opposed to a series licensed from a studio)
1213 'url': 'https://bilibili.tv/en/video/2019955076',
1214 'only_matching': True,
1215 }, {
1216 # No language in URL
1217 'url': 'https://www.bilibili.tv/video/2019955076',
1218 'only_matching': True,
1219 }, {
1220 # Uppercase language in URL
1221 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1222 'only_matching': True,
1223 }]
1224
1225 def _make_url(video_id, series_id=None):
1226 if series_id:
1227 return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1228 return f'https://www.bilibili.tv/en/video/{video_id}'
1229
1230 def _extract_video_metadata(self, url, video_id, season_id):
1231 url, smuggled_data = unsmuggle_url(url, {})
1232 if smuggled_data.get('title'):
1233 return smuggled_data
1234
1235 webpage = self._download_webpage(url, video_id)
1236 # Bstation layout
1237 initial_data = (
1238 self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
1239 or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
1240 video_data = traverse_obj(
1241 initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
1242
1243 if season_id and not video_data:
1244 # Non-Bstation layout, read through episode list
1245 season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
1246 video_data = traverse_obj(season_json, (
1247 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
1248 ), expected_type=dict, get_all=False)
1249
1250 # XXX: webpage metadata may not be accurate; it is only used so extraction does not crash when video_data is not found
1251 return merge_dicts(
1252 self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
1253 'title': self._html_search_meta('og:title', webpage),
1254 'description': self._html_search_meta('og:description', webpage)
1255 })
1256
1257 def _get_comments_reply(self, root_id, next_id=0, display_id=None):
1258 comment_api_raw_data = self._download_json(
1259 'https://api.bilibili.tv/reply/web/detail', display_id,
1260 note=f'Downloading reply comment of {root_id} - {next_id}',
1261 query={
1262 'platform': 'web',
1263 'ps': 20, # replies per page for a single comment (default: 3)
1264 'root': root_id,
1265 'next': next_id,
1266 })
1267
1268 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
1269 yield {
1270 'author': traverse_obj(replies, ('member', 'name')),
1271 'author_id': traverse_obj(replies, ('member', 'mid')),
1272 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
1273 'text': traverse_obj(replies, ('content', 'message')),
1274 'id': replies.get('rpid'),
1275 'like_count': int_or_none(replies.get('like_count')),
1276 'parent': replies.get('parent'),
1277 'timestamp': unified_timestamp(replies.get('ctime_text'))
1278 }
1279
1280 if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
1281 yield from self._get_comments_reply(
1282 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
1283
1284 def _get_comments(self, video_id, ep_id):
1285 for i in itertools.count(0):
1286 comment_api_raw_data = self._download_json(
1287 'https://api.bilibili.tv/reply/web/root', video_id,
1288 note=f'Downloading comment page {i + 1}',
1289 query={
1290 'platform': 'web',
1291 'pn': i, # page number
1292 'ps': 20, # comments per page (default: 20)
1293 'oid': video_id,
1294 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
1295 'sort_type': 1, # 1: best, 2: recent
1296 })
1297
1298 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
1299 yield {
1300 'author': traverse_obj(replies, ('member', 'name')),
1301 'author_id': traverse_obj(replies, ('member', 'mid')),
1302 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
1303 'text': traverse_obj(replies, ('content', 'message')),
1304 'id': replies.get('rpid'),
1305 'like_count': int_or_none(replies.get('like_count')),
1306 'timestamp': unified_timestamp(replies.get('ctime_text')),
1307 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
1308 }
1309 if replies.get('count'):
1310 yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
1311
1312 if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
1313 break
1314
1315 def _real_extract(self, url):
1316 season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
1317 video_id = ep_id or aid
1318 chapters = None
1319
1320 if ep_id:
1321 intro_ending_json = self._call_api(
1322 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
1323 video_id, fatal=False) or {}
1324 if intro_ending_json.get('skip'):
1325 # FIXME: start and end times seem to be off by a few seconds even when they are correct per ogv.*.js
1326 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
1327 chapters = [{
1328 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
1329 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
1330 'title': 'Intro'
1331 }, {
1332 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
1333 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
1334 'title': 'Outro'
1335 }]
1336
1337 return {
1338 'id': video_id,
1339 **self._extract_video_metadata(url, video_id, season_id),
1340 'formats': self._get_formats(ep_id=ep_id, aid=aid),
1341 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
1342 'chapters': chapters,
1343 '__post_extractor': self.extract_comments(video_id, ep_id)
1344 }
1345
1346
1347 class BiliIntlSeriesIE(BiliIntlBaseIE):
1348 IE_NAME = 'biliIntl:series'
1349 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
1350 _TESTS = [{
1351 'url': 'https://www.bilibili.tv/en/play/34613',
1352 'playlist_mincount': 15,
1353 'info_dict': {
1354 'id': '34613',
1355 'title': 'TONIKAWA: Over the Moon For You',
1356 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1357 'categories': ['Slice of life', 'Comedy', 'Romance'],
1358 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1359 'view_count': int,
1360 },
1361 'params': {
1362 'skip_download': True,
1363 },
1364 }, {
1365 'url': 'https://www.bilibili.tv/en/media/1048837',
1366 'info_dict': {
1367 'id': '1048837',
1368 'title': 'SPY×FAMILY',
1369 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1370 'categories': ['Adventure', 'Action', 'Comedy'],
1371 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
1372 'view_count': int,
1373 },
1374 'playlist_mincount': 25,
1375 }, {
1376 'url': 'https://www.biliintl.com/en/play/34613',
1377 'only_matching': True,
1378 }, {
1379 'url': 'https://www.biliintl.com/EN/play/34613',
1380 'only_matching': True,
1381 }]
1382
1383 def _entries(self, series_id):
1384 series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
1385 for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
1386 episode_id = str(episode['episode_id'])
1387 yield self.url_result(smuggle_url(
1388 BiliIntlIE._make_url(episode_id, series_id),
1389 self._parse_video_metadata(episode)
1390 ), BiliIntlIE, episode_id)
1391
1392 def _real_extract(self, url):
1393 series_id = self._match_id(url)
1394 series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
1395 return self.playlist_result(
1396 self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
1397 categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
1398 thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
1399
1400
1401 class BiliLiveIE(InfoExtractor):
1402 _VALID_URL = r'https?://live.bilibili.com/(?:blanc/)?(?P<id>\d+)'
1403
1404 _TESTS = [{
1405 'url': 'https://live.bilibili.com/196',
1406 'info_dict': {
1407 'id': '33989',
1408 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
1409 'ext': 'flv',
1410 'title': "太空狼人杀联动,不被爆杀就算赢",
1411 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
1412 'timestamp': 1650802769,
1413 },
1414 'skip': 'not live'
1415 }, {
1416 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
1417 'only_matching': True
1418 }, {
1419 'url': 'https://live.bilibili.com/blanc/196',
1420 'only_matching': True
1421 }]
1422
1423 _FORMATS = {
1424 80: {'format_id': 'low', 'format_note': '流畅'},
1425 150: {'format_id': 'high_res', 'format_note': '高清'},
1426 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
1427 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
1428 10000: {'format_id': 'source', 'format_note': '原画'},
1429 20000: {'format_id': '4K', 'format_note': '4K'},
1430 30000: {'format_id': 'dolby', 'format_note': '杜比'},
1431 }
1432
1433 _quality = staticmethod(qualities(list(_FORMATS)))
1434
1435 def _call_api(self, path, room_id, query):
1436 api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
1437 if api_result.get('code') != 0:
1438 raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
1439 return api_result.get('data') or {}
1440
1441 def _parse_formats(self, qn, fmt):
1442 for codec in fmt.get('codec') or []:
1443 if codec.get('current_qn') != qn:
1444 continue
1445 for url_info in codec['url_info']:
1446 yield {
1447 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
1448 'ext': fmt.get('format_name'),
1449 'vcodec': codec.get('codec_name'),
1450 'quality': self._quality(qn),
1451 **self._FORMATS[qn],
1452 }
1453
1454 def _real_extract(self, url):
1455 room_id = self._match_id(url)
1456 room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
1457 if room_data.get('live_status') == 0:
1458 raise ExtractorError('Streamer is not live', expected=True)
1459
1460 formats = []
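# Request play info once per known quality value (qn); each response may contain several codec/CDN variants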
1461 for qn in self._FORMATS.keys():
1462 stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
1463 'room_id': room_id,
1464 'qn': qn,
1465 'codec': '0,1',
1466 'format': '0,2',
1467 'mask': '0',
1468 'no_playurl': '0',
1469 'platform': 'web',
1470 'protocol': '0,1',
1471 })
1472 for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
1473 formats.extend(self._parse_formats(qn, fmt))
1474
1475 return {
1476 'id': room_id,
1477 'title': room_data.get('title'),
1478 'description': room_data.get('description'),
1479 'thumbnail': room_data.get('user_cover'),
1480 'timestamp': stream_data.get('live_time'),
1481 'formats': formats,
1482 'is_live': True,
1483 'http_headers': {
1484 'Referer': url,
1485 },
1486 }