10 from .common
import InfoExtractor
, SearchInfoExtractor
11 from ..dependencies
import Cryptodome
12 from ..networking
.exceptions
import HTTPError
31 srt_subtitles_timecode
,
43 class BilibiliBaseIE(InfoExtractor
):
44 _FORMAT_ID_RE
= re
.compile(r
'-(\d+)\.m4s\?')
46 def extract_formats(self
, play_info
):
48 r
['quality']: traverse_obj(r
, 'new_description', 'display_desc')
49 for r
in traverse_obj(play_info
, ('support_formats', lambda _
, v
: v
['quality']))
52 audios
= traverse_obj(play_info
, ('dash', (None, 'dolby'), 'audio', ..., {dict}
))
53 flac_audio
= traverse_obj(play_info
, ('dash', 'flac', 'audio'))
55 audios
.append(flac_audio
)
57 'url': traverse_obj(audio
, 'baseUrl', 'base_url', 'url'),
58 'ext': mimetype2ext(traverse_obj(audio
, 'mimeType', 'mime_type')),
59 'acodec': traverse_obj(audio
, ('codecs', {str.lower}
)),
61 'tbr': float_or_none(audio
.get('bandwidth'), scale
=1000),
62 'filesize': int_or_none(audio
.get('size')),
63 'format_id': str_or_none(audio
.get('id')),
64 } for audio
in audios
]
67 'url': traverse_obj(video
, 'baseUrl', 'base_url', 'url'),
68 'ext': mimetype2ext(traverse_obj(video
, 'mimeType', 'mime_type')),
69 'fps': float_or_none(traverse_obj(video
, 'frameRate', 'frame_rate')),
70 'width': int_or_none(video
.get('width')),
71 'height': int_or_none(video
.get('height')),
72 'vcodec': video
.get('codecs'),
73 'acodec': 'none' if audios
else None,
74 'dynamic_range': {126: 'DV', 125: 'HDR10'}
.get(int_or_none(video
.get('id'))),
75 'tbr': float_or_none(video
.get('bandwidth'), scale
=1000),
76 'filesize': int_or_none(video
.get('size')),
77 'quality': int_or_none(video
.get('id')),
78 'format_id': traverse_obj(
79 video
, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}
, 1),
80 ('id', {str_or_none}
), get_all
=False),
81 'format': format_names
.get(video
.get('id')),
82 } for video
in traverse_obj(play_info
, ('dash', 'video', ...)))
84 missing_formats
= format_names
.keys() - set(traverse_obj(formats
, (..., 'quality')))
86 self
.to_screen(f
'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
87 f
'you have to login or become premium member to download them. {self._login_hint()}')
91 def json2srt(self
, json_data
):
93 for idx
, line
in enumerate(json_data
.get('body') or []):
94 srt_data
+= (f
'{idx + 1}\n'
95 f
'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
96 f
'{line["content"]}\n\n')
99 def _get_subtitles(self
, video_id
, aid
, cid
):
103 'url': f
'https://comment.bilibili.com/{cid}.xml',
107 video_info_json
= self
._download
_json
(f
'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id
)
108 for s
in traverse_obj(video_info_json
, ('data', 'subtitle', 'subtitles', ...)):
109 subtitles
.setdefault(s
['lan'], []).append({
111 'data': self
.json2srt(self
._download
_json
(s
['subtitle_url'], video_id
))
115 def _get_chapters(self
, aid
, cid
):
116 chapters
= aid
and cid
and self
._download
_json
(
117 'https://api.bilibili.com/x/player/v2', aid
, query
={'aid': aid, 'cid': cid}
,
118 note
='Extracting chapters', fatal
=False)
119 return traverse_obj(chapters
, ('data', 'view_points', ..., {
121 'start_time': 'from',
125 def _get_comments(self
, aid
):
126 for idx
in itertools
.count(1):
127 replies
= traverse_obj(
129 f
'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
130 aid
, note
=f
'Extracting comments from page {idx}', fatal
=False),
134 for children
in map(self
._get
_all
_children
, replies
):
137 def _get_all_children(self
, reply
):
139 'author': traverse_obj(reply
, ('member', 'uname')),
140 'author_id': traverse_obj(reply
, ('member', 'mid')),
141 'id': reply
.get('rpid'),
142 'text': traverse_obj(reply
, ('content', 'message')),
143 'timestamp': reply
.get('ctime'),
144 'parent': reply
.get('parent') or 'root',
146 for children
in map(self
._get
_all
_children
, traverse_obj(reply
, ('replies', ...))):
    def _get_episodes_from_season(self, ss_id, url):
        """Yield url_result entries for every episode of a bangumi season.

        ss_id: numeric season id for the pgc season-section API.
        url: originating page URL, sent as Referer together with the
        geo-verification headers.
        """
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        # Keep only episodes that have both a valid share_url and an id;
        # each is delegated to BiliBiliBangumiIE under the 'ep<id>' video id.
        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
161 class BiliBiliIE(BilibiliBaseIE
):
162 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
165 'url': 'https://www.bilibili.com/video/BV13x41117TL',
167 'id': 'BV13x41117TL',
168 'title': '阿滴英文|英文歌分享#6 "Closer',
170 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
171 'uploader_id': '65880958',
173 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
176 'comment_count': int,
177 'upload_date': '20170301',
178 'timestamp': 1488353834,
184 'url': 'http://www.bilibili.com/video/av1074402/',
186 'thumbnail': r
're:^https?://.*\.(jpg|jpeg)$',
189 'uploader_id': '156160',
190 'id': 'BV11x411K7CN',
193 'upload_date': '20140420',
194 'timestamp': 1397983878,
195 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
197 'comment_count': int,
201 'params': {'skip_download': True}
,
204 'url': 'https://www.bilibili.com/video/BV1bK411W797',
206 'id': 'BV1bK411W797',
207 'title': '物语中的人物是如何吐槽自己的OP的'
209 'playlist_count': 18,
212 'id': 'BV1bK411W797_p1',
214 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
216 'timestamp': 1589601697,
217 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
218 'uploader': '打牌还是打桩',
219 'uploader_id': '150259984',
221 'comment_count': int,
222 'upload_date': '20200516',
224 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
229 'note': 'Specific page of Anthology',
230 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
232 'id': 'BV1bK411W797_p1',
234 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
236 'timestamp': 1589601697,
237 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
238 'uploader': '打牌还是打桩',
239 'uploader_id': '150259984',
241 'comment_count': int,
242 'upload_date': '20200516',
244 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
248 'note': 'video has subtitles',
249 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
251 'id': 'BV12N4y1M7rh',
253 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
255 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
257 'upload_date': '20220709',
259 'timestamp': 1657347907,
260 'uploader_id': '1326814124',
261 'comment_count': int,
264 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
265 'subtitles': 'count:2'
267 'params': {'listsubtitles': True}
,
269 'url': 'https://www.bilibili.com/video/av8903802/',
271 'id': 'BV13x41117TL',
273 'title': '阿滴英文|英文歌分享#6 "Closer',
274 'upload_date': '20170301',
275 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
276 'timestamp': 1488353834,
277 'uploader_id': '65880958',
279 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
282 'comment_count': int,
287 'skip_download': True,
290 'note': 'video has chapter',
291 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
293 'id': 'BV1vL411G7N7',
295 'title': '如何为你的B站视频添加进度条分段',
296 'timestamp': 1634554558,
297 'upload_date': '20211018',
298 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
300 'uploader': '爱喝咖啡的当麻',
302 'uploader_id': '1680903',
303 'chapters': 'count:6',
304 'comment_count': int,
307 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
309 'params': {'skip_download': True}
,
311 'note': 'video redirects to festival page',
312 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
314 'id': 'BV1wP4y1P72h',
316 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
317 'timestamp': 1643947497,
318 'upload_date': '20220204',
319 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
320 'uploader': '叨叨冯聊音乐',
322 'uploader_id': '528182630',
325 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
327 'params': {'skip_download': True}
,
329 'note': 'newer festival video',
330 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
332 'id': 'BV1ay4y1d77f',
334 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
335 'timestamp': 1674273600,
336 'upload_date': '20230121',
337 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
339 'duration': 1111.722,
340 'uploader_id': '8469526',
343 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
345 'params': {'skip_download': True}
,
348 def _real_extract(self
, url
):
349 video_id
= self
._match
_id
(url
)
350 webpage
= self
._download
_webpage
(url
, video_id
)
351 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)
353 is_festival
= 'videoData' not in initial_state
355 video_data
= initial_state
['videoInfo']
357 play_info
= self
._search
_json
(r
'window\.__playinfo__\s*=', webpage
, 'play info', video_id
)['data']
358 video_data
= initial_state
['videoData']
360 video_id
, title
= video_data
['bvid'], video_data
.get('title')
362 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
363 page_list_json
= not is_festival
and traverse_obj(
365 'https://api.bilibili.com/x/player/pagelist', video_id
,
366 fatal
=False, query
={'bvid': video_id, 'jsonp': 'jsonp'}
,
367 note
='Extracting videos in anthology'),
368 'data', expected_type
=list) or []
369 is_anthology
= len(page_list_json
) > 1
371 part_id
= int_or_none(parse_qs(url
).get('p', [None])[-1])
372 if is_anthology
and not part_id
and self
._yes
_playlist
(video_id
, video_id
):
373 return self
.playlist_from_matches(
374 page_list_json
, video_id
, title
, ie
=BiliBiliIE
,
375 getter
=lambda entry
: f
'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
378 part_id
= part_id
or 1
379 title
+= f
' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
381 aid
= video_data
.get('aid')
382 old_video_id
= format_field(aid
, None, f
'%s_part{part_id or 1}')
384 cid
= traverse_obj(video_data
, ('pages', part_id
- 1, 'cid')) if part_id
else video_data
.get('cid')
388 play_info
= self
._download
_json
(
389 'https://api.bilibili.com/x/player/playurl', video_id
,
390 query
={'bvid': video_id, 'cid': cid, 'fnval': 4048}
,
391 note
='Extracting festival video formats')['data']
393 festival_info
= traverse_obj(initial_state
, {
394 'uploader': ('videoInfo', 'upName'),
395 'uploader_id': ('videoInfo', 'upMid', {str_or_none}
),
396 'like_count': ('videoStatus', 'like', {int_or_none}
),
397 'thumbnail': ('sectionEpisodes', lambda _
, v
: v
['bvid'] == video_id
, 'cover'),
401 **traverse_obj(initial_state
, {
402 'uploader': ('upData', 'name'),
403 'uploader_id': ('upData', 'mid', {str_or_none}
),
404 'like_count': ('videoData', 'stat', 'like', {int_or_none}
),
405 'tags': ('tags', ..., 'tag_name'),
406 'thumbnail': ('videoData', 'pic', {url_or_none}
),
409 **traverse_obj(video_data
, {
410 'description': 'desc',
411 'timestamp': ('pubdate', {int_or_none}
),
412 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}
),
413 'comment_count': ('stat', 'reply', {int_or_none}
),
415 'id': f
'{video_id}{format_field(part_id, None, "_p%d")}',
416 'formats': self
.extract_formats(play_info
),
417 '_old_archive_ids': [make_archive_id(self
, old_video_id
)] if old_video_id
else None,
419 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
420 'chapters': self
._get
_chapters
(aid
, cid
),
421 'subtitles': self
.extract_subtitles(video_id
, aid
, cid
),
422 '__post_extractor': self
.extract_comments(aid
),
423 'http_headers': {'Referer': url}
,
427 class BiliBiliBangumiIE(BilibiliBaseIE
):
428 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
431 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
438 'season_id': '26801',
441 'episode_id': '267851',
444 'duration': 1425.256,
445 'timestamp': 1554566400,
446 'upload_date': '20190406',
447 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
449 'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
452 def _real_extract(self
, url
):
453 video_id
= self
._match
_id
(url
)
454 episode_id
= video_id
[2:]
455 webpage
= self
._download
_webpage
(url
, video_id
)
457 if '您所在的地区无法观看本片' in webpage
:
458 raise GeoRestrictedError('This video is restricted')
459 elif '正在观看预览,大会员免费看全片' in webpage
:
460 self
.raise_login_required('This video is for premium members only')
462 headers
= {'Referer': url, **self.geo_verification_headers()}
463 play_info
= self
._download
_json
(
464 'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id
,
465 'Extracting episode', query
={'fnval': '4048', 'ep_id': episode_id}
,
467 premium_only
= play_info
.get('code') == -10403
468 play_info
= traverse_obj(play_info
, ('result', 'video_info', {dict}
)) or {}
470 formats
= self
.extract_formats(play_info
)
471 if not formats
and (premium_only
or '成为大会员抢先看' in webpage
or '开通大会员观看' in webpage
):
472 self
.raise_login_required('This video is for premium members only')
474 bangumi_info
= self
._download
_json
(
475 'https://api.bilibili.com/pgc/view/web/season', video_id
, 'Get episode details',
476 query
={'ep_id': episode_id}
, headers
=headers
)['result']
478 episode_number
, episode_info
= next((
479 (idx
, ep
) for idx
, ep
in enumerate(traverse_obj(
480 bangumi_info
, ('episodes', ..., {dict}
)), 1)
481 if str_or_none(ep
.get('id')) == episode_id
), (1, {}))
483 season_id
= bangumi_info
.get('season_id')
484 season_number
= season_id
and next((
485 idx
+ 1 for idx
, e
in enumerate(
486 traverse_obj(bangumi_info
, ('seasons', ...)))
487 if e
.get('season_id') == season_id
490 aid
= episode_info
.get('aid')
495 **traverse_obj(bangumi_info
, {
496 'series': ('series', 'series_title', {str}
),
497 'series_id': ('series', 'series_id', {str_or_none}
),
498 'thumbnail': ('square_cover', {url_or_none}
),
500 'title': join_nonempty('title', 'long_title', delim
=' ', from_dict
=episode_info
),
501 'episode': episode_info
.get('long_title'),
502 'episode_id': episode_id
,
503 'episode_number': int_or_none(episode_info
.get('title')) or episode_number
,
504 'season_id': str_or_none(season_id
),
505 'season_number': season_number
,
506 'timestamp': int_or_none(episode_info
.get('pub_time')),
507 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
508 'subtitles': self
.extract_subtitles(video_id
, aid
, episode_info
.get('cid')),
509 '__post_extractor': self
.extract_comments(aid
),
510 'http_headers': headers
,
514 class BiliBiliBangumiMediaIE(BilibiliBaseIE
):
515 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
517 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
521 'playlist_mincount': 25,
524 def _real_extract(self
, url
):
525 media_id
= self
._match
_id
(url
)
526 webpage
= self
._download
_webpage
(url
, media_id
)
527 ss_id
= self
._search
_json
(
528 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial_state', media_id
)['mediaInfo']['season_id']
530 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), media_id
)
533 class BiliBiliBangumiSeasonIE(BilibiliBaseIE
):
534 _VALID_URL
= r
'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
536 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
540 'playlist_mincount': 26
543 def _real_extract(self
, url
):
544 ss_id
= self
._match
_id
(url
)
546 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), ss_id
)
class BilibiliSpaceBaseIE(InfoExtractor):
    """Shared pagination helper for the space.bilibili.com extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Fetch page 0 eagerly, then expose all pages lazily.

        Returns (metadata, paged_list): metadata is derived from the first
        page; paged_list is an InAdvancePagedList over every page.
        """
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        def entries_for(page_idx):
            # Reuse the eagerly fetched first page instead of refetching it.
            page = first_page if page_idx == 0 else fetch_page(page_idx)
            return get_entries(page)

        return metadata, InAdvancePagedList(
            entries_for, metadata['page_count'], metadata['page_size'])
561 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE
):
562 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
564 'url': 'https://space.bilibili.com/3985676/video',
568 'playlist_mincount': 178,
570 'url': 'https://space.bilibili.com/313580179/video',
574 'playlist_mincount': 92,
577 def _extract_signature(self
, playlist_id
):
578 session_data
= self
._download
_json
('https://api.bilibili.com/x/web-interface/nav', playlist_id
, fatal
=False)
580 key_from_url
= lambda x
: x
[x
.rfind('/') + 1:].split('.')[0]
581 img_key
= traverse_obj(
582 session_data
, ('data', 'wbi_img', 'img_url', {key_from_url}
)) or '34478ba821254d9d93542680e3b86100'
583 sub_key
= traverse_obj(
584 session_data
, ('data', 'wbi_img', 'sub_url', {key_from_url}
)) or '7e16a90d190a4355a78fd00b32a38de6'
586 session_key
= img_key
+ sub_key
588 signature_values
= []
590 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
591 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
592 57, 62, 11, 36, 20, 34, 44, 52
594 char_at_position
= try_call(lambda: session_key
[position
])
596 signature_values
.append(char_at_position
)
598 return ''.join(signature_values
)[:32]
600 def _real_extract(self
, url
):
601 playlist_id
, is_video_url
= self
._match
_valid
_url
(url
).group('id', 'video')
603 self
.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
604 'To download audios, add a "/audio" to the URL')
606 signature
= self
._extract
_signature
(playlist_id
)
608 def fetch_page(page_idx
):
613 'order_avoided': 'true',
618 'web_location': 1550101,
619 'wts': int(time
.time()),
621 query
['w_rid'] = hashlib
.md5(f
'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
624 response
= self
._download
_json
('https://api.bilibili.com/x/space/wbi/arc/search',
625 playlist_id
, note
=f
'Downloading page {page_idx}', query
=query
)
626 except ExtractorError
as e
:
627 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 412:
628 raise ExtractorError(
629 'Request is blocked by server (412), please add cookies, wait and try later.', expected
=True)
631 if response
['code'] == -401:
632 raise ExtractorError(
633 'Request is blocked by server (401), please add cookies, wait and try later.', expected
=True)
634 return response
['data']
636 def get_metadata(page_data
):
637 page_size
= page_data
['page']['ps']
638 entry_count
= page_data
['page']['count']
640 'page_count': math
.ceil(entry_count
/ page_size
),
641 'page_size': page_size
,
644 def get_entries(page_data
):
645 for entry
in traverse_obj(page_data
, ('list', 'vlist')) or []:
646 yield self
.url_result(f
'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE
, entry
['bvid'])
648 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
649 return self
.playlist_result(paged_list
, playlist_id
)
652 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE
):
653 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
655 'url': 'https://space.bilibili.com/313580179/audio',
659 'playlist_mincount': 1,
    def _real_extract(self, url):
        """Extract a user's uploaded audios (space .../audio) as a playlist."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # API pages are 1-based; page_idx from the paged list is 0-based.
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            # Pagination counts come straight from the API response.
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            # Each song is delegated to BilibiliAudioIE via its au<id> URL.
            for entry in page_data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Base for list-style space extractors (collections, series, favlists)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        # Collect every BV id found under bvid_keys/.../ending_key and
        # delegate each video to BiliBiliIE.
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        # Best-effort scrape of the uploader name from the <title> of the
        # user's space page (both steps are fatal=False).
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        # Pagination bookkeeping is internal; strip it from the metadata
        # that gets merged into the playlist result.
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
701 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE
):
702 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
704 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
706 'id': '2142762_57445',
707 'title': '【完结】《底特律 变人》全结局流程解说',
710 'uploader_id': '2142762',
713 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
715 'playlist_mincount': 31,
    def _real_extract(self, url):
        """Extract a space collection (seasons_archives_list) as a playlist."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            # API pages are 1-based; page_idx from the paged list is 0-based.
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            # BV ids live under the 'archives' key of each page.
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
751 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE
):
752 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
754 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
756 'id': '1958703906_547718',
758 'description': '直播回放',
759 'uploader': '靡烟miya',
760 'uploader_id': '1958703906',
761 'timestamp': 1637985853,
762 'upload_date': '20211127',
763 'modified_timestamp': int,
764 'modified_date': str,
766 'playlist_mincount': 513,
769 def _real_extract(self
, url
):
770 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
771 playlist_id
= f
'{mid}_{sid}'
772 playlist_meta
= traverse_obj(self
._download
_json
(
773 f
'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id
, fatal
=False
775 'title': ('data', 'meta', 'name', {str}
),
776 'description': ('data', 'meta', 'description', {str}
),
777 'uploader_id': ('data', 'meta', 'mid', {str_or_none}
),
778 'timestamp': ('data', 'meta', 'ctime', {int_or_none}
),
779 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}
),
782 def fetch_page(page_idx
):
783 return self
._download
_json
(
784 'https://api.bilibili.com/x/series/archives',
785 playlist_id
, note
=f
'Downloading page {page_idx}',
786 query
={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
)['data']
788 def get_metadata(page_data
):
789 page_size
= page_data
['page']['size']
790 entry_count
= page_data
['page']['total']
792 'page_count': math
.ceil(entry_count
/ page_size
),
793 'page_size': page_size
,
794 'uploader': self
._get
_uploader
(mid
, playlist_id
),
798 def get_entries(page_data
):
799 return self
._get
_entries
(page_data
, 'archives')
801 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
802 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
805 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE
):
806 _VALID_URL
= r
'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
808 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
814 'uploader_id': '84912',
815 'timestamp': 1604905176,
816 'upload_date': '20201109',
817 'modified_timestamp': int,
818 'modified_date': str,
819 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
823 'playlist_mincount': 22,
825 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
826 'only_matching': True,
    def _real_extract(self, url):
        """Extract a favorites list (favlist) as a playlist."""
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        # -403: the list is private and the current session has no access.
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The /ids endpoint returns the full id list in one response.
        entries = self._get_entries(self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries'), 'data')

        return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        })))
855 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE
):
856 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
858 'url': 'https://www.bilibili.com/watchlater/#/list',
859 'info_dict': {'id': 'watchlater'}
,
860 'playlist_mincount': 0,
861 'skip': 'login required',
864 def _real_extract(self
, url
):
865 list_id
= getattr(self
._get
_cookies
(url
).get('DedeUserID'), 'value', 'watchlater')
866 watchlater_info
= self
._download
_json
(
867 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id
)
868 if watchlater_info
['code'] == -101:
869 self
.raise_login_required(msg
='You need to login to access your watchlater list')
870 entries
= self
._get
_entries
(watchlater_info
, ('data', 'list'))
871 return self
.playlist_result(entries
, id=list_id
, title
='稍后再看')
874 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE
):
875 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
877 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
881 'uploader': '靡烟miya',
882 'uploader_id': '1958703906',
883 'timestamp': 1637985853,
884 'upload_date': '20211127',
886 'playlist_mincount': 513,
888 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
892 'playlist_mincount': 513,
893 'skip': 'redirect url',
895 'url': 'https://www.bilibili.com/list/ml1103407912',
897 'id': '3_1103407912',
900 'uploader_id': '84912',
901 'timestamp': 1604905176,
902 'upload_date': '20201109',
903 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
905 'playlist_mincount': 22,
907 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
909 'id': '3_1103407912',
911 'playlist_mincount': 22,
912 'skip': 'redirect url',
914 'url': 'https://www.bilibili.com/list/watchlater',
915 'info_dict': {'id': 'watchlater'}
,
916 'playlist_mincount': 0,
917 'skip': 'login required',
919 'url': 'https://www.bilibili.com/medialist/play/watchlater',
920 'info_dict': {'id': 'watchlater'}
,
921 'playlist_mincount': 0,
922 'skip': 'login required',
    def _extract_medialist(self, query, list_id):
        """Yield entries from the medialist API, following cursor pagination.

        query is mutated in place: 'oid' is advanced to the id of the last
        item of each page so the next request resumes after it.
        """
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            # Cursor for the next request: id of the last returned item.
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break
936 def _real_extract(self
, url
):
937 list_id
= self
._match
_id
(url
)
938 webpage
= self
._download
_webpage
(url
, list_id
)
939 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', list_id
)
940 if traverse_obj(initial_state
, ('error', 'code', {int_or_none}
)) != 200:
941 error_code
= traverse_obj(initial_state
, ('error', 'trueCode', {int_or_none}
))
942 error_message
= traverse_obj(initial_state
, ('error', 'message', {str_or_none}
))
943 if error_code
== -400 and list_id
== 'watchlater':
944 self
.raise_login_required('You need to login to access your watchlater playlist')
945 elif error_code
== -403:
946 self
.raise_login_required('This is a private playlist. You need to login as its owner')
947 elif error_code
== 11010:
948 raise ExtractorError('Playlist is no longer available', expected
=True)
949 raise ExtractorError(f
'Could not access playlist: {error_code} {error_message}')
953 'with_current': False,
954 **traverse_obj(initial_state
, {
955 'type': ('playlist', 'type', {int_or_none}
),
956 'biz_id': ('playlist', 'id', {int_or_none}
),
957 'tid': ('tid', {int_or_none}
),
958 'sort_field': ('sortFiled', {int_or_none}
),
959 'desc': ('desc', {bool_or_none}
, {str_or_none}
, {str.lower}
),
963 'id': f
'{query["type"]}_{query["biz_id"]}',
964 **traverse_obj(initial_state
, ('mediaListInfo', {
965 'title': ('title', {str}
),
966 'uploader': ('upper', 'name', {str}
),
967 'uploader_id': ('upper', 'mid', {str_or_none}
),
968 'timestamp': ('ctime', {int_or_none}
),
969 'thumbnail': ('cover', {url_or_none}
),
972 return self
.playlist_result(self
._extract
_medialist
(query
, list_id
), **metadata
)
975 class BilibiliCategoryIE(InfoExtractor
):
976 IE_NAME
= 'Bilibili category extractor'
977 _MAX_RESULTS
= 1000000
978 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
980 'url': 'https://www.bilibili.com/v/kichiku/mad',
982 'id': 'kichiku: mad',
983 'title': 'kichiku: mad'
985 'playlist_mincount': 45,
991 def _fetch_page(self
, api_url
, num_pages
, query
, page_num
):
992 parsed_json
= self
._download
_json
(
993 api_url
, query
, query
={'Search_key': query, 'pn': page_num}
,
994 note
='Extracting results from page %s of %s' % (page_num
, num_pages
))
996 video_list
= traverse_obj(parsed_json
, ('data', 'archives'), expected_type
=list)
998 raise ExtractorError('Failed to retrieve video list for page %d' % page_num
)
1000 for video
in video_list
:
1001 yield self
.url_result(
1002 'https://www.bilibili.com/video/%s' % video
['bvid'], 'BiliBili', video
['bvid'])
def _entries(self, category, subcategory, query):
    """Return an OnDemandPagedList over all videos in the given category.

    Validates category/subcategory against the known RID map, probes page 1
    to learn the total count and page size, then pages lazily via _fetch_page.
    Raises ExtractorError for unknown categories or an unusable paging reply.
    """
    # map of categories : subcategories : RIDs
    # NOTE(review): entries other than 'manual_vocaloid' reconstructed — verify
    # against upstream before relying on them
    rid_map = {
        'kichiku': {
            'mad': 26,
            'manual_vocaloid': 126,
            'guide': 22,
            'theatre': 216,
            'course': 127,
        },
    }

    if category not in rid_map:
        raise ExtractorError(
            f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
    if subcategory not in rid_map[category]:
        raise ExtractorError(
            f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
    rid_value = rid_map[category][subcategory]

    api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
    page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
    # BUGFIX: traverse_obj may return None when 'data.page' is missing; fall
    # back to {} so we raise the clear ExtractorError below instead of an
    # AttributeError on .get()
    page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
    count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
    if count is None or not size:
        raise ExtractorError('Failed to calculate either page count or size')

    num_pages = math.ceil(count / size)

    return OnDemandPagedList(functools.partial(
        self._fetch_page, api_url, num_pages, query), size)
def _real_extract(self, url):
    """Build a paged playlist for a /v/<category>/<subcategory> URL."""
    # path components: ['', 'v', category, subcategory, ...]
    category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
    query = f'{category}: {subcategory}'

    return self.playlist_result(self._entries(category, subcategory, query), query, query)
1043 class BiliBiliSearchIE(SearchInfoExtractor
):
1044 IE_DESC
= 'Bilibili video search'
1045 _MAX_RESULTS
= 100000
1046 _SEARCH_KEY
= 'bilisearch'
1048 def _search_results(self
, query
):
1049 for page_num
in itertools
.count(1):
1050 videos
= self
._download
_json
(
1051 'https://api.bilibili.com/x/web-interface/search/type', query
,
1052 note
=f
'Extracting results from page {page_num}', query
={
1053 'Search_key': query
,
1059 '__refresh__': 'true',
1060 'search_type': 'video',
1063 })['data'].get('result')
1066 for video
in videos
:
1067 yield self
.url_result(video
['arcurl'], 'BiliBili', str(video
['aid']))
1070 class BilibiliAudioBaseIE(InfoExtractor
):
def _call_api(self, path, sid, query=None):
    """Fetch JSON from the audio music-service API and return its 'data' field.

    When no query is supplied, defaults to {'sid': sid}.
    """
    result = self._download_json(
        'https://www.bilibili.com/audio/music-service-c/web/' + path,
        sid, query=query or {'sid': sid})
    return result['data']
1079 class BilibiliAudioIE(BilibiliAudioBaseIE
):
1080 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1082 'url': 'https://www.bilibili.com/audio/au1003142',
1083 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1087 'title': '【tsukimi】YELLOW / 神山羊',
1088 'artist': 'tsukimi',
1089 'comment_count': int,
1090 'description': 'YELLOW的mp3版!',
1097 'thumbnail': r
're:^https?://.+\.jpg',
1098 'timestamp': 1564836614,
1099 'upload_date': '20190803',
1100 'uploader': 'tsukimi-つきみぐー',
1105 def _real_extract(self
, url
):
1106 au_id
= self
._match
_id
(url
)
1108 play_data
= self
._call
_api
('url', au_id
)
1110 'url': play_data
['cdns'][0],
1111 'filesize': int_or_none(play_data
.get('size')),
1115 for a_format
in formats
:
1116 a_format
.setdefault('http_headers', {}).update({
1120 song
= self
._call
_api
('song/info', au_id
)
1121 title
= song
['title']
1122 statistic
= song
.get('statistic') or {}
1125 lyric
= song
.get('lyric')
1137 'artist': song
.get('author'),
1138 'comment_count': int_or_none(statistic
.get('comment')),
1139 'description': song
.get('intro'),
1140 'duration': int_or_none(song
.get('duration')),
1141 'subtitles': subtitles
,
1142 'thumbnail': song
.get('cover'),
1143 'timestamp': int_or_none(song
.get('passtime')),
1144 'uploader': song
.get('uname'),
1145 'view_count': int_or_none(statistic
.get('play')),
1149 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE
):
1150 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1152 'url': 'https://www.bilibili.com/audio/am10624',
1155 'title': '每日新曲推荐(每日11:00更新)',
1156 'description': '每天11:00更新,为你推送最新音乐',
1158 'playlist_count': 19,
1161 def _real_extract(self
, url
):
1162 am_id
= self
._match
_id
(url
)
1164 songs
= self
._call
_api
(
1165 'song/of-menu', am_id
, {'sid': am_id, 'pn': 1, 'ps': 100}
)['data']
1169 sid
= str_or_none(song
.get('id'))
1172 entries
.append(self
.url_result(
1173 'https://www.bilibili.com/audio/au' + sid
,
1174 BilibiliAudioIE
.ie_key(), sid
))
1177 album_data
= self
._call
_api
('menu/info', am_id
) or {}
1178 album_title
= album_data
.get('title')
1180 for entry
in entries
:
1181 entry
['album'] = album_title
1182 return self
.playlist_result(
1183 entries
, am_id
, album_title
, album_data
.get('intro'))
1185 return self
.playlist_result(entries
, am_id
)
1188 class BiliBiliPlayerIE(InfoExtractor
):
1189 _VALID_URL
= r
'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1191 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1192 'only_matching': True,
def _real_extract(self, url):
    """Redirect an embedded-player URL to the canonical video page."""
    aid = self._match_id(url)
    target = 'http://www.bilibili.tv/video/av%s/' % aid
    return self.url_result(target, ie=BiliBiliIE.ie_key(), video_id=aid)
1202 class BiliIntlBaseIE(InfoExtractor
):
1203 _API_URL
= 'https://api.bilibili.tv/intl/gateway'
1204 _NETRC_MACHINE
= 'biliintl'
def _call_api(self, endpoint, *args, **kwargs):
    """Call the Bili Intl gateway API and return the 'data' payload.

    Known error codes trigger login/geo-restriction errors; any other
    non-zero code either raises ExtractorError (kwargs['fatal']) or emits
    a warning, using the API's own message when it adds information.
    """
    # NOTE: local renamed from 'json' to stop shadowing the builtin module name
    response = self._download_json(self._API_URL + endpoint, *args, **kwargs)
    if response.get('code'):
        if response['code'] in (10004004, 10004005, 10023006):
            self.raise_login_required()
        elif response['code'] == 10004001:
            self.raise_geo_restricted()

        # only append the server message when it isn't just the code echoed back
        if response.get('message') and str(response['code']) != response['message']:
            errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {response["message"]}'
        else:
            errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
        if kwargs.get('fatal'):
            raise ExtractorError(errmsg)
        else:
            self.report_warning(errmsg)
    return response.get('data')
def json2srt(self, json):
    """Convert a Bili Intl subtitle JSON document to SRT-formatted text.

    Entries with empty content or missing from/to timestamps are dropped.
    """
    valid_lines = traverse_obj(json, (
        'body', lambda _, l: l['content'] and l['from'] and l['to']))
    return '\n\n'.join(
        f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
        for i, line in enumerate(valid_lines))
def _get_subtitles(self, *, ep_id=None, aid=None):
    """Download the subtitle list and each track, converted to SRT.

    Returns a dict mapping language keys to subtitle entries; tracks that
    fail to download are skipped (all downloads are non-fatal).
    """
    sub_json = self._call_api(
        '/web/v2/subtitle', ep_id or aid, fatal=False,
        note='Downloading subtitles list', errnote='Unable to download subtitles list',
        query=filter_dict({
            'platform': 'web',
            's_locale': 'en_US',
            'episode_id': ep_id,
            'aid': aid,
        })) or {}
    subtitles = {}
    for sub in sub_json.get('subtitles') or []:
        sub_url = sub.get('url')
        if not sub_url:
            continue
        sub_data = self._download_json(
            sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
            # BUGFIX: parenthesize so the conditional selects only the ' for ...'
            # suffix; previously the whole note collapsed to '' when 'lang' was
            # missing (conditional expressions bind looser than %)
            note='Downloading subtitles%s' % (f' for {sub["lang"]}' if sub.get('lang') else ''))
        if not sub_data:
            continue
        subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
            'ext': 'srt',
            'data': self.json2srt(sub_data),
        })
    return subtitles
1257 def _get_formats(self
, *, ep_id
=None, aid
=None):
1258 video_json
= self
._call
_api
(
1259 '/web/playurl', ep_id
or aid
, note
='Downloading video formats',
1260 errnote
='Unable to download video formats', query
=filter_dict({
1265 video_json
= video_json
['playurl']
1267 for vid
in video_json
.get('video') or []:
1268 video_res
= vid
.get('video_resource') or {}
1269 video_info
= vid
.get('stream_info') or {}
1270 if not video_res
.get('url'):
1273 'url': video_res
['url'],
1275 'format_note': video_info
.get('desc_words'),
1276 'width': video_res
.get('width'),
1277 'height': video_res
.get('height'),
1278 'vbr': video_res
.get('bandwidth'),
1280 'vcodec': video_res
.get('codecs'),
1281 'filesize': video_res
.get('size'),
1283 for aud
in video_json
.get('audio_resource') or []:
1284 if not aud
.get('url'):
1289 'abr': aud
.get('bandwidth'),
1290 'acodec': aud
.get('codecs'),
1292 'filesize': aud
.get('size'),
def _parse_video_metadata(self, video_data):
    """Extract common metadata fields from an episode/video JSON object.

    The episode number is parsed from display titles shaped like
    'E<N>' or 'E<N> - ...'.
    """
    display_title = video_data.get('title_display')
    return {
        'title': display_title or video_data.get('title'),
        'thumbnail': video_data.get('cover'),
        'episode_number': int_or_none(self._search_regex(
            r'^E(\d+)(?:$| - )', display_title or '', 'episode number', default=None)),
    }
1305 def _perform_login(self
, username
, password
):
1306 if not Cryptodome
.RSA
:
1307 raise ExtractorError('pycryptodomex not found. Please install', expected
=True)
1309 key_data
= self
._download
_json
(
1310 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1311 note
='Downloading login key', errnote
='Unable to download login key')['data']
1313 public_key
= Cryptodome
.RSA
.importKey(key_data
['key'])
1314 password_hash
= Cryptodome
.PKCS1_v1_5
.new(public_key
).encrypt((key_data
['hash'] + password
).encode('utf-8'))
1315 login_post
= self
._download
_json
(
1316 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data
=urlencode_postdata({
1317 'username': username
,
1318 'password': base64
.b64encode(password_hash
).decode('ascii'),
1320 's_locale': 'en_US',
1322 }), note
='Logging in', errnote
='Unable to log in')
1323 if login_post
.get('code'):
1324 if login_post
.get('message'):
1325 raise ExtractorError(f
'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected
=True)
1327 raise ExtractorError('Unable to log in')
1330 class BiliIntlIE(BiliIntlBaseIE
):
1331 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1334 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1338 'title': 'E2 - The First Night',
1339 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1340 'episode_number': 2,
1341 'upload_date': '20201009',
1342 'episode': 'Episode 2',
1343 'timestamp': 1602259500,
1344 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1348 'title': '<Untitled Chapter 1>'
1350 'start_time': 76.242,
1351 'end_time': 161.161,
1354 'start_time': 1325.742,
1355 'end_time': 1403.903,
1361 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1365 'title': 'E3 - Who?',
1366 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1367 'episode_number': 3,
1368 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1369 'episode': 'Episode 3',
1370 'upload_date': '20211219',
1371 'timestamp': 1639928700,
1375 'title': '<Untitled Chapter 1>'
1381 'start_time': 1173.0,
1382 'end_time': 1259.535,
1387 # Subtitle with empty content
1388 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1392 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1393 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1394 'episode_number': 140,
1396 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1398 'url': 'https://www.bilibili.tv/en/video/2041863208',
1402 'timestamp': 1670874843,
1403 'description': 'Scheduled for April 2023.\nStudio: ufotable',
1404 'thumbnail': r
're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
1405 'upload_date': '20221212',
1406 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
1409 # episode comment extraction
1410 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1414 'timestamp': 1604057820,
1415 'upload_date': '20201030',
1416 'episode_number': 5,
1417 'title': 'E5 - My Own Steel',
1418 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1419 'thumbnail': r
're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1420 'episode': 'Episode 5',
1421 'comment_count': int,
1425 'title': '<Untitled Chapter 1>'
1431 'start_time': 1290.0,
1440 # user generated content comment extraction
1441 'url': 'https://www.bilibili.tv/en/video/2045730385',
1445 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1446 'timestamp': 1667891924,
1447 'upload_date': '20221108',
1448 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
1449 'comment_count': int,
1450 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
1456 # episode id without intro and outro
1457 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1461 'title': 'E1 - Operation \'Strix\' <Owl>',
1462 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1463 'timestamp': 1649516400,
1464 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1465 'episode': 'Episode 1',
1466 'episode_number': 1,
1467 'upload_date': '20220409',
1470 'url': 'https://www.biliintl.com/en/play/34613/341736',
1471 'only_matching': True,
1473 # User-generated content (as opposed to a series licensed from a studio)
1474 'url': 'https://bilibili.tv/en/video/2019955076',
1475 'only_matching': True,
1477 # No language in URL
1478 'url': 'https://www.bilibili.tv/video/2019955076',
1479 'only_matching': True,
1481 # Uppercase language in URL
1482 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1483 'only_matching': True,
1486 def _make_url(video_id
, series_id
=None):
1488 return f
'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1489 return f
'https://www.bilibili.tv/en/video/{video_id}'
1491 def _extract_video_metadata(self
, url
, video_id
, season_id
):
1492 url
, smuggled_data
= unsmuggle_url(url
, {})
1493 if smuggled_data
.get('title'):
1494 return smuggled_data
1496 webpage
= self
._download
_webpage
(url
, video_id
)
1499 self
._search
_json
(r
'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage
, 'preload state', video_id
, default
={})
1500 or self
._search
_nuxt
_data
(webpage
, video_id
, '__initialState', fatal
=False, traverse
=None))
1501 video_data
= traverse_obj(
1502 initial_data
, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type
=dict) or {}
1504 if season_id
and not video_data
:
1505 # Non-Bstation layout, read through episode list
1506 season_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id
)
1507 video_data
= traverse_obj(season_json
, (
1508 'sections', ..., 'episodes', lambda _
, v
: str(v
['episode_id']) == video_id
1509 ), expected_type
=dict, get_all
=False)
1511 # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
1513 self
._parse
_video
_metadata
(video_data
), self
._search
_json
_ld
(webpage
, video_id
, fatal
=False), {
1514 'title': self
._html
_search
_meta
('og:title', webpage
),
1515 'description': self
._html
_search
_meta
('og:description', webpage
)
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
    """Recursively yield reply comments under the comment *root_id*.

    Follows the API cursor until 'is_end' is set.
    """
    # NOTE(review): query keys besides 'ps' reconstructed — confirm against API
    raw_reply_data = self._download_json(
        'https://api.bilibili.tv/reply/web/detail', display_id,
        note=f'Downloading reply comment of {root_id} - {next_id}',
        query={
            'platform': 'web',
            'ps': 20,  # comment's reply per page (default: 3)
            'root': root_id,
            'next': next_id,
        })

    for reply in traverse_obj(raw_reply_data, ('data', 'replies', ...)):
        yield {
            'author': traverse_obj(reply, ('member', 'name')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'author_thumbnail': traverse_obj(reply, ('member', 'face')),
            'text': traverse_obj(reply, ('content', 'message')),
            'id': reply.get('rpid'),
            'like_count': int_or_none(reply.get('like_count')),
            'parent': reply.get('parent'),
            'timestamp': unified_timestamp(reply.get('ctime_text')),
        }

    if not traverse_obj(raw_reply_data, ('data', 'cursor', 'is_end')):
        yield from self._get_comments_reply(
            root_id, raw_reply_data['data']['cursor']['next'], display_id)
1545 def _get_comments(self
, video_id
, ep_id
):
1546 for i
in itertools
.count(0):
1547 comment_api_raw_data
= self
._download
_json
(
1548 'https://api.bilibili.tv/reply/web/root', video_id
,
1549 note
=f
'Downloading comment page {i + 1}',
1552 'pn': i
, # page number
1553 'ps': 20, # comment per page (default: 20)
1555 'type': 3 if ep_id
else 1, # 1: user generated content, 3: series content
1556 'sort_type': 1, # 1: best, 2: recent
1559 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
1561 'author': traverse_obj(replies
, ('member', 'name')),
1562 'author_id': traverse_obj(replies
, ('member', 'mid')),
1563 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
1564 'text': traverse_obj(replies
, ('content', 'message')),
1565 'id': replies
.get('rpid'),
1566 'like_count': int_or_none(replies
.get('like_count')),
1567 'timestamp': unified_timestamp(replies
.get('ctime_text')),
1568 'author_is_uploader': bool(traverse_obj(replies
, ('member', 'type'))),
1570 if replies
.get('count'):
1571 yield from self
._get
_comments
_reply
(replies
.get('rpid'), display_id
=video_id
)
1573 if traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
1576 def _real_extract(self
, url
):
1577 season_id
, ep_id
, aid
= self
._match
_valid
_url
(url
).group('season_id', 'ep_id', 'aid')
1578 video_id
= ep_id
or aid
1582 intro_ending_json
= self
._call
_api
(
1583 f
'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
1584 video_id
, fatal
=False) or {}
1585 if intro_ending_json
.get('skip'):
1586 # FIXME: start and end times seem a few seconds off even though correct per ogv.*.js
1587 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
1589 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_start_time')), 1000),
1590 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_end_time')), 1000),
1593 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_start_time')), 1000),
1594 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_end_time')), 1000),
1600 **self
._extract
_video
_metadata
(url
, video_id
, season_id
),
1601 'formats': self
._get
_formats
(ep_id
=ep_id
, aid
=aid
),
1602 'subtitles': self
.extract_subtitles(ep_id
=ep_id
, aid
=aid
),
1603 'chapters': chapters
,
1604 '__post_extractor': self
.extract_comments(video_id
, ep_id
)
1608 class BiliIntlSeriesIE(BiliIntlBaseIE
):
1609 IE_NAME
= 'biliIntl:series'
1610 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
1612 'url': 'https://www.bilibili.tv/en/play/34613',
1613 'playlist_mincount': 15,
1616 'title': 'TONIKAWA: Over the Moon For You',
1617 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1618 'categories': ['Slice of life', 'Comedy', 'Romance'],
1619 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1623 'skip_download': True,
1626 'url': 'https://www.bilibili.tv/en/media/1048837',
1629 'title': 'SPY×FAMILY',
1630 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1631 'categories': ['Adventure', 'Action', 'Comedy'],
1632 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
1635 'playlist_mincount': 25,
1637 'url': 'https://www.biliintl.com/en/play/34613',
1638 'only_matching': True,
1640 'url': 'https://www.biliintl.com/EN/play/34613',
1641 'only_matching': True,
def _entries(self, series_id):
    """Yield url_results for every episode of the season *series_id*.

    Metadata already present in the season listing is smuggled into the
    episode URL so the episode extractor can reuse it.
    """
    season_json = self._call_api(
        f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
    episodes = traverse_obj(
        season_json, ('sections', ..., 'episodes', ...), expected_type=dict)
    for episode in episodes:
        episode_id = str(episode['episode_id'])
        smuggled = smuggle_url(
            BiliIntlIE._make_url(episode_id, series_id),
            self._parse_video_metadata(episode))
        yield self.url_result(smuggled, BiliIntlIE, episode_id)
def _real_extract(self, url):
    """Return a playlist of all episodes with season-level metadata."""
    series_id = self._match_id(url)
    season_info = self._call_api(
        f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web',
        series_id).get('season') or {}
    return self.playlist_result(
        self._entries(series_id), series_id,
        season_info.get('title'), season_info.get('description'),
        categories=traverse_obj(season_info, ('styles', ..., 'title'), expected_type=str_or_none),
        thumbnail=url_or_none(season_info.get('horizontal_cover')),
        view_count=parse_count(season_info.get('view')))
1662 class BiliLiveIE(InfoExtractor
):
1663 _VALID_URL
= r
'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
1666 'url': 'https://live.bilibili.com/196',
1669 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
1671 'title': "太空狼人杀联动,不被爆杀就算赢",
1672 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
1673 'timestamp': 1650802769,
1677 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
1678 'only_matching': True
1680 'url': 'https://live.bilibili.com/blanc/196',
1681 'only_matching': True
1685 80: {'format_id': 'low', 'format_note': '流畅'}
,
1686 150: {'format_id': 'high_res', 'format_note': '高清'}
,
1687 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}
,
1688 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}
,
1689 10000: {'format_id': 'source', 'format_note': '原画'}
,
1690 20000: {'format_id': '4K', 'format_note': '4K'}
,
1691 30000: {'format_id': 'dolby', 'format_note': '杜比'}
,
1694 _quality
= staticmethod(qualities(list(_FORMATS
)))
def _call_api(self, path, room_id, query):
    """Call the live API; raise on a non-zero response code, return 'data'."""
    result = self._download_json(
        f'https://api.live.bilibili.com/{path}', room_id, query=query)
    if result.get('code') != 0:
        raise ExtractorError(result.get('message') or 'Unable to download JSON metadata')
    return result.get('data') or {}
def _parse_formats(self, qn, fmt):
    """Yield format dicts for each codec entry that matches quality *qn*."""
    for codec in fmt.get('codec') or []:
        if codec.get('current_qn') != qn:
            continue
        stream_base = codec['base_url']
        for url_info in codec['url_info']:
            yield {
                'url': f'{url_info["host"]}{stream_base}{url_info["extra"]}',
                'ext': fmt.get('format_name'),
                'vcodec': codec.get('codec_name'),
                'quality': self._quality(qn),
                **self._FORMATS[qn],
            }
1715 def _real_extract(self
, url
):
1716 room_id
= self
._match
_id
(url
)
1717 room_data
= self
._call
_api
('room/v1/Room/get_info', room_id
, {'id': room_id}
)
1718 if room_data
.get('live_status') == 0:
1719 raise ExtractorError('Streamer is not live', expected
=True)
1722 for qn
in self
._FORMATS
.keys():
1723 stream_data
= self
._call
_api
('xlive/web-room/v2/index/getRoomPlayInfo', room_id
, {
1733 for fmt
in traverse_obj(stream_data
, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
1734 formats
.extend(self
._parse
_formats
(qn
, fmt
))
1738 'title': room_data
.get('title'),
1739 'description': room_data
.get('description'),
1740 'thumbnail': room_data
.get('user_cover'),
1741 'timestamp': stream_data
.get('live_time'),