yt_dlp/extractor/bilibili.py
1 import base64
2 import functools
3 import hashlib
4 import itertools
5 import json
6 import math
7 import re
8 import time
9 import urllib.parse
10 import uuid
11
12 from .common import InfoExtractor, SearchInfoExtractor
13 from ..dependencies import Cryptodome
14 from ..networking.exceptions import HTTPError
15 from ..utils import (
16 ExtractorError,
17 GeoRestrictedError,
18 InAdvancePagedList,
19 OnDemandPagedList,
20 bool_or_none,
21 clean_html,
22 determine_ext,
23 filter_dict,
24 float_or_none,
25 format_field,
26 get_element_by_class,
27 int_or_none,
28 join_nonempty,
29 make_archive_id,
30 merge_dicts,
31 mimetype2ext,
32 parse_count,
33 parse_qs,
34 qualities,
35 smuggle_url,
36 srt_subtitles_timecode,
37 str_or_none,
38 traverse_obj,
39 try_call,
40 unified_timestamp,
41 unsmuggle_url,
42 url_or_none,
43 urlencode_postdata,
44 variadic,
45 )
46
47
48 class BilibiliBaseIE(InfoExtractor):
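# _FORMAT_ID_RE pulls the numeric id out of DASH segment URLs of the form '...-<id>.m4s?...';
# extract_formats() below uses that id as a fallback format_id.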
49 _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
50
51 def extract_formats(self, play_info):
52 format_names = {
53 r['quality']: traverse_obj(r, 'new_description', 'display_desc')
54 for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
55 }
56
57 audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
58 flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
59 if flac_audio:
60 audios.append(flac_audio)
61 formats = [{
62 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
63 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
64 'acodec': traverse_obj(audio, ('codecs', {str.lower})),
65 'vcodec': 'none',
66 'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
67 'filesize': int_or_none(audio.get('size')),
68 'format_id': str_or_none(audio.get('id')),
69 } for audio in audios]
70
71 formats.extend({
72 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
73 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
74 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
75 'width': int_or_none(video.get('width')),
76 'height': int_or_none(video.get('height')),
77 'vcodec': video.get('codecs'),
78 'acodec': 'none' if audios else None,
79 'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
80 'tbr': float_or_none(video.get('bandwidth'), scale=1000),
81 'filesize': int_or_none(video.get('size')),
82 'quality': int_or_none(video.get('id')),
83 'format_id': traverse_obj(
84 video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
85 ('id', {str_or_none}), get_all=False),
86 'format': format_names.get(video.get('id')),
87 } for video in traverse_obj(play_info, ('dash', 'video', ...)))
88
89 missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
90 if missing_formats:
91 self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
92 f'you have to log in or become a premium member to download them. {self._login_hint()}')
93
94 return formats
95
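# Illustrative request shape for the helper below (placeholder <bvid>/<cid> values):
#   https://api.bilibili.com/x/player/playurl?bvid=<bvid>&cid=<cid>&fnval=4048
# fnval=4048 appears to be a capability bitmask that includes the DASH flag; the returned
# 'data' object carries the 'dash' streams consumed by extract_formats() above.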
96 def _download_playinfo(self, video_id, cid, headers=None):
97 return self._download_json(
98 'https://api.bilibili.com/x/player/playurl', video_id,
99 query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
100 note=f'Downloading video formats for cid {cid}', headers=headers)['data']
101
102 def json2srt(self, json_data):
103 srt_data = ''
104 for idx, line in enumerate(json_data.get('body') or []):
105 srt_data += (f'{idx + 1}\n'
106 f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
107 f'{line["content"]}\n\n')
108 return srt_data
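# Illustrative json2srt input/output (field names from the code above, timing values made up):
#   {"body": [{"from": 0.0, "to": 1.5, "content": "hello"}]}
# becomes
#   1
#   00:00:00,000 --> 00:00:01,500
#   hello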
109
110 def _get_subtitles(self, video_id, cid, aid=None):
111 subtitles = {
112 'danmaku': [{
113 'ext': 'xml',
114 'url': f'https://comment.bilibili.com/{cid}.xml',
115 }],
116 }
117
118 subtitle_info = traverse_obj(self._download_json(
119 'https://api.bilibili.com/x/player/v2', video_id,
120 query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
121 note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
122 subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
123 if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
124 if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
125 self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
126 for s in subs_list:
127 subtitles.setdefault(s['lan'], []).append({
128 'ext': 'srt',
129 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
130 })
131 return subtitles
132
133 def _get_chapters(self, aid, cid):
134 chapters = aid and cid and self._download_json(
135 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
136 note='Extracting chapters', fatal=False)
137 return traverse_obj(chapters, ('data', 'view_points', ..., {
138 'title': 'content',
139 'start_time': 'from',
140 'end_time': 'to',
141 })) or None
142
143 def _get_comments(self, aid):
144 for idx in itertools.count(1):
145 replies = traverse_obj(
146 self._download_json(
147 f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
148 aid, note=f'Extracting comments from page {idx}', fatal=False),
149 ('data', 'replies'))
150 if not replies:
151 return
152 for children in map(self._get_all_children, replies):
153 yield from children
154
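# _get_all_children flattens a comment thread depth-first: it yields the reply itself and then
# recurses into its nested replies; 'parent' keeps the threading ('root' marks top-level comments).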
155 def _get_all_children(self, reply):
156 yield {
157 'author': traverse_obj(reply, ('member', 'uname')),
158 'author_id': traverse_obj(reply, ('member', 'mid')),
159 'id': reply.get('rpid'),
160 'text': traverse_obj(reply, ('content', 'message')),
161 'timestamp': reply.get('ctime'),
162 'parent': reply.get('parent') or 'root',
163 }
164 for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
165 yield from children
166
167 def _get_episodes_from_season(self, ss_id, url):
168 season_info = self._download_json(
169 'https://api.bilibili.com/pgc/web/season/section', ss_id,
170 note='Downloading season info', query={'season_id': ss_id},
171 headers={'Referer': url, **self.geo_verification_headers()})
172
173 for entry in traverse_obj(season_info, (
174 'result', 'main_section', 'episodes',
175 lambda _, v: url_or_none(v['share_url']) and v['id'])):
176 yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
177
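# Interactive ("stein gate") videos are a graph of choices: _get_divisions walks it recursively,
# one request per edge_id, and groups edges that share a cid so each video segment is listed once.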
178 def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
179 cid_edges = cid_edges or {}
180 division_data = self._download_json(
181 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
182 query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
183 note=f'Extracting divisions from edge {edge_id}')
184 edges.setdefault(edge_id, {}).update(
185 traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
186 'title': ('title', {str}),
187 'cid': ('cid', {int_or_none}),
188 }), get_all=False))
189
190 edges[edge_id].update(traverse_obj(division_data, ('data', {
191 'title': ('title', {str}),
192 'choices': ('edges', 'questions', ..., 'choices', ..., {
193 'edge_id': ('id', {int_or_none}),
194 'cid': ('cid', {int_or_none}),
195 'text': ('option', {str}),
196 }),
197 })))
198 # use dict to combine edges that use the same video section (same cid)
199 cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
200 for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
201 if choice['edge_id'] not in edges:
202 edges[choice['edge_id']] = {'cid': choice['cid']}
203 self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
204 return cid_edges
205
206 def _get_interactive_entries(self, video_id, cid, metainfo):
207 graph_version = traverse_obj(
208 self._download_json(
209 'https://api.bilibili.com/x/player/wbi/v2', video_id,
210 'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
211 ('data', 'interaction', 'graph_version', {int_or_none}))
212 cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
213 for cid, edges in cid_edges.items():
214 play_info = self._download_playinfo(video_id, cid)
215 yield {
216 **metainfo,
217 'id': f'{video_id}_{cid}',
218 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
219 'formats': self.extract_formats(play_info),
220 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
221 'duration': float_or_none(play_info.get('timelength'), scale=1000),
222 'subtitles': self.extract_subtitles(video_id, cid),
223 }
224
225
226 class BiliBiliIE(BilibiliBaseIE):
227 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
228
229 _TESTS = [{
230 'url': 'https://www.bilibili.com/video/BV13x41117TL',
231 'info_dict': {
232 'id': 'BV13x41117TL',
233 'title': '阿滴英文|英文歌分享#6 "Closer',
234 'ext': 'mp4',
235 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
236 'uploader_id': '65880958',
237 'uploader': '阿滴英文',
238 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
239 'duration': 554.117,
240 'tags': list,
241 'comment_count': int,
242 'upload_date': '20170301',
243 'timestamp': 1488353834,
244 'like_count': int,
245 'view_count': int,
246 },
247 }, {
248 'note': 'old av URL version',
249 'url': 'http://www.bilibili.com/video/av1074402/',
250 'info_dict': {
251 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
252 'ext': 'mp4',
253 'uploader': '菊子桑',
254 'uploader_id': '156160',
255 'id': 'BV11x411K7CN',
256 'title': '【金坷垃】金泡沫',
257 'duration': 308.36,
258 'upload_date': '20140420',
259 'timestamp': 1397983878,
260 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
261 'like_count': int,
262 'comment_count': int,
263 'view_count': int,
264 'tags': list,
265 },
266 'params': {'skip_download': True},
267 }, {
268 'note': 'Anthology',
269 'url': 'https://www.bilibili.com/video/BV1bK411W797',
270 'info_dict': {
271 'id': 'BV1bK411W797',
272 'title': '物语中的人物是如何吐槽自己的OP的',
273 },
274 'playlist_count': 18,
275 'playlist': [{
276 'info_dict': {
277 'id': 'BV1bK411W797_p1',
278 'ext': 'mp4',
279 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
280 'tags': 'count:10',
281 'timestamp': 1589601697,
282 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
283 'uploader': '打牌还是打桩',
284 'uploader_id': '150259984',
285 'like_count': int,
286 'comment_count': int,
287 'upload_date': '20200516',
288 'view_count': int,
289 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
290 'duration': 90.314,
291 },
292 }],
293 }, {
294 'note': 'Specific page of Anthology',
295 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
296 'info_dict': {
297 'id': 'BV1bK411W797_p1',
298 'ext': 'mp4',
299 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
300 'tags': 'count:10',
301 'timestamp': 1589601697,
302 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
303 'uploader': '打牌还是打桩',
304 'uploader_id': '150259984',
305 'like_count': int,
306 'comment_count': int,
307 'upload_date': '20200516',
308 'view_count': int,
309 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
310 'duration': 90.314,
311 },
312 }, {
313 'note': 'video has subtitles',
314 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
315 'info_dict': {
316 'id': 'BV12N4y1M7rh',
317 'ext': 'mp4',
318 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
319 'tags': list,
320 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
321 'duration': 313.557,
322 'upload_date': '20220709',
323 'uploader': '小夫太渴',
324 'timestamp': 1657347907,
325 'uploader_id': '1326814124',
326 'comment_count': int,
327 'view_count': int,
328 'like_count': int,
329 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
330 'subtitles': 'count:2',
331 },
332 'params': {'listsubtitles': True},
333 }, {
334 'url': 'https://www.bilibili.com/video/av8903802/',
335 'info_dict': {
336 'id': 'BV13x41117TL',
337 'ext': 'mp4',
338 'title': '阿滴英文|英文歌分享#6 "Closer',
339 'upload_date': '20170301',
340 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
341 'timestamp': 1488353834,
342 'uploader_id': '65880958',
343 'uploader': '阿滴英文',
344 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
345 'duration': 554.117,
346 'tags': list,
347 'comment_count': int,
348 'view_count': int,
349 'like_count': int,
350 },
351 'params': {
352 'skip_download': True,
353 },
354 }, {
355 'note': 'video has chapter',
356 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
357 'info_dict': {
358 'id': 'BV1vL411G7N7',
359 'ext': 'mp4',
360 'title': '如何为你的B站视频添加进度条分段',
361 'timestamp': 1634554558,
362 'upload_date': '20211018',
363 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
364 'tags': list,
365 'uploader': '爱喝咖啡的当麻',
366 'duration': 669.482,
367 'uploader_id': '1680903',
368 'chapters': 'count:6',
369 'comment_count': int,
370 'view_count': int,
371 'like_count': int,
372 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
373 },
374 'params': {'skip_download': True},
375 }, {
376 'note': 'video redirects to festival page',
377 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
378 'info_dict': {
379 'id': 'BV1wP4y1P72h',
380 'ext': 'mp4',
381 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
382 'timestamp': 1643947497,
383 'upload_date': '20220204',
384 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
385 'uploader': '叨叨冯聊音乐',
386 'duration': 246.719,
387 'uploader_id': '528182630',
388 'view_count': int,
389 'like_count': int,
390 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
391 },
392 'params': {'skip_download': True},
393 }, {
394 'note': 'newer festival video',
395 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
396 'info_dict': {
397 'id': 'BV1ay4y1d77f',
398 'ext': 'mp4',
399 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
400 'timestamp': 1674273600,
401 'upload_date': '20230121',
402 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
403 'uploader': '果蝇轰',
404 'duration': 1111.722,
405 'uploader_id': '8469526',
406 'view_count': int,
407 'like_count': int,
408 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
409 },
410 'params': {'skip_download': True},
411 }, {
412 'note': 'interactive/split-path video',
413 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
414 'info_dict': {
415 'id': 'BV1af4y1H7ga',
416 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
417 'timestamp': 1630500414,
418 'upload_date': '20210901',
419 'description': 'md5:01113e39ab06e28042d74ac356a08786',
420 'tags': list,
421 'uploader': '钉宫妮妮Ninico',
422 'duration': 1503,
423 'uploader_id': '8881297',
424 'comment_count': int,
425 'view_count': int,
426 'like_count': int,
427 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
428 },
429 'playlist_count': 33,
430 'playlist': [{
431 'info_dict': {
432 'id': 'BV1af4y1H7ga_400950101',
433 'ext': 'mp4',
434 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
435 'timestamp': 1630500414,
436 'upload_date': '20210901',
437 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
438 'tags': list,
439 'uploader': '钉宫妮妮Ninico',
440 'duration': 11.605,
441 'uploader_id': '8881297',
442 'comment_count': int,
443 'view_count': int,
444 'like_count': int,
445 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
446 },
447 }],
448 }, {
449 'note': '301 redirect to bangumi link',
450 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
451 'info_dict': {
452 'id': '288525',
453 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
454 'ext': 'mp4',
455 'series': '我和我的祖国',
456 'series_id': '4780',
457 'season': '幕后纪实',
458 'season_id': '28609',
459 'season_number': 1,
460 'episode': '钱学森弹道和乘波体飞行器是什么?',
461 'episode_id': '288525',
462 'episode_number': 105,
463 'duration': 1183.957,
464 'timestamp': 1571648124,
465 'upload_date': '20191021',
466 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
467 },
468 }, {
469 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
470 'info_dict': {
471 'id': 'BV1jL41167ZG',
472 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
473 'ext': 'mp4',
474 },
475 'skip': 'supporter-only video',
476 }, {
477 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
478 'info_dict': {
479 'id': 'BV1Ks411f7aQ',
480 'title': '【BD1080P】狼与香辛料I【华盟】',
481 'ext': 'mp4',
482 },
483 'skip': 'login required',
484 }, {
485 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
486 'info_dict': {
487 'id': 'BV1GJ411x7h7',
488 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
489 'ext': 'mp4',
490 },
491 'skip': 'geo-restricted',
492 }]
493
494 def _real_extract(self, url):
495 video_id = self._match_id(url)
496 headers = self.geo_verification_headers()
497 webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
498 if not self._match_valid_url(urlh.url):
499 return self.url_result(urlh.url)
500
501 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
502
503 is_festival = 'videoData' not in initial_state
504 if is_festival:
505 video_data = initial_state['videoInfo']
506 else:
507 play_info_obj = self._search_json(
508 r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
509 if not play_info_obj:
510 if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
511 self.raise_login_required()
512 if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
513 raise ExtractorError(
514 'This video may be deleted or geo-restricted. '
515 'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
516 play_info = traverse_obj(play_info_obj, ('data', {dict}))
517 if not play_info:
518 if traverse_obj(play_info_obj, 'code') == 87007:
519 toast = get_element_by_class('tips-toast', webpage) or ''
520 msg = clean_html(
521 f'{get_element_by_class("belongs-to", toast) or ""},'
522 + (get_element_by_class('level', toast) or ''))
523 raise ExtractorError(
524 f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
525 raise ExtractorError('Failed to extract play info')
526 video_data = initial_state['videoData']
527
528 video_id, title = video_data['bvid'], video_data.get('title')
529
530 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
531 page_list_json = not is_festival and traverse_obj(
532 self._download_json(
533 'https://api.bilibili.com/x/player/pagelist', video_id,
534 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
535 note='Extracting videos in anthology', headers=headers),
536 'data', expected_type=list) or []
537 is_anthology = len(page_list_json) > 1
538
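# If this is a multi-part anthology and no specific part was requested via ?p=N (and
# --no-playlist is not in effect), expand it into a playlist of per-page URLs below; otherwise
# extract the selected part, suffixing the title with ' pNN <part name>' and resolving its cid.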
539 part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
540 if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
541 return self.playlist_from_matches(
542 page_list_json, video_id, title, ie=BiliBiliIE,
543 getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
544
545 if is_anthology:
546 part_id = part_id or 1
547 title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
548
549 aid = video_data.get('aid')
550 old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
551
552 cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
553
554 festival_info = {}
555 if is_festival:
556 play_info = self._download_playinfo(video_id, cid, headers=headers)
557
558 festival_info = traverse_obj(initial_state, {
559 'uploader': ('videoInfo', 'upName'),
560 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
561 'like_count': ('videoStatus', 'like', {int_or_none}),
562 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
563 }, get_all=False)
564
565 metainfo = {
566 **traverse_obj(initial_state, {
567 'uploader': ('upData', 'name'),
568 'uploader_id': ('upData', 'mid', {str_or_none}),
569 'like_count': ('videoData', 'stat', 'like', {int_or_none}),
570 'tags': ('tags', ..., 'tag_name'),
571 'thumbnail': ('videoData', 'pic', {url_or_none}),
572 }),
573 **festival_info,
574 **traverse_obj(video_data, {
575 'description': 'desc',
576 'timestamp': ('pubdate', {int_or_none}),
577 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
578 'comment_count': ('stat', 'reply', {int_or_none}),
579 }, get_all=False),
580 'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
581 '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
582 'title': title,
583 'http_headers': {'Referer': url},
584 }
585
586 is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
587 if is_interactive:
588 return self.playlist_result(
589 self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
590 duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
591 __post_extractor=self.extract_comments(aid))
592 else:
593 return {
594 **metainfo,
595 'duration': float_or_none(play_info.get('timelength'), scale=1000),
596 'chapters': self._get_chapters(aid, cid),
597 'subtitles': self.extract_subtitles(video_id, cid),
598 'formats': self.extract_formats(play_info),
599 '__post_extractor': self.extract_comments(aid),
600 }
601
602
603 class BiliBiliBangumiIE(BilibiliBaseIE):
604 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
605
606 _TESTS = [{
607 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
608 'info_dict': {
609 'id': '21495',
610 'ext': 'mp4',
611 'series': '悠久之翼',
612 'series_id': '774',
613 'season': '第二季',
614 'season_id': '1182',
615 'season_number': 2,
616 'episode': 'forever/ef',
617 'episode_id': '21495',
618 'episode_number': 12,
619 'title': '12 forever/ef',
620 'duration': 1420.791,
621 'timestamp': 1320412200,
622 'upload_date': '20111104',
623 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
624 },
625 }, {
626 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
627 'info_dict': {
628 'id': '267851',
629 'ext': 'mp4',
630 'series': '鬼灭之刃',
631 'series_id': '4358',
632 'season': '立志篇',
633 'season_id': '26801',
634 'season_number': 1,
635 'episode': '残酷',
636 'episode_id': '267851',
637 'episode_number': 1,
638 'title': '1 残酷',
639 'duration': 1425.256,
640 'timestamp': 1554566400,
641 'upload_date': '20190406',
642 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
643 },
644 'skip': 'Geo-restricted',
645 }, {
646 'note': 'a making-of which falls outside main section',
647 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
648 'info_dict': {
649 'id': '345120',
650 'ext': 'mp4',
651 'series': '鬼灭之刃',
652 'series_id': '4358',
653 'season': '立志篇',
654 'season_id': '26801',
655 'season_number': 1,
656 'episode': '炭治郎篇',
657 'episode_id': '345120',
658 'episode_number': 27,
659 'title': '#1 炭治郎篇',
660 'duration': 1922.129,
661 'timestamp': 1602853860,
662 'upload_date': '20201016',
663 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
664 },
665 }]
666
667 def _real_extract(self, url):
668 episode_id = self._match_id(url)
669 headers = self.geo_verification_headers()
670 webpage = self._download_webpage(url, episode_id, headers=headers)
671
672 if '您所在的地区无法观看本片' in webpage:
673 raise GeoRestrictedError('This video is restricted')
674 elif '正在观看预览,大会员免费看全片' in webpage:
675 self.raise_login_required('This video is for premium members only')
676
677 headers['Referer'] = url
678 play_info = self._download_json(
679 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
680 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
681 headers=headers)
682 premium_only = play_info.get('code') == -10403
683 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
684
685 formats = self.extract_formats(play_info)
686 if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
687 self.raise_login_required('This video is for premium members only')
688
689 bangumi_info = self._download_json(
690 'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
691 query={'ep_id': episode_id}, headers=headers)['result']
692
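# Locate this episode inside the season's episode list (its 1-based position is the fallback
# episode number), then work out which of the series' seasons it belongs to.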
693 episode_number, episode_info = next((
694 (idx, ep) for idx, ep in enumerate(traverse_obj(
695 bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
696 if str_or_none(ep.get('id')) == episode_id), (1, {}))
697
698 season_id = bangumi_info.get('season_id')
699 season_number, season_title = season_id and next((
700 (idx + 1, e.get('season_title')) for idx, e in enumerate(
701 traverse_obj(bangumi_info, ('seasons', ...)))
702 if e.get('season_id') == season_id
703 ), (None, None))
704
705 aid = episode_info.get('aid')
706
707 return {
708 'id': episode_id,
709 'formats': formats,
710 **traverse_obj(bangumi_info, {
711 'series': ('series', 'series_title', {str}),
712 'series_id': ('series', 'series_id', {str_or_none}),
713 'thumbnail': ('square_cover', {url_or_none}),
714 }),
715 **traverse_obj(episode_info, {
716 'episode': ('long_title', {str}),
717 'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
718 'timestamp': ('pub_time', {int_or_none}),
719 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
720 }),
721 'episode_id': episode_id,
722 'season': str_or_none(season_title),
723 'season_id': str_or_none(season_id),
724 'season_number': season_number,
725 'duration': float_or_none(play_info.get('timelength'), scale=1000),
726 'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
727 '__post_extractor': self.extract_comments(aid),
728 'http_headers': {'Referer': url},
729 }
730
731
732 class BiliBiliBangumiMediaIE(BilibiliBaseIE):
733 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
734 _TESTS = [{
735 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
736 'info_dict': {
737 'id': '24097891',
738 'title': 'CAROLE & TUESDAY',
739 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
740 },
741 'playlist_mincount': 25,
742 }, {
743 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
744 'info_dict': {
745 'id': '1565',
746 'title': '攻壳机动队 S.A.C. 2nd GIG',
747 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
748 },
749 'playlist_count': 26,
750 'playlist': [{
751 'info_dict': {
752 'id': '68540',
753 'ext': 'mp4',
754 'series': '攻壳机动队',
755 'series_id': '1077',
756 'season': '第二季',
757 'season_id': '1565',
758 'season_number': 2,
759 'episode': '再启动 REEMBODY',
760 'episode_id': '68540',
761 'episode_number': 1,
762 'title': '1 再启动 REEMBODY',
763 'duration': 1525.777,
764 'timestamp': 1425074413,
765 'upload_date': '20150227',
766 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
767 },
768 }],
769 }]
770
771 def _real_extract(self, url):
772 media_id = self._match_id(url)
773 webpage = self._download_webpage(url, media_id)
774
775 initial_state = self._search_json(
776 r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
777 ss_id = initial_state['mediaInfo']['season_id']
778
779 return self.playlist_result(
780 self._get_episodes_from_season(ss_id, url), media_id,
781 **traverse_obj(initial_state, ('mediaInfo', {
782 'title': ('title', {str}),
783 'description': ('evaluate', {str}),
784 })))
785
786
787 class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
788 _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
789 _TESTS = [{
790 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
791 'info_dict': {
792 'id': '26801',
793 'title': '鬼灭之刃',
794 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
795 },
796 'playlist_mincount': 26,
797 }, {
798 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
799 'info_dict': {
800 'id': '2251',
801 'title': '玲音',
802 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
803 },
804 'playlist_count': 13,
805 'playlist': [{
806 'info_dict': {
807 'id': '50188',
808 'ext': 'mp4',
809 'series': '玲音',
810 'series_id': '1526',
811 'season': 'TV',
812 'season_id': '2251',
813 'season_number': 1,
814 'episode': 'WEIRD',
815 'episode_id': '50188',
816 'episode_number': 1,
817 'title': '1 WEIRD',
818 'duration': 1436.992,
819 'timestamp': 1343185080,
820 'upload_date': '20120725',
821 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
822 },
823 }],
824 }]
825
826 def _real_extract(self, url):
827 ss_id = self._match_id(url)
828 webpage = self._download_webpage(url, ss_id)
829 metainfo = traverse_obj(
830 self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
831 ('itemListElement', ..., {
832 'title': ('name', {str}),
833 'description': ('description', {str}),
834 }), get_all=False)
835
836 return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
837
838
839 class BilibiliCheeseBaseIE(BilibiliBaseIE):
840 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
841
842 def _extract_episode(self, season_info, ep_id):
843 episode_info = traverse_obj(season_info, (
844 'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
845 aid, cid = episode_info['aid'], episode_info['cid']
846
847 if traverse_obj(episode_info, 'ep_status') == -1:
848 raise ExtractorError('This course episode is not yet available.', expected=True)
849 if not traverse_obj(episode_info, 'playable'):
850 self.raise_login_required('You need to purchase the course to download this episode')
851
852 play_info = self._download_json(
853 'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
854 query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
855 headers=self._HEADERS, note='Downloading playinfo')['data']
856
857 return {
858 'id': str_or_none(ep_id),
859 'episode_id': str_or_none(ep_id),
860 'formats': self.extract_formats(play_info),
861 'extractor_key': BilibiliCheeseIE.ie_key(),
862 'extractor': BilibiliCheeseIE.IE_NAME,
863 'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
864 **traverse_obj(episode_info, {
865 'episode': ('title', {str}),
866 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
867 'alt_title': ('subtitle', {str}),
868 'duration': ('duration', {int_or_none}),
869 'episode_number': ('index', {int_or_none}),
870 'thumbnail': ('cover', {url_or_none}),
871 'timestamp': ('release_date', {int_or_none}),
872 'view_count': ('play', {int_or_none}),
873 }),
874 **traverse_obj(season_info, {
875 'uploader': ('up_info', 'uname', {str}),
876 'uploader_id': ('up_info', 'mid', {str_or_none}),
877 }),
878 'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
879 '__post_extractor': self.extract_comments(aid),
880 'http_headers': self._HEADERS,
881 }
882
883 def _download_season_info(self, query_key, video_id):
884 return self._download_json(
885 f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
886 headers=self._HEADERS, note='Downloading season info')['data']
887
888
889 class BilibiliCheeseIE(BilibiliCheeseBaseIE):
890 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
891 _TESTS = [{
892 'url': 'https://www.bilibili.com/cheese/play/ep229832',
893 'info_dict': {
894 'id': '229832',
895 'ext': 'mp4',
896 'title': '1 - 课程先导片',
897 'alt_title': '视频课 · 3分41秒',
898 'uploader': '马督工',
899 'uploader_id': '316568752',
900 'episode': '课程先导片',
901 'episode_id': '229832',
902 'episode_number': 1,
903 'duration': 221,
904 'timestamp': 1695549606,
905 'upload_date': '20230924',
906 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
907 'view_count': int,
908 },
909 }]
910
911 def _real_extract(self, url):
912 ep_id = self._match_id(url)
913 return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
914
915
916 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
917 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
918 _TESTS = [{
919 'url': 'https://www.bilibili.com/cheese/play/ss5918',
920 'info_dict': {
921 'id': '5918',
922 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
923 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
924 },
925 'playlist': [{
926 'info_dict': {
927 'id': '229832',
928 'ext': 'mp4',
929 'title': '1 - 课程先导片',
930 'alt_title': '视频课 · 3分41秒',
931 'uploader': '马督工',
932 'uploader_id': '316568752',
933 'episode': '课程先导片',
934 'episode_id': '229832',
935 'episode_number': 1,
936 'duration': 221,
937 'timestamp': 1695549606,
938 'upload_date': '20230924',
939 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
940 'view_count': int,
941 },
942 }],
943 'params': {'playlist_items': '1'},
944 }, {
945 'url': 'https://www.bilibili.com/cheese/play/ss5918',
946 'info_dict': {
947 'id': '5918',
948 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
949 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
950 },
951 'playlist_mincount': 5,
952 'skip': 'paid video in list',
953 }]
954
955 def _get_cheese_entries(self, season_info):
956 for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
957 yield self._extract_episode(season_info, ep_id)
958
959 def _real_extract(self, url):
960 season_id = self._match_id(url)
961 season_info = self._download_season_info('season_id', season_id)
962
963 return self.playlist_result(
964 self._get_cheese_entries(season_info), season_id,
965 **traverse_obj(season_info, {
966 'title': ('title', {str}),
967 'description': ('subtitle', {str}),
968 }))
969
970
971 class BilibiliSpaceBaseIE(InfoExtractor):
972 def _extract_playlist(self, fetch_page, get_metadata, get_entries):
973 first_page = fetch_page(0)
974 metadata = get_metadata(first_page)
975
976 paged_list = InAdvancePagedList(
977 lambda idx: get_entries(fetch_page(idx) if idx else first_page),
978 metadata['page_count'], metadata['page_size'])
979
980 return metadata, paged_list
981
982
983 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
984 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
985 _TESTS = [{
986 'url': 'https://space.bilibili.com/3985676/video',
987 'info_dict': {
988 'id': '3985676',
989 },
990 'playlist_mincount': 178,
991 }, {
992 'url': 'https://space.bilibili.com/313580179/video',
993 'info_dict': {
994 'id': '313580179',
995 },
996 'playlist_mincount': 92,
997 }]
998
999 def _extract_signature(self, playlist_id):
1000 session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
1001
1002 key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
1003 img_key = traverse_obj(
1004 session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
1005 sub_key = traverse_obj(
1006 session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
1007
1008 session_key = img_key + sub_key
1009
1010 signature_values = []
1011 for position in (
1012 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
1013 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
1014 57, 62, 11, 36, 20, 34, 44, 52,
1015 ):
1016 char_at_position = try_call(lambda: session_key[position])
1017 if char_at_position:
1018 signature_values.append(char_at_position)
1019
1020 return ''.join(signature_values)[:32]
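# The characters of img_key + sub_key are re-ordered by the fixed position table above and the
# first 32 characters form the signing key (the WBI "mixin key"); the hard-coded strings are
# fallbacks for when the nav endpoint cannot be fetched.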
1021
1022 def _real_extract(self, url):
1023 playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
1024 if not is_video_url:
1025 self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1026 'To download audio, add "/audio" to the URL')
1027
1028 signature = self._extract_signature(playlist_id)
1029
1030 def fetch_page(page_idx):
1031 query = {
1032 'keyword': '',
1033 'mid': playlist_id,
1034 'order': 'pubdate',
1035 'order_avoided': 'true',
1036 'platform': 'web',
1037 'pn': page_idx + 1,
1038 'ps': 30,
1039 'tid': 0,
1040 'web_location': 1550101,
1041 'wts': int(time.time()),
1042 }
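# w_rid is the WBI signature: md5 of the urlencoded query above (wts timestamp included)
# concatenated with the 32-character key from _extract_signature().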
1043 query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
1044
1045 try:
1046 response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
1047 playlist_id, note=f'Downloading page {page_idx}', query=query,
1048 headers={'referer': url})
1049 except ExtractorError as e:
1050 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
1051 raise ExtractorError(
1052 'Request is blocked by server (412); please add cookies, wait and try again later.', expected=True)
1053 raise
1054 if response['code'] in (-352, -401):
1055 raise ExtractorError(
1056 f'Request is blocked by server ({-response["code"]}), '
1057 'please add cookies, wait and try again later.', expected=True)
1058 return response['data']
1059
1060 def get_metadata(page_data):
1061 page_size = page_data['page']['ps']
1062 entry_count = page_data['page']['count']
1063 return {
1064 'page_count': math.ceil(entry_count / page_size),
1065 'page_size': page_size,
1066 }
1067
1068 def get_entries(page_data):
1069 for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
1070 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
1071
1072 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1073 return self.playlist_result(paged_list, playlist_id)
1074
1075
1076 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
1077 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
1078 _TESTS = [{
1079 'url': 'https://space.bilibili.com/313580179/audio',
1080 'info_dict': {
1081 'id': '313580179',
1082 },
1083 'playlist_mincount': 1,
1084 }]
1085
1086 def _real_extract(self, url):
1087 playlist_id = self._match_id(url)
1088
1089 def fetch_page(page_idx):
1090 return self._download_json(
1091 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
1092 note=f'Downloading page {page_idx}',
1093 query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
1094
1095 def get_metadata(page_data):
1096 return {
1097 'page_count': page_data['pageCount'],
1098 'page_size': page_data['pageSize'],
1099 }
1100
1101 def get_entries(page_data):
1102 for entry in page_data.get('data', []):
1103 yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
1104
1105 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1106 return self.playlist_result(paged_list, playlist_id)
1107
1108
1109 class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
1110 def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
1111 for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
1112 yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
1113
1114 def _get_uploader(self, uid, playlist_id):
1115 webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
1116 return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
1117
1118 def _extract_playlist(self, fetch_page, get_metadata, get_entries):
1119 metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
1120 metadata.pop('page_count', None)
1121 metadata.pop('page_size', None)
1122 return metadata, page_list
1123
1124
1125 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
1126 _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
1127 _TESTS = [{
1128 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
1129 'info_dict': {
1130 'id': '2142762_57445',
1131 'title': '【完结】《底特律 变人》全结局流程解说',
1132 'description': '',
1133 'uploader': '老戴在此',
1134 'uploader_id': '2142762',
1135 'timestamp': int,
1136 'upload_date': str,
1137 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
1138 },
1139 'playlist_mincount': 31,
1140 }]
1141
1142 def _real_extract(self, url):
1143 mid, sid = self._match_valid_url(url).group('mid', 'sid')
1144 playlist_id = f'{mid}_{sid}'
1145
1146 def fetch_page(page_idx):
1147 return self._download_json(
1148 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
1149 playlist_id, note=f'Downloading page {page_idx}',
1150 query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
1151
1152 def get_metadata(page_data):
1153 page_size = page_data['page']['page_size']
1154 entry_count = page_data['page']['total']
1155 return {
1156 'page_count': math.ceil(entry_count / page_size),
1157 'page_size': page_size,
1158 'uploader': self._get_uploader(mid, playlist_id),
1159 **traverse_obj(page_data, {
1160 'title': ('meta', 'name', {str}),
1161 'description': ('meta', 'description', {str}),
1162 'uploader_id': ('meta', 'mid', {str_or_none}),
1163 'timestamp': ('meta', 'ptime', {int_or_none}),
1164 'thumbnail': ('meta', 'cover', {url_or_none}),
1165 }),
1166 }
1167
1168 def get_entries(page_data):
1169 return self._get_entries(page_data, 'archives')
1170
1171 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1172 return self.playlist_result(paged_list, playlist_id, **metadata)
1173
1174
1175 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
1176 _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
1177 _TESTS = [{
1178 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
1179 'info_dict': {
1180 'id': '1958703906_547718',
1181 'title': '直播回放',
1182 'description': '直播回放',
1183 'uploader': '靡烟miya',
1184 'uploader_id': '1958703906',
1185 'timestamp': 1637985853,
1186 'upload_date': '20211127',
1187 'modified_timestamp': int,
1188 'modified_date': str,
1189 },
1190 'playlist_mincount': 513,
1191 }]
1192
1193 def _real_extract(self, url):
1194 mid, sid = self._match_valid_url(url).group('mid', 'sid')
1195 playlist_id = f'{mid}_{sid}'
1196 playlist_meta = traverse_obj(self._download_json(
1197 f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
1198 ), {
1199 'title': ('data', 'meta', 'name', {str}),
1200 'description': ('data', 'meta', 'description', {str}),
1201 'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
1202 'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
1203 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
1204 })
1205
1206 def fetch_page(page_idx):
1207 return self._download_json(
1208 'https://api.bilibili.com/x/series/archives',
1209 playlist_id, note=f'Downloading page {page_idx}',
1210 query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
1211
1212 def get_metadata(page_data):
1213 page_size = page_data['page']['size']
1214 entry_count = page_data['page']['total']
1215 return {
1216 'page_count': math.ceil(entry_count / page_size),
1217 'page_size': page_size,
1218 'uploader': self._get_uploader(mid, playlist_id),
1219 **playlist_meta,
1220 }
1221
1222 def get_entries(page_data):
1223 return self._get_entries(page_data, 'archives')
1224
1225 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1226 return self.playlist_result(paged_list, playlist_id, **metadata)
1227
1228
1229 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
1230 _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
1231 _TESTS = [{
1232 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
1233 'info_dict': {
1234 'id': '1103407912',
1235 'title': '【V2】(旧)',
1236 'description': '',
1237 'uploader': '晓月春日',
1238 'uploader_id': '84912',
1239 'timestamp': 1604905176,
1240 'upload_date': '20201109',
1241 'modified_timestamp': int,
1242 'modified_date': str,
1243 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1244 'view_count': int,
1245 'like_count': int,
1246 },
1247 'playlist_mincount': 22,
1248 }, {
1249 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
1250 'only_matching': True,
1251 }]
1252
1253 def _real_extract(self, url):
1254 fid = self._match_id(url)
1255
1256 list_info = self._download_json(
1257 f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
1258 fid, note='Downloading favlist metadata')
1259 if list_info['code'] == -403:
1260 self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
1261
1262 entries = self._get_entries(self._download_json(
1263 f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
1264 fid, note='Download favlist entries'), 'data')
1265
1266 return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
1267 'title': ('title', {str}),
1268 'description': ('intro', {str}),
1269 'uploader': ('upper', 'name', {str}),
1270 'uploader_id': ('upper', 'mid', {str_or_none}),
1271 'timestamp': ('ctime', {int_or_none}),
1272 'modified_timestamp': ('mtime', {int_or_none}),
1273 'thumbnail': ('cover', {url_or_none}),
1274 'view_count': ('cnt_info', 'play', {int_or_none}),
1275 'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
1276 })))
1277
1278
1279 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
1280 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
1281 _TESTS = [{
1282 'url': 'https://www.bilibili.com/watchlater/#/list',
1283 'info_dict': {'id': 'watchlater'},
1284 'playlist_mincount': 0,
1285 'skip': 'login required',
1286 }]
1287
1288 def _real_extract(self, url):
1289 list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
1290 watchlater_info = self._download_json(
1291 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
1292 if watchlater_info['code'] == -101:
1293 self.raise_login_required(msg='You need to log in to access your watchlater list')
1294 entries = self._get_entries(watchlater_info, ('data', 'list'))
1295 return self.playlist_result(entries, id=list_id, title='稍后再看')
1296
1297
1298 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
1299 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
1300 _TESTS = [{
1301 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
1302 'info_dict': {
1303 'id': '5_547718',
1304 'title': '直播回放',
1305 'uploader': '靡烟miya',
1306 'uploader_id': '1958703906',
1307 'timestamp': 1637985853,
1308 'upload_date': '20211127',
1309 },
1310 'playlist_mincount': 513,
1311 }, {
1312 'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
1313 'info_dict': {
1314 'id': 'BV1DU4y1r7tz',
1315 'ext': 'mp4',
1316 'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
1317 'upload_date': '20220820',
1318 'description': '',
1319 'timestamp': 1661016330,
1320 'uploader_id': '1958703906',
1321 'uploader': '靡烟miya',
1322 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1323 'duration': 9552.903,
1324 'tags': list,
1325 'comment_count': int,
1326 'view_count': int,
1327 'like_count': int,
1328 '_old_archive_ids': ['bilibili 687146339_part1'],
1329 },
1330 'params': {'noplaylist': True},
1331 }, {
1332 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
1333 'info_dict': {
1334 'id': '5_547718',
1335 },
1336 'playlist_mincount': 513,
1337 'skip': 'redirect url',
1338 }, {
1339 'url': 'https://www.bilibili.com/list/ml1103407912',
1340 'info_dict': {
1341 'id': '3_1103407912',
1342 'title': '【V2】(旧)',
1343 'uploader': '晓月春日',
1344 'uploader_id': '84912',
1345 'timestamp': 1604905176,
1346 'upload_date': '20201109',
1347 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1348 },
1349 'playlist_mincount': 22,
1350 }, {
1351 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
1352 'info_dict': {
1353 'id': '3_1103407912',
1354 },
1355 'playlist_mincount': 22,
1356 'skip': 'redirect url',
1357 }, {
1358 'url': 'https://www.bilibili.com/list/watchlater',
1359 'info_dict': {'id': 'watchlater'},
1360 'playlist_mincount': 0,
1361 'skip': 'login required',
1362 }, {
1363 'url': 'https://www.bilibili.com/medialist/play/watchlater',
1364 'info_dict': {'id': 'watchlater'},
1365 'playlist_mincount': 0,
1366 'skip': 'login required',
1367 }]
1368
1369 def _extract_medialist(self, query, list_id):
1370 for page_num in itertools.count(1):
1371 page_data = self._download_json(
1372 'https://api.bilibili.com/x/v2/medialist/resource/list',
1373 list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
1374 )['data']
1375 yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
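# Cursor-style pagination: the next request resumes from the id of the last entry on this page.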
1376 query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
1377 if not page_data.get('has_more', False):
1378 break
1379
1380 def _real_extract(self, url):
1381 list_id = self._match_id(url)
1382
1383 bvid = traverse_obj(parse_qs(url), ('bvid', 0))
1384 if not self._yes_playlist(list_id, bvid):
1385 return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
1386
1387 webpage = self._download_webpage(url, list_id)
1388 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
1389 if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
1390 error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
1391 error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
1392 if error_code == -400 and list_id == 'watchlater':
1393 self.raise_login_required('You need to log in to access your watchlater playlist')
1394 elif error_code == -403:
1395 self.raise_login_required('This is a private playlist. You need to log in as its owner')
1396 elif error_code == 11010:
1397 raise ExtractorError('Playlist is no longer available', expected=True)
1398 raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
1399
1400 query = {
1401 'ps': 20,
1402 'with_current': False,
1403 **traverse_obj(initial_state, {
1404 'type': ('playlist', 'type', {int_or_none}),
1405 'biz_id': ('playlist', 'id', {int_or_none}),
1406 'tid': ('tid', {int_or_none}),
1407 'sort_field': ('sortFiled', {int_or_none}),
1408 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
1409 }),
1410 }
1411 metadata = {
1412 'id': f'{query["type"]}_{query["biz_id"]}',
1413 **traverse_obj(initial_state, ('mediaListInfo', {
1414 'title': ('title', {str}),
1415 'uploader': ('upper', 'name', {str}),
1416 'uploader_id': ('upper', 'mid', {str_or_none}),
1417 'timestamp': ('ctime', {int_or_none}),
1418 'thumbnail': ('cover', {url_or_none}),
1419 })),
1420 }
1421 return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
1422
1423
1424 class BilibiliCategoryIE(InfoExtractor):
1425 IE_NAME = 'Bilibili category extractor'
1426 _MAX_RESULTS = 1000000
1427 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
1428 _TESTS = [{
1429 'url': 'https://www.bilibili.com/v/kichiku/mad',
1430 'info_dict': {
1431 'id': 'kichiku: mad',
1432 'title': 'kichiku: mad',
1433 },
1434 'playlist_mincount': 45,
1435 'params': {
1436 'playlistend': 45,
1437 },
1438 }]
1439
1440 def _fetch_page(self, api_url, num_pages, query, page_num):
1441 parsed_json = self._download_json(
1442 api_url, query, query={'Search_key': query, 'pn': page_num},
1443 note=f'Extracting results from page {page_num} of {num_pages}')
1444
1445 video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
1446 if not video_list:
1447 raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
1448
1449 for video in video_list:
1450 yield self.url_result(
1451 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
1452
1453 def _entries(self, category, subcategory, query):
1454 # map of categories : subcategories : RIDs
1455 rid_map = {
1456 'kichiku': {
1457 'mad': 26,
1458 'manual_vocaloid': 126,
1459 'guide': 22,
1460 'theatre': 216,
1461 'course': 127,
1462 },
1463 }
1464
1465 if category not in rid_map:
1466 raise ExtractorError(
1467 f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1468 if subcategory not in rid_map[category]:
1469 raise ExtractorError(
1470 f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1471 rid_value = rid_map[category][subcategory]
1472
1473 api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1474 page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
1475 page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
1476 count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
1477 if count is None or not size:
1478 raise ExtractorError('Failed to calculate either page count or size')
1479
1480 num_pages = math.ceil(count / size)
1481
1482 return OnDemandPagedList(functools.partial(
1483 self._fetch_page, api_url, num_pages, query), size)
1484
1485 def _real_extract(self, url):
1486 category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
1487 query = f'{category}: {subcategory}'
1488
1489 return self.playlist_result(self._entries(category, subcategory, query), query, query)
1490
1491
1492 class BiliBiliSearchIE(SearchInfoExtractor):
1493 IE_DESC = 'Bilibili video search'
1494 _MAX_RESULTS = 100000
1495 _SEARCH_KEY = 'bilisearch'
1496 _TESTS = [{
1497 'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1498 'playlist_count': 3,
1499 'info_dict': {
1500 'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1501 'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1502 },
1503 'playlist': [{
1504 'info_dict': {
1505 'id': 'BV1n44y1Q7sc',
1506 'ext': 'mp4',
1507 'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
1508 'timestamp': 1669889987,
1509 'upload_date': '20221201',
1510 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
1511 'tags': list,
1512 'uploader': '靡烟miya',
1513 'duration': 123.156,
1514 'uploader_id': '1958703906',
1515 'comment_count': int,
1516 'view_count': int,
1517 'like_count': int,
1518 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1519 '_old_archive_ids': ['bilibili 988222410_part1'],
1520 },
1521 }],
1522 }]
1523
1524 def _search_results(self, query):
1525 if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
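# Synthesize a buvid3 device cookie (random UUID + 'infoc' suffix) when none is present;
# the search endpoint tends to reject requests that carry no buvid3 at all.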
1526 self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
1527 for page_num in itertools.count(1):
1528 videos = self._download_json(
1529 'https://api.bilibili.com/x/web-interface/search/type', query,
1530 note=f'Extracting results from page {page_num}', query={
1531 'Search_key': query,
1532 'keyword': query,
1533 'page': page_num,
1534 'context': '',
1535 'duration': 0,
1536 'tids_2': '',
1537 '__refresh__': 'true',
1538 'search_type': 'video',
1539 'tids': 0,
1540 'highlight': 1,
1541 })['data'].get('result')
1542 if not videos:
1543 break
1544 for video in videos:
1545 yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
1546
1547
1548 class BilibiliAudioBaseIE(InfoExtractor):
1549 def _call_api(self, path, sid, query=None):
1550 if not query:
1551 query = {'sid': sid}
1552 return self._download_json(
1553 'https://www.bilibili.com/audio/music-service-c/web/' + path,
1554 sid, query=query)['data']
1555
1556
1557 class BilibiliAudioIE(BilibiliAudioBaseIE):
1558 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1559 _TEST = {
1560 'url': 'https://www.bilibili.com/audio/au1003142',
1561 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1562 'info_dict': {
1563 'id': '1003142',
1564 'ext': 'm4a',
1565 'title': '【tsukimi】YELLOW / 神山羊',
1566 'artist': 'tsukimi',
1567 'comment_count': int,
1568 'description': 'YELLOW的mp3版!',
1569 'duration': 183,
1570 'subtitles': {
1571 'origin': [{
1572 'ext': 'lrc',
1573 }],
1574 },
1575 'thumbnail': r're:^https?://.+\.jpg',
1576 'timestamp': 1564836614,
1577 'upload_date': '20190803',
1578 'uploader': 'tsukimi-つきみぐー',
1579 'view_count': int,
1580 },
1581 }
1582
1583 def _real_extract(self, url):
1584 au_id = self._match_id(url)
1585
1586 play_data = self._call_api('url', au_id)
1587 formats = [{
1588 'url': play_data['cdns'][0],
1589 'filesize': int_or_none(play_data.get('size')),
1590 'vcodec': 'none',
1591 }]
1592
1593 for a_format in formats:
1594 a_format.setdefault('http_headers', {}).update({
1595 'Referer': url,
1596 })
1597
1598 song = self._call_api('song/info', au_id)
1599 title = song['title']
1600 statistic = song.get('statistic') or {}
1601
1602 subtitles = None
1603 lyric = song.get('lyric')
1604 if lyric:
1605 subtitles = {
1606 'origin': [{
1607 'url': lyric,
1608 }],
1609 }
1610
1611 return {
1612 'id': au_id,
1613 'title': title,
1614 'formats': formats,
1615 'artist': song.get('author'),
1616 'comment_count': int_or_none(statistic.get('comment')),
1617 'description': song.get('intro'),
1618 'duration': int_or_none(song.get('duration')),
1619 'subtitles': subtitles,
1620 'thumbnail': song.get('cover'),
1621 'timestamp': int_or_none(song.get('passtime')),
1622 'uploader': song.get('uname'),
1623 'view_count': int_or_none(statistic.get('play')),
1624 }
1625
1626
1627 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
1628 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1629 _TEST = {
1630 'url': 'https://www.bilibili.com/audio/am10624',
1631 'info_dict': {
1632 'id': '10624',
1633 'title': '每日新曲推荐(每日11:00更新)',
1634 'description': '每天11:00更新,为你推送最新音乐',
1635 },
1636 'playlist_count': 19,
1637 }
1638
1639 def _real_extract(self, url):
1640 am_id = self._match_id(url)
1641
1642 songs = self._call_api(
1643 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
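# A single page of up to 100 songs is fetched; albums with more entries would
# presumably need additional pages, which is not handled here.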
1644
1645 entries = []
1646 for song in songs:
1647 sid = str_or_none(song.get('id'))
1648 if not sid:
1649 continue
1650 entries.append(self.url_result(
1651 'https://www.bilibili.com/audio/au' + sid,
1652 BilibiliAudioIE.ie_key(), sid))
1653
1654 if entries:
1655 album_data = self._call_api('menu/info', am_id) or {}
1656 album_title = album_data.get('title')
1657 if album_title:
1658 for entry in entries:
1659 entry['album'] = album_title
1660 return self.playlist_result(
1661 entries, am_id, album_title, album_data.get('intro'))
1662
1663 return self.playlist_result(entries, am_id)
1664
1665
1666 class BiliBiliPlayerIE(InfoExtractor):
1667 _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1668 _TEST = {
1669 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1670 'only_matching': True,
1671 }
1672
1673 def _real_extract(self, url):
1674 video_id = self._match_id(url)
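# The embed URL only carries the aid; hand off to BiliBiliIE for full extraction.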
1675 return self.url_result(
1676 f'http://www.bilibili.tv/video/av{video_id}/',
1677 ie=BiliBiliIE.ie_key(), video_id=video_id)
1678
1679
1680 class BiliIntlBaseIE(InfoExtractor):
1681 _API_URL = 'https://api.bilibili.tv/intl/gateway'
1682 _NETRC_MACHINE = 'biliintl'
1683 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
1684
1685 def _call_api(self, endpoint, *args, **kwargs):
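# The gateway wraps errors in a non-zero 'code': 10004004/10004005/10023006
# require login, 10004001 indicates a geo restriction; any other code is
# surfaced as a warning or an ExtractorError depending on 'fatal'.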
1686 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
1687 if json.get('code'):
1688 if json['code'] in (10004004, 10004005, 10023006):
1689 self.raise_login_required()
1690 elif json['code'] == 10004001:
1691 self.raise_geo_restricted()
1692 else:
1693 if json.get('message') and str(json['code']) != json['message']:
1694 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1695 else:
1696 errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
1697 if kwargs.get('fatal'):
1698 raise ExtractorError(errmsg)
1699 else:
1700 self.report_warning(errmsg)
1701 return json.get('data')
1702
1703 def json2srt(self, json):
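# Converts the subtitle JSON into SRT text. A minimal sketch with an assumed
# input shape:
#   {"body": [{"from": 1.0, "to": 2.5, "content": "Hi"}]}
# becomes:
#   1
#   00:00:01,000 --> 00:00:02,500
#   Hi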
1704 return '\n\n'.join(
1705 f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1706 for i, line in enumerate(traverse_obj(json, (
1707 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
1708
1709 def _get_subtitles(self, *, ep_id=None, aid=None):
1710 sub_json = self._call_api(
1711 '/web/v2/subtitle', ep_id or aid, fatal=False,
1712 note='Downloading subtitles list', errnote='Unable to download subtitles list',
1713 query=filter_dict({
1714 'platform': 'web',
1715 's_locale': 'en_US',
1716 'episode_id': ep_id,
1717 'aid': aid,
1718 })) or {}
1719 subtitles = {}
1720 fetched_urls = set()
1721 for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
1722 for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
1723 if url in fetched_urls:
1724 continue
1725 fetched_urls.add(url)
1726 sub_ext = determine_ext(url)
1727 sub_lang = sub.get('lang_key') or 'en'
1728
1729 if sub_ext == 'ass':
1730 subtitles.setdefault(sub_lang, []).append({
1731 'ext': 'ass',
1732 'url': url,
1733 })
1734 elif sub_ext == 'json':
1735 sub_data = self._download_json(
1736 url, ep_id or aid, fatal=False,
1737 note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
1738 errnote='Unable to download subtitles')
1739
1740 if sub_data:
1741 subtitles.setdefault(sub_lang, []).append({
1742 'ext': 'srt',
1743 'data': self.json2srt(sub_data),
1744 })
1745 else:
1746 self.report_warning('Unexpected subtitle extension', ep_id or aid)
1747
1748 return subtitles
1749
1750 def _get_formats(self, *, ep_id=None, aid=None):
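# The playurl endpoint returns video and audio resources separately, so every
# format built below is video-only ('acodec': 'none') or audio-only
# ('vcodec': 'none') and is merged by the downloader when needed.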
1751 video_json = self._call_api(
1752 '/web/playurl', ep_id or aid, note='Downloading video formats',
1753 errnote='Unable to download video formats', query=filter_dict({
1754 'platform': 'web',
1755 'ep_id': ep_id,
1756 'aid': aid,
1757 }))
1758 video_json = video_json['playurl']
1759 formats = []
1760 for vid in video_json.get('video') or []:
1761 video_res = vid.get('video_resource') or {}
1762 video_info = vid.get('stream_info') or {}
1763 if not video_res.get('url'):
1764 continue
1765 formats.append({
1766 'url': video_res['url'],
1767 'ext': 'mp4',
1768 'format_note': video_info.get('desc_words'),
1769 'width': video_res.get('width'),
1770 'height': video_res.get('height'),
1771 'vbr': video_res.get('bandwidth'),
1772 'acodec': 'none',
1773 'vcodec': video_res.get('codecs'),
1774 'filesize': video_res.get('size'),
1775 })
1776 for aud in video_json.get('audio_resource') or []:
1777 if not aud.get('url'):
1778 continue
1779 formats.append({
1780 'url': aud['url'],
1781 'ext': 'mp4',
1782 'abr': aud.get('bandwidth'),
1783 'acodec': aud.get('codecs'),
1784 'vcodec': 'none',
1785 'filesize': aud.get('size'),
1786 })
1787
1788 return formats
1789
1790 def _parse_video_metadata(self, video_data):
1791 return {
1792 'title': video_data.get('title_display') or video_data.get('title'),
1793 'description': video_data.get('desc'),
1794 'thumbnail': video_data.get('cover'),
1795 'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
1796 'episode_number': int_or_none(self._search_regex(
1797 r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
1798 }
1799
1800 def _perform_login(self, username, password):
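# Login flow: fetch an RSA public key and a hash salt, encrypt salt+password
# with PKCS#1 v1.5, then POST the base64-encoded result to the password
# login endpoint.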
1801 if not Cryptodome.RSA:
1802 raise ExtractorError('pycryptodomex not found. Please install it', expected=True)
1803
1804 key_data = self._download_json(
1805 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1806 note='Downloading login key', errnote='Unable to download login key')['data']
1807
1808 public_key = Cryptodome.RSA.importKey(key_data['key'])
1809 password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
1810 login_post = self._download_json(
1811 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
1812 'username': username,
1813 'password': base64.b64encode(password_hash).decode('ascii'),
1814 'keep_me': 'true',
1815 's_locale': 'en_US',
1816 'isTrusted': 'true',
1817 }), note='Logging in', errnote='Unable to log in')
1818 if login_post.get('code'):
1819 if login_post.get('message'):
1820 raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
1821 else:
1822 raise ExtractorError('Unable to log in')
1823
1824
1825 class BiliIntlIE(BiliIntlBaseIE):
1826 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1827 _TESTS = [{
1828 # Bstation page
1829 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1830 'info_dict': {
1831 'id': '341736',
1832 'ext': 'mp4',
1833 'title': 'E2 - The First Night',
1834 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1835 'episode_number': 2,
1836 'upload_date': '20201009',
1837 'episode': 'Episode 2',
1838 'timestamp': 1602259500,
1839 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1840 'chapters': [{
1841 'start_time': 0,
1842 'end_time': 76.242,
1843 'title': '<Untitled Chapter 1>',
1844 }, {
1845 'start_time': 76.242,
1846 'end_time': 161.161,
1847 'title': 'Intro',
1848 }, {
1849 'start_time': 1325.742,
1850 'end_time': 1403.903,
1851 'title': 'Outro',
1852 }],
1853 },
1854 }, {
1855 # Non-Bstation page
1856 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1857 'info_dict': {
1858 'id': '11005006',
1859 'ext': 'mp4',
1860 'title': 'E3 - Who?',
1861 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1862 'episode_number': 3,
1863 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1864 'episode': 'Episode 3',
1865 'upload_date': '20211219',
1866 'timestamp': 1639928700,
1867 'chapters': [{
1868 'start_time': 0,
1869 'end_time': 88.0,
1870 'title': '<Untitled Chapter 1>',
1871 }, {
1872 'start_time': 88.0,
1873 'end_time': 156.0,
1874 'title': 'Intro',
1875 }, {
1876 'start_time': 1173.0,
1877 'end_time': 1259.535,
1878 'title': 'Outro',
1879 }],
1880 },
1881 }, {
1882 # Subtitle with empty content
1883 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1884 'info_dict': {
1885 'id': '10131790',
1886 'ext': 'mp4',
1887 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1888 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1889 'episode_number': 140,
1890 },
1891 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
1892 }, {
1893 # episode comment extraction
1894 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1895 'info_dict': {
1896 'id': '340317',
1897 'ext': 'mp4',
1898 'timestamp': 1604057820,
1899 'upload_date': '20201030',
1900 'episode_number': 5,
1901 'title': 'E5 - My Own Steel',
1902 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1903 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1904 'episode': 'Episode 5',
1905 'comment_count': int,
1906 'chapters': [{
1907 'start_time': 0,
1908 'end_time': 61.0,
1909 'title': '<Untitled Chapter 1>',
1910 }, {
1911 'start_time': 61.0,
1912 'end_time': 134.0,
1913 'title': 'Intro',
1914 }, {
1915 'start_time': 1290.0,
1916 'end_time': 1379.0,
1917 'title': 'Outro',
1918 }],
1919 },
1920 'params': {
1921 'getcomments': True,
1922 },
1923 }, {
1924 # user generated content comment extraction
1925 'url': 'https://www.bilibili.tv/en/video/2045730385',
1926 'info_dict': {
1927 'id': '2045730385',
1928 'ext': 'mp4',
1929 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1930 'timestamp': 1667891924,
1931 'upload_date': '20221108',
1932 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
1933 'comment_count': int,
1934 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
1935 },
1936 'params': {
1937 'getcomments': True,
1938 },
1939 }, {
1940 # episode id without intro and outro
1941 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1942 'info_dict': {
1943 'id': '11246489',
1944 'ext': 'mp4',
1945 'title': 'E1 - Operation \'Strix\' <Owl>',
1946 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1947 'timestamp': 1649516400,
1948 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1949 'episode': 'Episode 1',
1950 'episode_number': 1,
1951 'upload_date': '20220409',
1952 },
1953 }, {
1954 'url': 'https://www.biliintl.com/en/play/34613/341736',
1955 'only_matching': True,
1956 }, {
1957 # User-generated content (as opposed to a series licensed from a studio)
1958 'url': 'https://bilibili.tv/en/video/2019955076',
1959 'only_matching': True,
1960 }, {
1961 # No language in URL
1962 'url': 'https://www.bilibili.tv/video/2019955076',
1963 'only_matching': True,
1964 }, {
1965 # Uppercase language in URL
1966 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1967 'only_matching': True,
1968 }]
1969
1970 @staticmethod
1971 def _make_url(video_id, series_id=None):
1972 if series_id:
1973 return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1974 return f'https://www.bilibili.tv/en/video/{video_id}'
1975
1976 def _extract_video_metadata(self, url, video_id, season_id):
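# Metadata may be smuggled in by BiliIntlSeriesIE to avoid re-downloading the
# episode page; otherwise it is scraped from the webpage or the episode list.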
1977 url, smuggled_data = unsmuggle_url(url, {})
1978 if smuggled_data.get('title'):
1979 return smuggled_data
1980
1981 webpage = self._download_webpage(url, video_id)
1982 # Bstation layout
1983 initial_data = (
1984 self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
1985 or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
1986 video_data = traverse_obj(
1987 initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
1988
1989 if season_id and not video_data:
1990 # Non-Bstation layout, read through episode list
1991 season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
1992 video_data = traverse_obj(season_json, (
1993 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
1994 ), expected_type=dict, get_all=False)
1995
1996 # XXX: webpage metadata may not be accurate; it is only used so extraction does not crash when video_data is not found
1997 return merge_dicts(
1998 self._parse_video_metadata(video_data), {
1999 'title': get_element_by_class(
2000 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
2001 'description': get_element_by_class(
2002 'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
2003 }, self._search_json_ld(webpage, video_id, default={}))
2004
2005 def _get_comments_reply(self, root_id, next_id=0, display_id=None):
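# Recursively pages through the replies of a single comment (20 per request)
# until the API reports cursor.is_end.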
2006 comment_api_raw_data = self._download_json(
2007 'https://api.bilibili.tv/reply/web/detail', display_id,
2008 note=f'Downloading reply comment of {root_id} - {next_id}',
2009 query={
2010 'platform': 'web',
2011 'ps': 20, # replies per page (default: 3)
2012 'root': root_id,
2013 'next': next_id,
2014 })
2015
2016 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2017 yield {
2018 'author': traverse_obj(replies, ('member', 'name')),
2019 'author_id': traverse_obj(replies, ('member', 'mid')),
2020 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2021 'text': traverse_obj(replies, ('content', 'message')),
2022 'id': replies.get('rpid'),
2023 'like_count': int_or_none(replies.get('like_count')),
2024 'parent': replies.get('parent'),
2025 'timestamp': unified_timestamp(replies.get('ctime_text')),
2026 }
2027
2028 if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2029 yield from self._get_comments_reply(
2030 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
2031
2032 def _get_comments(self, video_id, ep_id):
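# Pages through top-level comments (sorted by 'best'); replies with a
# non-zero count are expanded via _get_comments_reply().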
2033 for i in itertools.count(0):
2034 comment_api_raw_data = self._download_json(
2035 'https://api.bilibili.tv/reply/web/root', video_id,
2036 note=f'Downloading comment page {i + 1}',
2037 query={
2038 'platform': 'web',
2039 'pn': i, # page number
2040 'ps': 20, # comments per page (default: 20)
2041 'oid': video_id,
2042 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
2043 'sort_type': 1, # 1: best, 2: recent
2044 })
2045
2046 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2047 yield {
2048 'author': traverse_obj(replies, ('member', 'name')),
2049 'author_id': traverse_obj(replies, ('member', 'mid')),
2050 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2051 'text': traverse_obj(replies, ('content', 'message')),
2052 'id': replies.get('rpid'),
2053 'like_count': int_or_none(replies.get('like_count')),
2054 'timestamp': unified_timestamp(replies.get('ctime_text')),
2055 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
2056 }
2057 if replies.get('count'):
2058 yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
2059
2060 if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2061 break
2062
2063 def _real_extract(self, url):
2064 season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
2065 video_id = ep_id or aid
2066 chapters = None
2067
2068 if ep_id:
2069 intro_ending_json = self._call_api(
2070 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2071 video_id, fatal=False) or {}
2072 if intro_ending_json.get('skip'):
2073 # FIXME: the start and end times seem to be off by a few seconds even though they match ogv.*.js
2074 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
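# Timestamps are reported in milliseconds and scaled to seconds here.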
2075 chapters = [{
2076 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
2077 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
2078 'title': 'Intro',
2079 }, {
2080 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
2081 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
2082 'title': 'Outro',
2083 }]
2084
2085 return {
2086 'id': video_id,
2087 **self._extract_video_metadata(url, video_id, season_id),
2088 'formats': self._get_formats(ep_id=ep_id, aid=aid),
2089 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
2090 'chapters': chapters,
2091 '__post_extractor': self.extract_comments(video_id, ep_id),
2092 'http_headers': self._HEADERS,
2093 }
2094
2095
2096 class BiliIntlSeriesIE(BiliIntlBaseIE):
2097 IE_NAME = 'biliIntl:series'
2098 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
2099 _TESTS = [{
2100 'url': 'https://www.bilibili.tv/en/play/34613',
2101 'playlist_mincount': 15,
2102 'info_dict': {
2103 'id': '34613',
2104 'title': 'TONIKAWA: Over the Moon For You',
2105 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2106 'categories': ['Slice of life', 'Comedy', 'Romance'],
2107 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2108 'view_count': int,
2109 },
2110 'params': {
2111 'skip_download': True,
2112 },
2113 }, {
2114 'url': 'https://www.bilibili.tv/en/media/1048837',
2115 'info_dict': {
2116 'id': '1048837',
2117 'title': 'SPY×FAMILY',
2118 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2119 'categories': ['Adventure', 'Action', 'Comedy'],
2120 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
2121 'view_count': int,
2122 },
2123 'playlist_mincount': 25,
2124 }, {
2125 'url': 'https://www.biliintl.com/en/play/34613',
2126 'only_matching': True,
2127 }, {
2128 'url': 'https://www.biliintl.com/EN/play/34613',
2129 'only_matching': True,
2130 }]
2131
2132 def _entries(self, series_id):
2133 series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
2134 for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
2135 episode_id = str(episode['episode_id'])
2136 yield self.url_result(smuggle_url(
2137 BiliIntlIE._make_url(episode_id, series_id),
2138 self._parse_video_metadata(episode),
2139 ), BiliIntlIE, episode_id)
2140
2141 def _real_extract(self, url):
2142 series_id = self._match_id(url)
2143 series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
2144 return self.playlist_result(
2145 self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
2146 categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
2147 thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2148
2149
2150 class BiliLiveIE(InfoExtractor):
2151 _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2152
2153 _TESTS = [{
2154 'url': 'https://live.bilibili.com/196',
2155 'info_dict': {
2156 'id': '33989',
2157 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
2158 'ext': 'flv',
2159 'title': '太空狼人杀联动,不被爆杀就算赢',
2160 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
2161 'timestamp': 1650802769,
2162 },
2163 'skip': 'not live',
2164 }, {
2165 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2166 'only_matching': True,
2167 }, {
2168 'url': 'https://live.bilibili.com/blanc/196',
2169 'only_matching': True,
2170 }]
2171
2172 _FORMATS = {
2173 80: {'format_id': 'low', 'format_note': '流畅'},
2174 150: {'format_id': 'high_res', 'format_note': '高清'},
2175 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
2176 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
2177 10000: {'format_id': 'source', 'format_note': '原画'},
2178 20000: {'format_id': '4K', 'format_note': '4K'},
2179 30000: {'format_id': 'dolby', 'format_note': '杜比'},
2180 }
2181
2182 _quality = staticmethod(qualities(list(_FORMATS)))
2183
2184 def _call_api(self, path, room_id, query):
2185 api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
2186 if api_result.get('code') != 0:
2187 raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
2188 return api_result.get('data') or {}
2189
2190 def _parse_formats(self, qn, fmt):
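# Each quality (qn) may be served by several CDN hosts; the final stream URL
# is assembled as host + base_url + extra.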
2191 for codec in fmt.get('codec') or []:
2192 if codec.get('current_qn') != qn:
2193 continue
2194 for url_info in codec['url_info']:
2195 yield {
2196 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2197 'ext': fmt.get('format_name'),
2198 'vcodec': codec.get('codec_name'),
2199 'quality': self._quality(qn),
2200 **self._FORMATS[qn],
2201 }
2202
2203 def _real_extract(self, url):
2204 room_id = self._match_id(url)
2205 room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2206 if room_data.get('live_status') == 0:
2207 raise ExtractorError('Streamer is not live', expected=True)
2208
2209 formats = []
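# getRoomPlayInfo is queried once per quality level; every format/codec entry
# in each response is collected.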
2210 for qn in self._FORMATS:
2211 stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2212 'room_id': room_id,
2213 'qn': qn,
2214 'codec': '0,1',
2215 'format': '0,2',
2216 'mask': '0',
2217 'no_playurl': '0',
2218 'platform': 'web',
2219 'protocol': '0,1',
2220 })
2221 for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2222 formats.extend(self._parse_formats(qn, fmt))
2223
2224 return {
2225 'id': room_id,
2226 'title': room_data.get('title'),
2227 'description': room_data.get('description'),
2228 'thumbnail': room_data.get('user_cover'),
2229 'timestamp': stream_data.get('live_time'),
2230 'formats': formats,
2231 'is_live': True,
2232 'http_headers': {
2233 'Referer': url,
2234 },
2235 }