yt_dlp/extractor/bilibili.py

   1 import base64
   2 import functools
   3 import hashlib
   4 import itertools
   5 import json
   6 import math
   7 import re
   8 import time
   9 import urllib.parse
  10 import uuid
  11
  12 from .common import InfoExtractor, SearchInfoExtractor
  13 from ..dependencies import Cryptodome
  14 from ..networking.exceptions import HTTPError
  15 from ..utils import (
  16     ExtractorError,
  17     GeoRestrictedError,
  18     InAdvancePagedList,
  19     OnDemandPagedList,
  20     bool_or_none,
  21     clean_html,
  22     determine_ext,
  23     filter_dict,
  24     float_or_none,
  25     format_field,
  26     get_element_by_class,
  27     int_or_none,
  28     join_nonempty,
  29     make_archive_id,
  30     merge_dicts,
  31     mimetype2ext,
  32     parse_count,
  33     parse_qs,
  34     qualities,
  35     smuggle_url,
  36     srt_subtitles_timecode,
  37     str_or_none,
  38     traverse_obj,
  39     try_call,
  40     unified_timestamp,
  41     unsmuggle_url,
  42     url_or_none,
  43     urlencode_postdata,
  44     variadic,
  45 )
  46
  47
class BilibiliBaseIE(InfoExtractor):
    # Captures the digits between '-' and '.m4s?' in a media URL;
    # extract_formats() prefers this capture as a video format's format_id
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
  50
  51     def extract_formats(self, play_info):
  52         format_names = {
  53             r['quality']: traverse_obj(r, 'new_description', 'display_desc')
  54             for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
  55         }
  56
  57         audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
  58         flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
  59         if flac_audio:
  60             audios.append(flac_audio)
  61         formats = [{
  62             'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
  63             'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
  64             'acodec': traverse_obj(audio, ('codecs', {str.lower})),
  65             'vcodec': 'none',
  66             'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
  67             'filesize': int_or_none(audio.get('size')),
  68             'format_id': str_or_none(audio.get('id')),
  69         } for audio in audios]
  70
  71         formats.extend({
  72             'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
  73             'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
  74             'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
  75             'width': int_or_none(video.get('width')),
  76             'height': int_or_none(video.get('height')),
  77             'vcodec': video.get('codecs'),
  78             'acodec': 'none' if audios else None,
  79             'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
  80             'tbr': float_or_none(video.get('bandwidth'), scale=1000),
  81             'filesize': int_or_none(video.get('size')),
  82             'quality': int_or_none(video.get('id')),
  83             'format_id': traverse_obj(
  84                 video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
  85                 ('id', {str_or_none}), get_all=False),
  86             'format': format_names.get(video.get('id')),
  87         } for video in traverse_obj(play_info, ('dash', 'video', ...)))
  88
  89         missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
  90         if missing_formats:
  91             self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
  92                            f'you have to login or become premium member to download them. {self._login_hint()}')
  93
  94         return formats
  95
  96     def _download_playinfo(self, video_id, cid, headers=None):
  97         return self._download_json(
  98             'https://api.bilibili.com/x/player/playurl', video_id,
  99             query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
 100             note=f'Downloading video formats for cid {cid}', headers=headers)['data']
 101
 102     def json2srt(self, json_data):
 103         srt_data = ''
 104         for idx, line in enumerate(json_data.get('body') or []):
 105             srt_data += (f'{idx + 1}\n'
 106                          f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
 107                          f'{line["content"]}\n\n')
 108         return srt_data
 109
 110     def _get_subtitles(self, video_id, cid, aid=None):
 111         subtitles = {
 112             'danmaku': [{
 113                 'ext': 'xml',
 114                 'url': f'https://comment.bilibili.com/{cid}.xml',
 115             }]
 116         }
 117
 118         subtitle_info = traverse_obj(self._download_json(
 119             'https://api.bilibili.com/x/player/v2', video_id,
 120             query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
 121             note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
 122         subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
 123         if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
 124             if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'):  # no login session cookie
 125                 self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
 126         for s in subs_list:
 127             subtitles.setdefault(s['lan'], []).append({
 128                 'ext': 'srt',
 129                 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
 130             })
 131         return subtitles
 132
 133     def _get_chapters(self, aid, cid):
 134         chapters = aid and cid and self._download_json(
 135             'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
 136             note='Extracting chapters', fatal=False)
 137         return traverse_obj(chapters, ('data', 'view_points', ..., {
 138             'title': 'content',
 139             'start_time': 'from',
 140             'end_time': 'to',
 141         })) or None
 142
 143     def _get_comments(self, aid):
 144         for idx in itertools.count(1):
 145             replies = traverse_obj(
 146                 self._download_json(
 147                     f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
 148                     aid, note=f'Extracting comments from page {idx}', fatal=False),
 149                 ('data', 'replies'))
 150             if not replies:
 151                 return
 152             for children in map(self._get_all_children, replies):
 153                 yield from children
 154
 155     def _get_all_children(self, reply):
 156         yield {
 157             'author': traverse_obj(reply, ('member', 'uname')),
 158             'author_id': traverse_obj(reply, ('member', 'mid')),
 159             'id': reply.get('rpid'),
 160             'text': traverse_obj(reply, ('content', 'message')),
 161             'timestamp': reply.get('ctime'),
 162             'parent': reply.get('parent') or 'root',
 163         }
 164         for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
 165             yield from children
 166
 167     def _get_episodes_from_season(self, ss_id, url):
 168         season_info = self._download_json(
 169             'https://api.bilibili.com/pgc/web/season/section', ss_id,
 170             note='Downloading season info', query={'season_id': ss_id},
 171             headers={'Referer': url, **self.geo_verification_headers()})
 172
 173         for entry in traverse_obj(season_info, (
 174                 'result', 'main_section', 'episodes',
 175                 lambda _, v: url_or_none(v['share_url']) and v['id'])):
 176             yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
 177
 178     def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
 179         cid_edges = cid_edges or {}
 180         division_data = self._download_json(
 181             'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
 182             query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
 183             note=f'Extracting divisions from edge {edge_id}')
 184         edges.setdefault(edge_id, {}).update(
 185             traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
 186                 'title': ('title', {str}),
 187                 'cid': ('cid', {int_or_none}),
 188             }), get_all=False))
 189
 190         edges[edge_id].update(traverse_obj(division_data, ('data', {
 191             'title': ('title', {str}),
 192             'choices': ('edges', 'questions', ..., 'choices', ..., {
 193                 'edge_id': ('id', {int_or_none}),
 194                 'cid': ('cid', {int_or_none}),
 195                 'text': ('option', {str}),
 196             }),
 197         })))
 198         # use dict to combine edges that use the same video section (same cid)
 199         cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
 200         for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
 201             if choice['edge_id'] not in edges:
 202                 edges[choice['edge_id']] = {'cid': choice['cid']}
 203                 self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
 204         return cid_edges
 205
 206     def _get_interactive_entries(self, video_id, cid, metainfo):
 207         graph_version = traverse_obj(
 208             self._download_json(
 209                 'https://api.bilibili.com/x/player/wbi/v2', video_id,
 210                 'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
 211             ('data', 'interaction', 'graph_version', {int_or_none}))
 212         cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
 213         for cid, edges in cid_edges.items():
 214             play_info = self._download_playinfo(video_id, cid)
 215             yield {
 216                 **metainfo,
 217                 'id': f'{video_id}_{cid}',
 218                 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
 219                 'formats': self.extract_formats(play_info),
 220                 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
 221                 'duration': float_or_none(play_info.get('timelength'), scale=1000),
 222                 'subtitles': self.extract_subtitles(video_id, cid),
 223             }
 224
 225
class BiliBiliIE(BilibiliBaseIE):
    # Matches /video/<id> pages and /festival/<name>?...bvid=<id> pages; the
    # two-letter av/BV prefix (any case) is matched but left out of the `id` group
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
 228
 229     _TESTS = [{
 230         'url': 'https://www.bilibili.com/video/BV13x41117TL',
 231         'info_dict': {
 232             'id': 'BV13x41117TL',
 233             'title': '阿滴英文｜英文歌分享#6 "Closer',
 234             'ext': 'mp4',
 235             'description': '滴妹今天唱Closer給你聽! 有史以来，被推最多次也是最久的歌曲，其实歌词跟我原本想像差蛮多的，不过还是好听！ 微博@阿滴英文',
 236             'uploader_id': '65880958',
 237             'uploader': '阿滴英文',
 238             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 239             'duration': 554.117,
 240             'tags': list,
 241             'comment_count': int,
 242             'upload_date': '20170301',
 243             'timestamp': 1488353834,
 244             'like_count': int,
 245             'view_count': int,
 246         },
 247     }, {
 248         'note': 'old av URL version',
 249         'url': 'http://www.bilibili.com/video/av1074402/',
 250         'info_dict': {
 251             'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
 252             'ext': 'mp4',
 253             'uploader': '菊子桑',
 254             'uploader_id': '156160',
 255             'id': 'BV11x411K7CN',
 256             'title': '【金坷垃】金泡沫',
 257             'duration': 308.36,
 258             'upload_date': '20140420',
 259             'timestamp': 1397983878,
 260             'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
 261             'like_count': int,
 262             'comment_count': int,
 263             'view_count': int,
 264             'tags': list,
 265         },
 266         'params': {'skip_download': True},
 267     }, {
 268         'note': 'Anthology',
 269         'url': 'https://www.bilibili.com/video/BV1bK411W797',
 270         'info_dict': {
 271             'id': 'BV1bK411W797',
 272             'title': '物语中的人物是如何吐槽自己的OP的'
 273         },
 274         'playlist_count': 18,
 275         'playlist': [{
 276             'info_dict': {
 277                 'id': 'BV1bK411W797_p1',
 278                 'ext': 'mp4',
 279                 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
 280                 'tags': 'count:10',
 281                 'timestamp': 1589601697,
 282                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 283                 'uploader': '打牌还是打桩',
 284                 'uploader_id': '150259984',
 285                 'like_count': int,
 286                 'comment_count': int,
 287                 'upload_date': '20200516',
 288                 'view_count': int,
 289                 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
 290                 'duration': 90.314,
 291             }
 292         }]
 293     }, {
 294         'note': 'Specific page of Anthology',
 295         'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
 296         'info_dict': {
 297             'id': 'BV1bK411W797_p1',
 298             'ext': 'mp4',
 299             'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
 300             'tags': 'count:10',
 301             'timestamp': 1589601697,
 302             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 303             'uploader': '打牌还是打桩',
 304             'uploader_id': '150259984',
 305             'like_count': int,
 306             'comment_count': int,
 307             'upload_date': '20200516',
 308             'view_count': int,
 309             'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
 310             'duration': 90.314,
 311         }
 312     }, {
 313         'note': 'video has subtitles',
 314         'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
 315         'info_dict': {
 316             'id': 'BV12N4y1M7rh',
 317             'ext': 'mp4',
 318             'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
 319             'tags': list,
 320             'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
 321             'duration': 313.557,
 322             'upload_date': '20220709',
 323             'uploader': '小夫太渴',
 324             'timestamp': 1657347907,
 325             'uploader_id': '1326814124',
 326             'comment_count': int,
 327             'view_count': int,
 328             'like_count': int,
 329             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 330             'subtitles': 'count:2'
 331         },
 332         'params': {'listsubtitles': True},
 333     }, {
 334         'url': 'https://www.bilibili.com/video/av8903802/',
 335         'info_dict': {
 336             'id': 'BV13x41117TL',
 337             'ext': 'mp4',
 338             'title': '阿滴英文｜英文歌分享#6 "Closer',
 339             'upload_date': '20170301',
 340             'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
 341             'timestamp': 1488353834,
 342             'uploader_id': '65880958',
 343             'uploader': '阿滴英文',
 344             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 345             'duration': 554.117,
 346             'tags': list,
 347             'comment_count': int,
 348             'view_count': int,
 349             'like_count': int,
 350         },
 351         'params': {
 352             'skip_download': True,
 353         },
 354     }, {
 355         'note': 'video has chapter',
 356         'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
 357         'info_dict': {
 358             'id': 'BV1vL411G7N7',
 359             'ext': 'mp4',
 360             'title': '如何为你的B站视频添加进度条分段',
 361             'timestamp': 1634554558,
 362             'upload_date': '20211018',
 363             'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
 364             'tags': list,
 365             'uploader': '爱喝咖啡的当麻',
 366             'duration': 669.482,
 367             'uploader_id': '1680903',
 368             'chapters': 'count:6',
 369             'comment_count': int,
 370             'view_count': int,
 371             'like_count': int,
 372             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 373         },
 374         'params': {'skip_download': True},
 375     }, {
 376         'note': 'video redirects to festival page',
 377         'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
 378         'info_dict': {
 379             'id': 'BV1wP4y1P72h',
 380             'ext': 'mp4',
 381             'title': '牛虎年相交之际，一首传统民族打击乐《牛斗虎》祝大家新春快乐，虎年大吉！【bilibili音乐虎闹新春】',
 382             'timestamp': 1643947497,
 383             'upload_date': '20220204',
 384             'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
 385             'uploader': '叨叨冯聊音乐',
 386             'duration': 246.719,
 387             'uploader_id': '528182630',
 388             'view_count': int,
 389             'like_count': int,
 390             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 391         },
 392         'params': {'skip_download': True},
 393     }, {
 394         'note': 'newer festival video',
 395         'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
 396         'info_dict': {
 397             'id': 'BV1ay4y1d77f',
 398             'ext': 'mp4',
 399             'title': '【崩坏3新春剧场】为特别的你送上祝福！',
 400             'timestamp': 1674273600,
 401             'upload_date': '20230121',
 402             'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
 403             'uploader': '果蝇轰',
 404             'duration': 1111.722,
 405             'uploader_id': '8469526',
 406             'view_count': int,
 407             'like_count': int,
 408             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 409         },
 410         'params': {'skip_download': True},
 411     }, {
 412         'note': 'interactive/split-path video',
 413         'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
 414         'info_dict': {
 415             'id': 'BV1af4y1H7ga',
 416             'title': '【互动游戏】花了大半年时间做的自我介绍~请查收！！',
 417             'timestamp': 1630500414,
 418             'upload_date': '20210901',
 419             'description': 'md5:01113e39ab06e28042d74ac356a08786',
 420             'tags': list,
 421             'uploader': '钉宫妮妮Ninico',
 422             'duration': 1503,
 423             'uploader_id': '8881297',
 424             'comment_count': int,
 425             'view_count': int,
 426             'like_count': int,
 427             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 428         },
 429         'playlist_count': 33,
 430         'playlist': [{
 431             'info_dict': {
 432                 'id': 'BV1af4y1H7ga_400950101',
 433                 'ext': 'mp4',
 434                 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收！！ - 听见猫猫叫~',
 435                 'timestamp': 1630500414,
 436                 'upload_date': '20210901',
 437                 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
 438                 'tags': list,
 439                 'uploader': '钉宫妮妮Ninico',
 440                 'duration': 11.605,
 441                 'uploader_id': '8881297',
 442                 'comment_count': int,
 443                 'view_count': int,
 444                 'like_count': int,
 445                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 446             },
 447         }],
 448     }, {
 449         'note': '301 redirect to bangumi link',
 450         'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
 451         'info_dict': {
 452             'id': '288525',
 453             'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么？',
 454             'ext': 'mp4',
 455             'series': '我和我的祖国',
 456             'series_id': '4780',
 457             'season': '幕后纪实',
 458             'season_id': '28609',
 459             'season_number': 1,
 460             'episode': '钱学森弹道和乘波体飞行器是什么？',
 461             'episode_id': '288525',
 462             'episode_number': 105,
 463             'duration': 1183.957,
 464             'timestamp': 1571648124,
 465             'upload_date': '20191021',
 466             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 467         },
 468     }, {
 469         'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
 470         'info_dict': {
 471             'id': 'BV1jL41167ZG',
 472             'title': '一场大火引发的离奇死亡！古典推理经典短篇集《不可能犯罪诊断书》！',
 473             'ext': 'mp4',
 474         },
 475         'skip': 'supporter-only video',
 476     }, {
 477         'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
 478         'info_dict': {
 479             'id': 'BV1Ks411f7aQ',
 480             'title': '【BD1080P】狼与香辛料I【华盟】',
 481             'ext': 'mp4',
 482         },
 483         'skip': 'login required',
 484     }, {
 485         'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
 486         'info_dict': {
 487             'id': 'BV1GJ411x7h7',
 488             'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
 489             'ext': 'mp4',
 490         },
 491         'skip': 'geo-restricted',
 492     }]
 493
    def _real_extract(self, url):
        """Extract a single video, an anthology playlist or an interactive-video playlist from `url`."""
        video_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
        # The request may end up on a URL this extractor does not match
        # (e.g. a redirect to a bangumi page); delegate the final URL in that case
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        # Pages whose initial state has no `videoData` are handled as festival pages:
        # their metadata is under `videoInfo` and their play info is downloaded further below
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info_obj = self._search_json(
                r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
            if not play_info_obj:
                # No embedded play info: use the page's error code to give a specific message
                if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
                    self.raise_login_required()
                if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
                    raise ExtractorError(
                        'This video may be deleted or geo-restricted. '
                        'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
            play_info = traverse_obj(play_info_obj, ('data', {dict}))
            if not play_info:
                # Code 87007 is reported to the user as a supporter-only video,
                # with details taken from the page's 'tips-toast' element
                if traverse_obj(play_info_obj, 'code') == 87007:
                    toast = get_element_by_class('tips-toast', webpage) or ''
                    msg = clean_html(
                        f'{get_element_by_class("belongs-to", toast) or ""}，'
                        + (get_element_by_class('level', toast) or ''))
                    raise ExtractorError(
                        f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
                raise ExtractorError('Failed to extract play info')
            video_data = initial_state['videoData']

        # From here on video_id is the bvid reported by the page, not the id matched from the URL
        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology', headers=headers),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        # `p` query parameter selects one part of an anthology; the last occurrence is used
        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            # No playlist was returned above, so extract a single part (the first by default)
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        # Id in the older '<aid>_part<N>' form; only used for `_old_archive_ids` below
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        festival_info = {}
        if is_festival:
            play_info = self._download_playinfo(video_id, cid, headers=headers)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        # Later entries override earlier ones: regular page fields, then festival
        # fields, then fields read from video_data, then the explicit keys
        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        # `rights.is_stein_gate` marks an interactive video, which is returned
        # as a playlist with one entry per video section
        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
        if is_interactive:
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
                    'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                    '__post_extractor': self.extract_comments(aid),
                })
        else:
            return {
                **metainfo,
                # `timelength` is divided by 1000 to obtain the duration
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'chapters': self._get_chapters(aid, cid),
                'subtitles': self.extract_subtitles(video_id, cid),
                'formats': self.extract_formats(play_info),
                '__post_extractor': self.extract_comments(aid),
            }
 602
 603
class BiliBiliBangumiIE(BilibiliBaseIE):
    # Matches /bangumi/play/ep<digits>; the `id` group is the digits after 'ep'
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
 606
 607     _TESTS = [{
 608         'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
 609         'info_dict': {
 610             'id': '21495',
 611             'ext': 'mp4',
 612             'series': '悠久之翼',
 613             'series_id': '774',
 614             'season': '第二季',
 615             'season_id': '1182',
 616             'season_number': 2,
 617             'episode': 'forever／ef',
 618             'episode_id': '21495',
 619             'episode_number': 12,
 620             'title': '12 forever／ef',
 621             'duration': 1420.791,
 622             'timestamp': 1320412200,
 623             'upload_date': '20111104',
 624             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 625         },
 626     }, {
 627         'url': 'https://www.bilibili.com/bangumi/play/ep267851',
 628         'info_dict': {
 629             'id': '267851',
 630             'ext': 'mp4',
 631             'series': '鬼灭之刃',
 632             'series_id': '4358',
 633             'season': '立志篇',
 634             'season_id': '26801',
 635             'season_number': 1,
 636             'episode': '残酷',
 637             'episode_id': '267851',
 638             'episode_number': 1,
 639             'title': '1 残酷',
 640             'duration': 1425.256,
 641             'timestamp': 1554566400,
 642             'upload_date': '20190406',
 643             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
 644         },
 645         'skip': 'Geo-restricted',
 646     }, {
 647         'note': 'a making-of which falls outside main section',
 648         'url': 'https://www.bilibili.com/bangumi/play/ep345120',
 649         'info_dict': {
 650             'id': '345120',
 651             'ext': 'mp4',
 652             'series': '鬼灭之刃',
 653             'series_id': '4358',
 654             'season': '立志篇',
 655             'season_id': '26801',
 656             'season_number': 1,
 657             'episode': '炭治郎篇',
 658             'episode_id': '345120',
 659             'episode_number': 27,
 660             'title': '#1 炭治郎篇',
 661             'duration': 1922.129,
 662             'timestamp': 1602853860,
 663             'upload_date': '20201016',
 664             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
 665         },
 666     }]
 667
 668     def _real_extract(self, url):
 669         episode_id = self._match_id(url)
 670         headers = self.geo_verification_headers()
 671         webpage = self._download_webpage(url, episode_id, headers=headers)
 672
 673         if '您所在的地区无法观看本片' in webpage:
 674             raise GeoRestrictedError('This video is restricted')
 675         elif '正在观看预览，大会员免费看全片' in webpage:
 676             self.raise_login_required('This video is for premium members only')
 677
 678         headers['Referer'] = url
 679         play_info = self._download_json(
 680             'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
 681             'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
 682             headers=headers)
 683         premium_only = play_info.get('code') == -10403
 684         play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
 685
 686         formats = self.extract_formats(play_info)
 687         if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
 688             self.raise_login_required('This video is for premium members only')
 689
 690         bangumi_info = self._download_json(
 691             'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
 692             query={'ep_id': episode_id}, headers=headers)['result']
 693
 694         episode_number, episode_info = next((
 695             (idx, ep) for idx, ep in enumerate(traverse_obj(
 696                 bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
 697             if str_or_none(ep.get('id')) == episode_id), (1, {}))
 698
 699         season_id = bangumi_info.get('season_id')
 700         season_number, season_title = season_id and next((
 701             (idx + 1, e.get('season_title')) for idx, e in enumerate(
 702                 traverse_obj(bangumi_info, ('seasons', ...)))
 703             if e.get('season_id') == season_id
 704         ), (None, None))
 705
 706         aid = episode_info.get('aid')
 707
 708         return {
 709             'id': episode_id,
 710             'formats': formats,
 711             **traverse_obj(bangumi_info, {
 712                 'series': ('series', 'series_title', {str}),
 713                 'series_id': ('series', 'series_id', {str_or_none}),
 714                 'thumbnail': ('square_cover', {url_or_none}),
 715             }),
 716             **traverse_obj(episode_info, {
 717                 'episode': ('long_title', {str}),
 718                 'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
 719                 'timestamp': ('pub_time', {int_or_none}),
 720                 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
 721             }),
 722             'episode_id': episode_id,
 723             'season': str_or_none(season_title),
 724             'season_id': str_or_none(season_id),
 725             'season_number': season_number,
 726             'duration': float_or_none(play_info.get('timelength'), scale=1000),
 727             'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
 728             '__post_extractor': self.extract_comments(aid),
 729             'http_headers': {'Referer': url},
 730         }
 731
 732
 733 class BiliBiliBangumiMediaIE(BilibiliBaseIE):
 734     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
 735     _TESTS = [{
 736         'url': 'https://www.bilibili.com/bangumi/media/md24097891',
 737         'info_dict': {
 738             'id': '24097891',
 739             'title': 'CAROLE & TUESDAY',
 740             'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
 741         },
 742         'playlist_mincount': 25,
 743     }, {
 744         'url': 'https://www.bilibili.com/bangumi/media/md1565/',
 745         'info_dict': {
 746             'id': '1565',
 747             'title': '攻壳机动队 S.A.C. 2nd GIG',
 748             'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
 749         },
 750         'playlist_count': 26,
 751         'playlist': [{
 752             'info_dict': {
 753                 'id': '68540',
 754                 'ext': 'mp4',
 755                 'series': '攻壳机动队',
 756                 'series_id': '1077',
 757                 'season': '第二季',
 758                 'season_id': '1565',
 759                 'season_number': 2,
 760                 'episode': '再启动 REEMBODY',
 761                 'episode_id': '68540',
 762                 'episode_number': 1,
 763                 'title': '1 再启动 REEMBODY',
 764                 'duration': 1525.777,
 765                 'timestamp': 1425074413,
 766                 'upload_date': '20150227',
 767                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
 768             },
 769         }],
 770     }]
 771
 772     def _real_extract(self, url):
 773         media_id = self._match_id(url)
 774         webpage = self._download_webpage(url, media_id)
 775
 776         initial_state = self._search_json(
 777             r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
 778         ss_id = initial_state['mediaInfo']['season_id']
 779
 780         return self.playlist_result(
 781             self._get_episodes_from_season(ss_id, url), media_id,
 782             **traverse_obj(initial_state, ('mediaInfo', {
 783                 'title': ('title', {str}),
 784                 'description': ('evaluate', {str}),
 785             })))
 786
 787
 788 class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
 789     _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
 790     _TESTS = [{
 791         'url': 'https://www.bilibili.com/bangumi/play/ss26801',
 792         'info_dict': {
 793             'id': '26801',
 794             'title': '鬼灭之刃',
 795             'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
 796         },
 797         'playlist_mincount': 26
 798     }, {
 799         'url': 'https://www.bilibili.com/bangumi/play/ss2251',
 800         'info_dict': {
 801             'id': '2251',
 802             'title': '玲音',
 803             'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
 804         },
 805         'playlist_count': 13,
 806         'playlist': [{
 807             'info_dict': {
 808                 'id': '50188',
 809                 'ext': 'mp4',
 810                 'series': '玲音',
 811                 'series_id': '1526',
 812                 'season': 'TV',
 813                 'season_id': '2251',
 814                 'season_number': 1,
 815                 'episode': 'WEIRD',
 816                 'episode_id': '50188',
 817                 'episode_number': 1,
 818                 'title': '1 WEIRD',
 819                 'duration': 1436.992,
 820                 'timestamp': 1343185080,
 821                 'upload_date': '20120725',
 822                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
 823             },
 824         }],
 825     }]
 826
 827     def _real_extract(self, url):
 828         ss_id = self._match_id(url)
 829         webpage = self._download_webpage(url, ss_id)
 830         metainfo = traverse_obj(
 831             self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
 832             ('itemListElement', ..., {
 833                 'title': ('name', {str}),
 834                 'description': ('description', {str}),
 835             }), get_all=False)
 836
 837         return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
 838
 839
 840 class BilibiliCheeseBaseIE(BilibiliBaseIE):
 841     _HEADERS = {'Referer': 'https://www.bilibili.com/'}
 842
 843     def _extract_episode(self, season_info, ep_id):
 844         episode_info = traverse_obj(season_info, (
 845             'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
 846         aid, cid = episode_info['aid'], episode_info['cid']
 847
 848         if traverse_obj(episode_info, 'ep_status') == -1:
 849             raise ExtractorError('This course episode is not yet available.', expected=True)
 850         if not traverse_obj(episode_info, 'playable'):
 851             self.raise_login_required('You need to purchase the course to download this episode')
 852
 853         play_info = self._download_json(
 854             'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
 855             query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
 856             headers=self._HEADERS, note='Downloading playinfo')['data']
 857
 858         return {
 859             'id': str_or_none(ep_id),
 860             'episode_id': str_or_none(ep_id),
 861             'formats': self.extract_formats(play_info),
 862             'extractor_key': BilibiliCheeseIE.ie_key(),
 863             'extractor': BilibiliCheeseIE.IE_NAME,
 864             'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
 865             **traverse_obj(episode_info, {
 866                 'episode': ('title', {str}),
 867                 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
 868                 'alt_title': ('subtitle', {str}),
 869                 'duration': ('duration', {int_or_none}),
 870                 'episode_number': ('index', {int_or_none}),
 871                 'thumbnail': ('cover', {url_or_none}),
 872                 'timestamp': ('release_date', {int_or_none}),
 873                 'view_count': ('play', {int_or_none}),
 874             }),
 875             **traverse_obj(season_info, {
 876                 'uploader': ('up_info', 'uname', {str}),
 877                 'uploader_id': ('up_info', 'mid', {str_or_none}),
 878             }),
 879             'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
 880             '__post_extractor': self.extract_comments(aid),
 881             'http_headers': self._HEADERS,
 882         }
 883
 884     def _download_season_info(self, query_key, video_id):
 885         return self._download_json(
 886             f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
 887             headers=self._HEADERS, note='Downloading season info')['data']
 888
 889
 890 class BilibiliCheeseIE(BilibiliCheeseBaseIE):
 891     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
 892     _TESTS = [{
 893         'url': 'https://www.bilibili.com/cheese/play/ep229832',
 894         'info_dict': {
 895             'id': '229832',
 896             'ext': 'mp4',
 897             'title': '1 - 课程先导片',
 898             'alt_title': '视频课 · 3分41秒',
 899             'uploader': '马督工',
 900             'uploader_id': '316568752',
 901             'episode': '课程先导片',
 902             'episode_id': '229832',
 903             'episode_number': 1,
 904             'duration': 221,
 905             'timestamp': 1695549606,
 906             'upload_date': '20230924',
 907             'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 908             'view_count': int,
 909         }
 910     }]
 911
 912     def _real_extract(self, url):
 913         ep_id = self._match_id(url)
 914         return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
 915
 916
 917 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
 918     _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
 919     _TESTS = [{
 920         'url': 'https://www.bilibili.com/cheese/play/ss5918',
 921         'info_dict': {
 922             'id': '5918',
 923             'title': '【限时五折】新闻系学不到：马督工教你做自媒体',
 924             'description': '帮普通人建立世界模型，降低人与人的沟通门槛',
 925         },
 926         'playlist': [{
 927             'info_dict': {
 928                 'id': '229832',
 929                 'ext': 'mp4',
 930                 'title': '1 - 课程先导片',
 931                 'alt_title': '视频课 · 3分41秒',
 932                 'uploader': '马督工',
 933                 'uploader_id': '316568752',
 934                 'episode': '课程先导片',
 935                 'episode_id': '229832',
 936                 'episode_number': 1,
 937                 'duration': 221,
 938                 'timestamp': 1695549606,
 939                 'upload_date': '20230924',
 940                 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
 941                 'view_count': int,
 942             }
 943         }],
 944         'params': {'playlist_items': '1'},
 945     }, {
 946         'url': 'https://www.bilibili.com/cheese/play/ss5918',
 947         'info_dict': {
 948             'id': '5918',
 949             'title': '【限时五折】新闻系学不到：马督工教你做自媒体',
 950             'description': '帮普通人建立世界模型，降低人与人的沟通门槛',
 951         },
 952         'playlist_mincount': 5,
 953         'skip': 'paid video in list',
 954     }]
 955
 956     def _get_cheese_entries(self, season_info):
 957         for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
 958             yield self._extract_episode(season_info, ep_id)
 959
 960     def _real_extract(self, url):
 961         season_id = self._match_id(url)
 962         season_info = self._download_season_info('season_id', season_id)
 963
 964         return self.playlist_result(
 965             self._get_cheese_entries(season_info), season_id,
 966             **traverse_obj(season_info, {
 967                 'title': ('title', {str}),
 968                 'description': ('subtitle', {str}),
 969             }))
 970
 971
 972 class BilibiliSpaceBaseIE(InfoExtractor):
 973     def _extract_playlist(self, fetch_page, get_metadata, get_entries):
 974         first_page = fetch_page(0)
 975         metadata = get_metadata(first_page)
 976
 977         paged_list = InAdvancePagedList(
 978             lambda idx: get_entries(fetch_page(idx) if idx else first_page),
 979             metadata['page_count'], metadata['page_size'])
 980
 981         return metadata, paged_list
 982
 983
 984 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
 985     _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
 986     _TESTS = [{
 987         'url': 'https://space.bilibili.com/3985676/video',
 988         'info_dict': {
 989             'id': '3985676',
 990         },
 991         'playlist_mincount': 178,
 992     }, {
 993         'url': 'https://space.bilibili.com/313580179/video',
 994         'info_dict': {
 995             'id': '313580179',
 996         },
 997         'playlist_mincount': 92,
 998     }]
 999
1000     def _extract_signature(self, playlist_id):
1001         session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
1002
1003         key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
1004         img_key = traverse_obj(
1005             session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
1006         sub_key = traverse_obj(
1007             session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
1008
1009         session_key = img_key + sub_key
1010
1011         signature_values = []
1012         for position in (
1013             46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
1014             12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
1015             57, 62, 11, 36, 20, 34, 44, 52
1016         ):
1017             char_at_position = try_call(lambda: session_key[position])
1018             if char_at_position:
1019                 signature_values.append(char_at_position)
1020
1021         return ''.join(signature_values)[:32]
1022
1023     def _real_extract(self, url):
1024         playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
1025         if not is_video_url:
1026             self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1027                            'To download audios, add a "/audio" to the URL')
1028
1029         signature = self._extract_signature(playlist_id)
1030
1031         def fetch_page(page_idx):
1032             query = {
1033                 'keyword': '',
1034                 'mid': playlist_id,
1035                 'order': 'pubdate',
1036                 'order_avoided': 'true',
1037                 'platform': 'web',
1038                 'pn': page_idx + 1,
1039                 'ps': 30,
1040                 'tid': 0,
1041                 'web_location': 1550101,
1042                 'wts': int(time.time()),
1043             }
1044             query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
1045
1046             try:
1047                 response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
1048                                                playlist_id, note=f'Downloading page {page_idx}', query=query,
1049                                                headers={'referer': url})
1050             except ExtractorError as e:
1051                 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
1052                     raise ExtractorError(
1053                         'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
1054                 raise
1055             if response['code'] in (-352, -401):
1056                 raise ExtractorError(
1057                     f'Request is blocked by server ({-response["code"]}), '
1058                     'please add cookies, wait and try later.', expected=True)
1059             return response['data']
1060
1061         def get_metadata(page_data):
1062             page_size = page_data['page']['ps']
1063             entry_count = page_data['page']['count']
1064             return {
1065                 'page_count': math.ceil(entry_count / page_size),
1066                 'page_size': page_size,
1067             }
1068
1069         def get_entries(page_data):
1070             for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
1071                 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
1072
1073         metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1074         return self.playlist_result(paged_list, playlist_id)
1075
1076
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    # Playlist extractor for the audio uploads of a space.bilibili.com user
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Return all audio uploads of the user as a lazily paged playlist."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # The API pages are 1-based while `page_idx` is 0-based
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            # `.get('data', [])` would still return None for an explicit JSON null;
            # treat a null list as an empty page, like the video extractor does
            for entry in page_data.get('data') or []:
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        # The paging metadata is only needed to build the paged list
        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
1108
1109
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a url_result for every video id found in `page_data`.

        @param bvid_keys   Key or path leading to the list of items
        @param ending_key  Key inside each item that holds the video id string
        """
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Best-effort lookup of the uploader name from the title of the user's space page.

        Returns None when the page cannot be downloaded or the title does not match.
        """
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        if not webpage:
            # A non-fatal download failure yields a non-string value, which
            # must not be handed to the regex search
            return None
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Same as the base implementation, but strips the paging keys from the metadata.

        This lets callers pass the returned metadata straight on as playlist fields.
        """
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
1124
1125
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for a user's "collectiondetail" channel pages
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        """Return the collection as a lazily paged playlist with its metadata."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            # The API pages are 1-based while `page_idx` is 0-based
            api_query = {'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
            response = self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}', query=api_query)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            total = page_data['page']['total']
            info = {
                'page_count': math.ceil(total / page_size),
                'page_size': page_size,
            }
            info['uploader'] = self._get_uploader(mid, playlist_id)
            info.update(traverse_obj(page_data, {
                'title': ('meta', 'name', {str}),
                'description': ('meta', 'description', {str}),
                'uploader_id': ('meta', 'mid', {str_or_none}),
                'timestamp': ('meta', 'ptime', {int_or_none}),
                'thumbnail': ('meta', 'cover', {url_or_none}),
            }))
            return info

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1174
1175
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for a user's "seriesdetail" channel pages
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        """Return the series as a lazily paged playlist with its metadata."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        # The series metadata is optional; a failed download only loses these fields
        series_info = self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False)
        playlist_meta = traverse_obj(series_info, {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            # The API pages are 1-based while `page_idx` is 0-based
            api_query = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
            response = self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}', query=api_query)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['size']
            total = page_data['page']['total']
            info = {
                'page_count': math.ceil(total / page_size),
                'page_size': page_size,
            }
            info['uploader'] = self._get_uploader(mid, playlist_id)
            info.update(playlist_meta)
            return info

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1228
1229
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for favorites lists (favlist?fid=<id> and medialist/detail/ml<id>)
    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】（旧）',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the favorites list as a playlist; private lists require login."""
        fid = self._match_id(url)

        # First request: list metadata (also tells whether the list is accessible)
        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # Second request: the ids of all resources in the list
        resource_ids = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(resource_ids, 'data')

        playlist_meta = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **playlist_meta)
1278
1279
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for the logged-in user's watch-later list
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Return the watch-later list as a playlist; requires login."""
        # Use the value of the DedeUserID cookie as the id when the cookie is set
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = getattr(user_cookie, 'value', 'watchlater')

        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')

        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')), id=list_id, title='稍后再看')
1297
1298
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for bilibili.com/list/<id> and bilibili.com/medialist/play/<id>
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'description': '',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】（旧）',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield the entries of every page of the media list.

        `query` is updated in place: after each page its 'oid' is set to the
        id of the last item of that page before the next request is made.
        """
        page_num = 0
        while True:
            page_num += 1
            response = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
            )
            page_data = response['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                return

    def _real_extract(self, url):
        """Return the list as a playlist, or only the video named by ``bvid`` when no playlist is wanted."""
        list_id = self._match_id(url)

        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
        if not self._yes_playlist(list_id, bvid):
            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)

        # Anything other than an explicit 200 status is treated as a failure
        status = traverse_obj(initial_state, ('error', 'code', {int_or_none}))
        if status != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            if error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            if error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {'ps': 20, 'with_current': False}
        query.update(traverse_obj(initial_state, {
            'type': ('playlist', 'type', {int_or_none}),
            'biz_id': ('playlist', 'id', {int_or_none}),
            'tid': ('tid', {int_or_none}),
            'sort_field': ('sortFiled', {int_or_none}),
            'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
        }))

        metadata = {'id': f'{query["type"]}_{query["biz_id"]}'}
        metadata.update(traverse_obj(initial_state, ('mediaListInfo', {
            'title': ('title', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
        })))
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
1423
1424
class BilibiliCategoryIE(InfoExtractor):
    # Extracts the video listing of a category page (/v/<category>/<subcategory>)
    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad'
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45
        }
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url results for a single page of the category listing.

        `page_num` is the 0-based page index handed over by OnDemandPagedList.
        The API's `pn` parameter is 1-based (the probe request in `_entries`
        asks for page '1'), so the index is shifted by one before use.

        Raises ExtractorError when an existing page returns no video list.
        """
        api_page = page_num + 1
        if api_page > num_pages:
            # Nothing exists past the last page; ending quietly avoids failing
            # the whole playlist when the final page happened to be full
            return

        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': api_page},
            note=f'Extracting results from page {api_page} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {api_page}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return a lazily paged list of the videos in a category/subcategory.

        Raises ExtractorError for unsupported categories or subcategories and
        when the API does not report a usable item count and page size.
        """
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
        # The first page is requested once up front only to learn the paging information
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # Fall back to an empty dict so that missing paging info raises the error below
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        """Build a playlist whose id and title are '<category>: <subcategory>'."""
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
1491
1492
class BiliBiliSearchIE(SearchInfoExtractor):
    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    _TESTS = [{
        'url': 'bilisearch3:靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年，我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年，我怎么还在等你单推的女人睡觉后开播啊？”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'tags': list,
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }]

    def _search_results(self, query):
        """Yield url results for `query`, one search result page at a time.

        A random `buvid3` cookie is set beforehand if none is present for the
        API host. Iteration stops at the first page without results.
        """
        api_cookies = self._get_cookies('https://api.bilibili.com')
        if not api_cookies.get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')

        for page_num in itertools.count(1):
            search_params = {
                'Search_key': query,
                'keyword': query,
                'page': page_num,
                'context': '',
                'duration': 0,
                'tids_2': '',
                '__refresh__': 'true',
                'search_type': 'video',
                'tids': 0,
                'highlight': 1,
            }
            response = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query=search_params)
            videos = response['data'].get('result')
            if not videos:
                return
            yield from (
                self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
                for video in videos)
1547
1548
class BilibiliAudioBaseIE(InfoExtractor):
    def _call_api(self, path, sid, query=None):
        """Request `path` from the audio web service and return the response's `data`.

        When no (or an empty) `query` is given, `{'sid': sid}` is sent instead.
        """
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        response = self._download_json(api_url, sid, query=query or {'sid': sid})
        return response['data']
1556
1557
class BilibiliAudioIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版！',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Extract a single audio track: one format from the play API plus song metadata."""
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        # Only the first CDN url is used; the page url is sent as Referer when downloading
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        # The lyric url, when present, is exposed as the only subtitle track
        lyric = song.get('lyric')
        subtitles = {'origin': [{'url': lyric}]} if lyric else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
1626
1627
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐（每日11:00更新）',
            'description': '每天11:00更新，为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Extract an audio menu as a playlist of BilibiliAudioIE url results.

        One request for up to 100 songs is made. If the menu info provides a
        title, it becomes the playlist title and is set as `album` on every
        entry; otherwise the playlist is returned with its id only.
        """
        am_id = self._match_id(url)

        menu_query = {'sid': am_id, 'pn': 1, 'ps': 100}
        songs = self._call_api('song/of-menu', am_id, menu_query)['data']

        # Songs without an id are skipped
        song_ids = (str_or_none(song.get('id')) for song in songs)
        entries = [
            self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids if sid
        ]
        if not entries:
            return self.playlist_result(entries, am_id)

        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if not album_title:
            return self.playlist_result(entries, am_id)

        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
1665
1666
class BiliBiliPlayerIE(InfoExtractor):
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Delegate an embedded player URL to BiliBiliIE using its `aid`."""
        video_id = self._match_id(url)
        # Hand off with a www.bilibili.com video URL, the form used for every other
        # BiliBiliIE hand-off in this file; bilibili.tv video URLs belong to the
        # BiliIntl extractors, which expect a purely numeric id
        return self.url_result(
            f'https://www.bilibili.com/video/av{video_id}/',
            ie=BiliBiliIE.ie_key(), video_id=video_id)
1679
1680
class BiliIntlBaseIE(InfoExtractor):
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _call_api(self, endpoint, *args, **kwargs):
        """Call a gateway endpoint and return the `data` member of its JSON response.

        All further arguments are forwarded to `_download_json`. A non-zero
        `code` in the response is handled as an error: the login-related codes
        raise a login-required error, 10004001 raises a geo-restriction error,
        and any other code raises ExtractorError when `fatal=True` was passed
        explicitly, or is reported as a warning otherwise.

        Returns None when no JSON object could be downloaded.
        """
        response = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if not isinstance(response, dict):
            # A non-fatal download failure leaves no JSON object to inspect
            return None

        code = response.get('code')
        if code:
            if code in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif code == 10004001:
                self.raise_geo_restricted()
            else:
                errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                message = response.get('message')
                # Some responses merely repeat the code as their message; skip those
                if message and str(code) != message:
                    errmsg = f'{errmsg}: {self.IE_NAME} said: {message}'
                # NOTE: only an explicit fatal=True raises; an omitted `fatal` just warns
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                self.report_warning(errmsg)
        return response.get('data')

    def json2srt(self, json):
        """Convert a JSON subtitle document into SRT text.

        Every item of `body` with non-empty `content` and with both `from` and
        `to` present becomes one cue. Cues are numbered from 1.
        """
        # Compare the times against None instead of testing truthiness, so that
        # a cue starting at time 0 is kept
        cues = traverse_obj(json, (
            'body', lambda _, v: v['content'] and v['from'] is not None and v['to'] is not None))
        return '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(cues))

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Return the subtitles dict for an episode (`ep_id`) or a video (`aid`).

        ASS subtitles are referenced by url. JSON subtitles are downloaded and
        converted to SRT. Any other extension is reported with a warning.
        """
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        # Skip urls that were already handled
        fetched_urls = set()
        for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
            for sub_url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
                if sub_url in fetched_urls:
                    continue
                fetched_urls.add(sub_url)
                sub_ext = determine_ext(sub_url)
                sub_lang = sub.get('lang_key') or 'en'

                if sub_ext == 'ass':
                    subtitles.setdefault(sub_lang, []).append({
                        'ext': 'ass',
                        'url': sub_url,
                    })
                elif sub_ext == 'json':
                    sub_data = self._download_json(
                        sub_url, ep_id or aid, fatal=False,
                        note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
                        errnote='Unable to download subtitles')

                    if sub_data:
                        subtitles.setdefault(sub_lang, []).append({
                            'ext': 'srt',
                            'data': self.json2srt(sub_data),
                        })
                else:
                    self.report_warning('Unexpected subtitle extension', ep_id or aid)

        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Return the video-only and audio-only formats for an episode or a video.

        Returns an empty list when the API response carries no `playurl` data.
        """
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        # `_call_api` may return None after only warning about an API error
        playurl = traverse_obj(video_json, ('playurl', {dict})) or {}
        formats = []
        for vid in playurl.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in playurl.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Map a video/episode data dict to info-dict fields.

        `video_data` may be None or empty, in which case every field is None.
        The episode number is taken from a leading 'E<number>' in `title_display`.
        """
        video_data = video_data or {}
        title_display = video_data.get('title_display')
        return {
            'title': title_display or video_data.get('title'),
            'description': video_data.get('desc'),
            'thumbnail': video_data.get('cover'),
            'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', title_display or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in with username and password.

        The password is RSA-encrypted with a key fetched from the site before
        it is posted. Raises ExtractorError if the Cryptodome dependency is
        missing or if the site reports a login failure.
        """
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.RSA.importKey(key_data['key'])
        # The hash issued with the key is prepended to the password before encryption
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true'
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            raise ExtractorError('Unable to log in')
1825
1826
class BiliIntlIE(BiliIntlBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro'
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro'
            }],
        }
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro'
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro'
            }],
        }
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro'
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro'
            }],
        },
        'params': {
            'getcomments': True
        }
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True
        }
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Build the page url of an episode (when `series_id` is given) or of a standalone video."""
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Return the metadata fields for a video or an episode.

        Metadata smuggled into the url (recognised by a `title` entry) is
        returned as-is. Otherwise the webpage's preload state is read; for
        episodes without usable page data the season's episode list is queried.
        Webpage elements and JSON-LD serve as fallbacks for missing fields.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            # Keep an empty dict when the episode is not listed, so parsing below cannot fail on None
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False) or {}

        # XXX: webpage metadata may not be accurate; it is only a fallback for when video_data is not found
        return merge_dicts(
            self._parse_video_metadata(video_data), {
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    @staticmethod
    def _parse_comment(comment):
        """Return the info fields shared by top-level comments and replies."""
        return {
            'author': traverse_obj(comment, ('member', 'name')),
            'author_id': traverse_obj(comment, ('member', 'mid')),
            'author_thumbnail': traverse_obj(comment, ('member', 'face')),
            'text': traverse_obj(comment, ('content', 'message')),
            'id': comment.get('rpid'),
            'like_count': int_or_none(comment.get('like_count')),
            'timestamp': unified_timestamp(comment.get('ctime_text')),
        }

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Yield all replies to the comment `root_id`, starting at cursor `next_id`.

        Pages are followed through the cursor returned by the API until it
        reports `is_end`, or until no further cursor value is available.
        """
        while True:
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/detail', display_id,
                note=f'Downloading reply comment of {root_id} - {next_id}',
                query={
                    'platform': 'web',
                    'ps': 20,  # comment's reply per page (default: 3)
                    'root': root_id,
                    'next': next_id,
                })

            for reply in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    **self._parse_comment(reply),
                    'parent': reply.get('parent'),
                }

            cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor', {dict})) or {}
            following_id = cursor.get('next')
            # Stop at the end, and also when the cursor is missing or does not advance
            if cursor.get('is_end') or following_id is None or following_id == next_id:
                break
            next_id = following_id

    def _get_comments(self, video_id, ep_id):
        """Yield the top-level comments of a video, each followed by its replies.

        Comment type 3 is requested for episodes (`ep_id` set) and type 1 for
        user generated content.
        """
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            for comment in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    **self._parse_comment(comment),
                    'author_is_uploader': bool(traverse_obj(comment, ('member', 'type'))),
                }
                if comment.get('count'):
                    yield from self._get_comments_reply(comment.get('rpid'), display_id=video_id)

            cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor', {dict}))
            # A response without a cursor can never signal the end, so stop there as well
            if not cursor or cursor.get('is_end'):
                break

    def _real_extract(self, url):
        """Extract an episode (`play/<season>/<episode>`) or a standalone video (`video/<aid>`)."""
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start and end times seem to be off by a few seconds, even though this matches ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                # The raw time values are divided by 1000
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro'
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro'
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
2096
2097
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Playlist extractor for a whole season (``/play/<id>`` and ``/media/<id>`` URLs)."""

    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield one ``BiliIntlIE`` URL result per episode listed for ``series_id``.

        The per-episode metadata parsed from the listing is smuggled into the
        episode URL. Episodes without an ``episode_id`` are skipped.
        """
        series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            episode_id = str_or_none(episode.get('episode_id'))
            if not episode_id:
                # No URL can be built without an id; skip this entry instead of
                # aborting the whole playlist with a KeyError.
                continue
            yield self.url_result(smuggle_url(
                BiliIntlIE._make_url(episode_id, series_id),
                self._parse_video_metadata(episode),
            ), BiliIntlIE, episode_id)

    def _real_extract(self, url):
        """Return a playlist of the season's episodes plus season-level metadata."""
        series_id = self._match_id(url)
        # The API helper may hand back nothing (other callers in this file guard it
        # with `or {}`), so traverse safely instead of calling .get() on the result.
        series_info = traverse_obj(
            self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id),
            ('season', {dict})) or {}
        return self.playlist_result(
            self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
            categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2150
2151
class BiliLiveIE(InfoExtractor):
    """Extractor for live rooms on live.bilibili.com."""

    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回，其他时候随机游戏。 | \n录播：@下播型泛式录播组。 | \n直播通知群（全员禁言）：666906670，902092584，59971⑧481 （功能一样，别多加）",
            'ext': 'flv',
            'title': "太空狼人杀联动，不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live',
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True,
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True,
    }]

    # Quality numbers ("qn") requested from the play-info API, mapped to the
    # format id / note attached to the resulting formats.
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Ranks a qn by its position in _FORMATS (declaration order)
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Query ``api.live.bilibili.com/<path>`` and return its ``data`` payload.

        Raises ExtractorError when the response ``code`` is anything but 0;
        returns an empty dict when ``data`` is missing or empty.
        """
        api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if api_result.get('code') != 0:
            raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
        return api_result.get('data') or {}

    def _parse_formats(self, qn, fmt):
        """Yield a format dict for every URL of the codecs in ``fmt`` served at ``qn``.

        Codecs whose ``current_qn`` differs from the requested ``qn`` are ignored.
        Entries lacking the pieces needed to build a URL are skipped rather than
        raising.
        """
        for codec in fmt.get('codec') or []:
            if codec.get('current_qn') != qn:
                continue
            base_url = codec.get('base_url')
            if not base_url:
                continue
            for url_info in codec.get('url_info') or []:
                host = url_info.get('host')
                if not host:
                    continue
                yield {
                    # A missing/null "extra" must not end up as the text "None" in the URL
                    'url': f'{host}{base_url}{url_info.get("extra") or ""}',
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        """Extract the formats and metadata of a room that is currently live.

        Raises an expected ExtractorError when the room reports ``live_status`` 0.
        """
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        # Kept after the loop for the timestamp lookup; initialised explicitly so
        # that lookup does not rely on a leaked loop variable.
        stream_data = {}
        # One request per quality; only codecs reporting that quality as their
        # current one are turned into formats (see _parse_formats).
        for qn in self._FORMATS:
            stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            })
            for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            # Taken from the response to the last quality requested
            'timestamp': stream_data.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }