yt_dlp/extractor/bilibili.py
1 import base64
2 import functools
3 import hashlib
4 import itertools
5 import json
6 import math
7 import re
8 import time
9 import urllib.parse
10 import uuid
11
12 from .common import InfoExtractor, SearchInfoExtractor
13 from ..dependencies import Cryptodome
14 from ..networking.exceptions import HTTPError
15 from ..utils import (
16 ExtractorError,
17 GeoRestrictedError,
18 InAdvancePagedList,
19 OnDemandPagedList,
20 bool_or_none,
21 clean_html,
22 determine_ext,
23 filter_dict,
24 float_or_none,
25 format_field,
26 get_element_by_class,
27 int_or_none,
28 join_nonempty,
29 make_archive_id,
30 merge_dicts,
31 mimetype2ext,
32 parse_count,
33 parse_qs,
34 qualities,
35 smuggle_url,
36 srt_subtitles_timecode,
37 str_or_none,
38 traverse_obj,
39 try_call,
40 unified_timestamp,
41 unsmuggle_url,
42 url_or_none,
43 urlencode_postdata,
44 variadic,
45 )
46
47
48 class BilibiliBaseIE(InfoExtractor):
49 _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
50
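    # Build yt-dlp format dicts from the DASH "playinfo" payload: regular and
    # Dolby audio tracks (plus lossless FLAC when present) become audio-only
    # formats, each DASH video representation becomes a video format (marked
    # video-only when separate audio tracks exist), and human-readable format
    # names are looked up from support_formats by quality id.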
51 def extract_formats(self, play_info):
52 format_names = {
53 r['quality']: traverse_obj(r, 'new_description', 'display_desc')
54 for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
55 }
56
57 audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
58 flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
59 if flac_audio:
60 audios.append(flac_audio)
61 formats = [{
62 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
63 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
64 'acodec': traverse_obj(audio, ('codecs', {str.lower})),
65 'vcodec': 'none',
66 'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
67 'filesize': int_or_none(audio.get('size')),
68 'format_id': str_or_none(audio.get('id')),
69 } for audio in audios]
70
71 formats.extend({
72 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
73 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
74 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
75 'width': int_or_none(video.get('width')),
76 'height': int_or_none(video.get('height')),
77 'vcodec': video.get('codecs'),
78 'acodec': 'none' if audios else None,
79 'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
80 'tbr': float_or_none(video.get('bandwidth'), scale=1000),
81 'filesize': int_or_none(video.get('size')),
82 'quality': int_or_none(video.get('id')),
83 'format_id': traverse_obj(
84 video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
85 ('id', {str_or_none}), get_all=False),
86 'format': format_names.get(video.get('id')),
87 } for video in traverse_obj(play_info, ('dash', 'video', ...)))
88
89 missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
90 if missing_formats:
91 self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
92 f'you have to login or become a premium member to download them. {self._login_hint()}')
93
94 return formats
95
96 def _download_playinfo(self, video_id, cid):
97 return self._download_json(
98 'https://api.bilibili.com/x/player/playurl', video_id,
99 query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
100 note=f'Downloading video formats for cid {cid}')['data']
101
102 def json2srt(self, json_data):
103 srt_data = ''
104 for idx, line in enumerate(json_data.get('body') or []):
105 srt_data += (f'{idx + 1}\n'
106 f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
107 f'{line["content"]}\n\n')
108 return srt_data
109
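    # Subtitles always include the danmaku (comment overlay) XML track; CC
    # subtitles, when listed by the player API, are downloaded as JSON and
    # converted to SRT via json2srt().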
110 def _get_subtitles(self, video_id, cid, aid=None):
111 subtitles = {
112 'danmaku': [{
113 'ext': 'xml',
114 'url': f'https://comment.bilibili.com/{cid}.xml',
115 }]
116 }
117
118 subtitle_info = traverse_obj(self._download_json(
119 'https://api.bilibili.com/x/player/v2', video_id,
120 query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
121 note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
122 subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
123 if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
124 if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
125 self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
126 for s in subs_list:
127 subtitles.setdefault(s['lan'], []).append({
128 'ext': 'srt',
129 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
130 })
131 return subtitles
132
133 def _get_chapters(self, aid, cid):
134 chapters = aid and cid and self._download_json(
135 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
136 note='Extracting chapters', fatal=False)
137 return traverse_obj(chapters, ('data', 'view_points', ..., {
138 'title': 'content',
139 'start_time': 'from',
140 'end_time': 'to',
141 })) or None
142
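    # Comments are fetched page by page from the /x/v2/reply API and nested
    # replies are flattened by recursing into each reply's "replies" list.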
143 def _get_comments(self, aid):
144 for idx in itertools.count(1):
145 replies = traverse_obj(
146 self._download_json(
147 f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
148 aid, note=f'Extracting comments from page {idx}', fatal=False),
149 ('data', 'replies'))
150 if not replies:
151 return
152 for children in map(self._get_all_children, replies):
153 yield from children
154
155 def _get_all_children(self, reply):
156 yield {
157 'author': traverse_obj(reply, ('member', 'uname')),
158 'author_id': traverse_obj(reply, ('member', 'mid')),
159 'id': reply.get('rpid'),
160 'text': traverse_obj(reply, ('content', 'message')),
161 'timestamp': reply.get('ctime'),
162 'parent': reply.get('parent') or 'root',
163 }
164 for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
165 yield from children
166
167 def _get_episodes_from_season(self, ss_id, url):
168 season_info = self._download_json(
169 'https://api.bilibili.com/pgc/web/season/section', ss_id,
170 note='Downloading season info', query={'season_id': ss_id},
171 headers={'Referer': url, **self.geo_verification_headers()})
172
173 for entry in traverse_obj(season_info, (
174 'result', 'main_section', 'episodes',
175 lambda _, v: url_or_none(v['share_url']) and v['id'])):
176 yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
177
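    # Interactive videos (rights['is_stein_gate']) are organised as a graph of
    # story "edges". Walk the graph recursively from the root edge, recording
    # each edge's title, cid and available choices; results are keyed by cid so
    # that edges sharing the same video segment end up merged.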
178 def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
179 cid_edges = cid_edges or {}
180 division_data = self._download_json(
181 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
182 query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
183 note=f'Extracting divisions from edge {edge_id}')
184 edges.setdefault(edge_id, {}).update(
185 traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
186 'title': ('title', {str}),
187 'cid': ('cid', {int_or_none}),
188 }), get_all=False))
189
190 edges[edge_id].update(traverse_obj(division_data, ('data', {
191 'title': ('title', {str}),
192 'choices': ('edges', 'questions', ..., 'choices', ..., {
193 'edge_id': ('id', {int_or_none}),
194 'cid': ('cid', {int_or_none}),
195 'text': ('option', {str}),
196 }),
197 })))
198 # use dict to combine edges that use the same video section (same cid)
199 cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
200 for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
201 if choice['edge_id'] not in edges:
202 edges[choice['edge_id']] = {'cid': choice['cid']}
203 self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
204 return cid_edges
205
206 def _get_interactive_entries(self, video_id, cid, metainfo):
207 graph_version = traverse_obj(
208 self._download_json(
209 'https://api.bilibili.com/x/player/wbi/v2', video_id,
210 'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
211 ('data', 'interaction', 'graph_version', {int_or_none}))
212 cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
213 for cid, edges in cid_edges.items():
214 play_info = self._download_playinfo(video_id, cid)
215 yield {
216 **metainfo,
217 'id': f'{video_id}_{cid}',
218 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
219 'formats': self.extract_formats(play_info),
220 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
221 'duration': float_or_none(play_info.get('timelength'), scale=1000),
222 'subtitles': self.extract_subtitles(video_id, cid),
223 }
224
225
226 class BiliBiliIE(BilibiliBaseIE):
227 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
228
229 _TESTS = [{
230 'url': 'https://www.bilibili.com/video/BV13x41117TL',
231 'info_dict': {
232 'id': 'BV13x41117TL',
233 'title': '阿滴英文|英文歌分享#6 "Closer',
234 'ext': 'mp4',
235 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
236 'uploader_id': '65880958',
237 'uploader': '阿滴英文',
238 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
239 'duration': 554.117,
240 'tags': list,
241 'comment_count': int,
242 'upload_date': '20170301',
243 'timestamp': 1488353834,
244 'like_count': int,
245 'view_count': int,
246 },
247 }, {
248 'note': 'old av URL version',
249 'url': 'http://www.bilibili.com/video/av1074402/',
250 'info_dict': {
251 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
252 'ext': 'mp4',
253 'uploader': '菊子桑',
254 'uploader_id': '156160',
255 'id': 'BV11x411K7CN',
256 'title': '【金坷垃】金泡沫',
257 'duration': 308.36,
258 'upload_date': '20140420',
259 'timestamp': 1397983878,
260 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
261 'like_count': int,
262 'comment_count': int,
263 'view_count': int,
264 'tags': list,
265 },
266 'params': {'skip_download': True},
267 }, {
268 'note': 'Anthology',
269 'url': 'https://www.bilibili.com/video/BV1bK411W797',
270 'info_dict': {
271 'id': 'BV1bK411W797',
272 'title': '物语中的人物是如何吐槽自己的OP的'
273 },
274 'playlist_count': 18,
275 'playlist': [{
276 'info_dict': {
277 'id': 'BV1bK411W797_p1',
278 'ext': 'mp4',
279 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
280 'tags': 'count:10',
281 'timestamp': 1589601697,
282 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
283 'uploader': '打牌还是打桩',
284 'uploader_id': '150259984',
285 'like_count': int,
286 'comment_count': int,
287 'upload_date': '20200516',
288 'view_count': int,
289 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
290 'duration': 90.314,
291 }
292 }]
293 }, {
294 'note': 'Specific page of Anthology',
295 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
296 'info_dict': {
297 'id': 'BV1bK411W797_p1',
298 'ext': 'mp4',
299 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
300 'tags': 'count:10',
301 'timestamp': 1589601697,
302 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
303 'uploader': '打牌还是打桩',
304 'uploader_id': '150259984',
305 'like_count': int,
306 'comment_count': int,
307 'upload_date': '20200516',
308 'view_count': int,
309 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
310 'duration': 90.314,
311 }
312 }, {
313 'note': 'video has subtitles',
314 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
315 'info_dict': {
316 'id': 'BV12N4y1M7rh',
317 'ext': 'mp4',
318 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
319 'tags': list,
320 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
321 'duration': 313.557,
322 'upload_date': '20220709',
323 'uploader': '小夫太渴',
324 'timestamp': 1657347907,
325 'uploader_id': '1326814124',
326 'comment_count': int,
327 'view_count': int,
328 'like_count': int,
329 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
330 'subtitles': 'count:2'
331 },
332 'params': {'listsubtitles': True},
333 }, {
334 'url': 'https://www.bilibili.com/video/av8903802/',
335 'info_dict': {
336 'id': 'BV13x41117TL',
337 'ext': 'mp4',
338 'title': '阿滴英文|英文歌分享#6 "Closer',
339 'upload_date': '20170301',
340 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
341 'timestamp': 1488353834,
342 'uploader_id': '65880958',
343 'uploader': '阿滴英文',
344 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
345 'duration': 554.117,
346 'tags': list,
347 'comment_count': int,
348 'view_count': int,
349 'like_count': int,
350 },
351 'params': {
352 'skip_download': True,
353 },
354 }, {
355 'note': 'video has chapter',
356 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
357 'info_dict': {
358 'id': 'BV1vL411G7N7',
359 'ext': 'mp4',
360 'title': '如何为你的B站视频添加进度条分段',
361 'timestamp': 1634554558,
362 'upload_date': '20211018',
363 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
364 'tags': list,
365 'uploader': '爱喝咖啡的当麻',
366 'duration': 669.482,
367 'uploader_id': '1680903',
368 'chapters': 'count:6',
369 'comment_count': int,
370 'view_count': int,
371 'like_count': int,
372 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
373 },
374 'params': {'skip_download': True},
375 }, {
376 'note': 'video redirects to festival page',
377 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
378 'info_dict': {
379 'id': 'BV1wP4y1P72h',
380 'ext': 'mp4',
381 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
382 'timestamp': 1643947497,
383 'upload_date': '20220204',
384 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
385 'uploader': '叨叨冯聊音乐',
386 'duration': 246.719,
387 'uploader_id': '528182630',
388 'view_count': int,
389 'like_count': int,
390 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
391 },
392 'params': {'skip_download': True},
393 }, {
394 'note': 'newer festival video',
395 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
396 'info_dict': {
397 'id': 'BV1ay4y1d77f',
398 'ext': 'mp4',
399 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
400 'timestamp': 1674273600,
401 'upload_date': '20230121',
402 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
403 'uploader': '果蝇轰',
404 'duration': 1111.722,
405 'uploader_id': '8469526',
406 'view_count': int,
407 'like_count': int,
408 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
409 },
410 'params': {'skip_download': True},
411 }, {
412 'note': 'interactive/split-path video',
413 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
414 'info_dict': {
415 'id': 'BV1af4y1H7ga',
416 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
417 'timestamp': 1630500414,
418 'upload_date': '20210901',
419 'description': 'md5:01113e39ab06e28042d74ac356a08786',
420 'tags': list,
421 'uploader': '钉宫妮妮Ninico',
422 'duration': 1503,
423 'uploader_id': '8881297',
424 'comment_count': int,
425 'view_count': int,
426 'like_count': int,
427 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
428 },
429 'playlist_count': 33,
430 'playlist': [{
431 'info_dict': {
432 'id': 'BV1af4y1H7ga_400950101',
433 'ext': 'mp4',
434 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
435 'timestamp': 1630500414,
436 'upload_date': '20210901',
437 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
438 'tags': list,
439 'uploader': '钉宫妮妮Ninico',
440 'duration': 11.605,
441 'uploader_id': '8881297',
442 'comment_count': int,
443 'view_count': int,
444 'like_count': int,
445 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
446 },
447 }],
448 }, {
449 'note': '301 redirect to bangumi link',
450 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
451 'info_dict': {
452 'id': '288525',
453 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
454 'ext': 'mp4',
455 'series': '我和我的祖国',
456 'series_id': '4780',
457 'season': '幕后纪实',
458 'season_id': '28609',
459 'season_number': 1,
460 'episode': '钱学森弹道和乘波体飞行器是什么?',
461 'episode_id': '288525',
462 'episode_number': 105,
463 'duration': 1183.957,
464 'timestamp': 1571648124,
465 'upload_date': '20191021',
466 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
467 },
468 }, {
469 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
470 'info_dict': {
471 'id': 'BV1jL41167ZG',
472 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
473 'ext': 'mp4',
474 },
475 'skip': 'supporter-only video',
476 }, {
477 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
478 'info_dict': {
479 'id': 'BV1Ks411f7aQ',
480 'title': '【BD1080P】狼与香辛料I【华盟】',
481 'ext': 'mp4',
482 },
483 'skip': 'login required',
484 }, {
485 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
486 'info_dict': {
487 'id': 'BV1GJ411x7h7',
488 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
489 'ext': 'mp4',
490 },
491 'skip': 'geo-restricted',
492 }]
493
494 def _real_extract(self, url):
495 video_id = self._match_id(url)
496 webpage, urlh = self._download_webpage_handle(url, video_id)
497 if not self._match_valid_url(urlh.url):
498 return self.url_result(urlh.url)
499
500 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
501
502 is_festival = 'videoData' not in initial_state
503 if is_festival:
504 video_data = initial_state['videoInfo']
505 else:
506 play_info_obj = self._search_json(
507 r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
508 if not play_info_obj:
509 if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
510 self.raise_login_required()
511 if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
512 raise ExtractorError(
513 'This video may be deleted or geo-restricted. '
514 'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
515 play_info = traverse_obj(play_info_obj, ('data', {dict}))
516 if not play_info:
517 if traverse_obj(play_info_obj, 'code') == 87007:
518 toast = get_element_by_class('tips-toast', webpage) or ''
519 msg = clean_html(
520 f'{get_element_by_class("belongs-to", toast) or ""},'
521 + (get_element_by_class('level', toast) or ''))
522 raise ExtractorError(
523 f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
524 raise ExtractorError('Failed to extract play info')
525 video_data = initial_state['videoData']
526
527 video_id, title = video_data['bvid'], video_data.get('title')
528
529 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
530 page_list_json = not is_festival and traverse_obj(
531 self._download_json(
532 'https://api.bilibili.com/x/player/pagelist', video_id,
533 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
534 note='Extracting videos in anthology'),
535 'data', expected_type=list) or []
536 is_anthology = len(page_list_json) > 1
537
538 part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
539 if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
540 return self.playlist_from_matches(
541 page_list_json, video_id, title, ie=BiliBiliIE,
542 getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
543
544 if is_anthology:
545 part_id = part_id or 1
546 title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
547
548 aid = video_data.get('aid')
549 old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
550
551 cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
552
553 festival_info = {}
554 if is_festival:
555 play_info = self._download_playinfo(video_id, cid)
556
557 festival_info = traverse_obj(initial_state, {
558 'uploader': ('videoInfo', 'upName'),
559 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
560 'like_count': ('videoStatus', 'like', {int_or_none}),
561 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
562 }, get_all=False)
563
564 metainfo = {
565 **traverse_obj(initial_state, {
566 'uploader': ('upData', 'name'),
567 'uploader_id': ('upData', 'mid', {str_or_none}),
568 'like_count': ('videoData', 'stat', 'like', {int_or_none}),
569 'tags': ('tags', ..., 'tag_name'),
570 'thumbnail': ('videoData', 'pic', {url_or_none}),
571 }),
572 **festival_info,
573 **traverse_obj(video_data, {
574 'description': 'desc',
575 'timestamp': ('pubdate', {int_or_none}),
576 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
577 'comment_count': ('stat', 'reply', {int_or_none}),
578 }, get_all=False),
579 'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
580 '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
581 'title': title,
582 'http_headers': {'Referer': url},
583 }
584
585 is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
586 if is_interactive:
587 return self.playlist_result(
588 self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
589 'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
590 '__post_extractor': self.extract_comments(aid),
591 })
592 else:
593 return {
594 **metainfo,
595 'duration': float_or_none(play_info.get('timelength'), scale=1000),
596 'chapters': self._get_chapters(aid, cid),
597 'subtitles': self.extract_subtitles(video_id, cid),
598 'formats': self.extract_formats(play_info),
599 '__post_extractor': self.extract_comments(aid),
600 }
601
602
603 class BiliBiliBangumiIE(BilibiliBaseIE):
604 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
605
606 _TESTS = [{
607 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
608 'info_dict': {
609 'id': '21495',
610 'ext': 'mp4',
611 'series': '悠久之翼',
612 'series_id': '774',
613 'season': '第二季',
614 'season_id': '1182',
615 'season_number': 2,
616 'episode': 'forever/ef',
617 'episode_id': '21495',
618 'episode_number': 12,
619 'title': '12 forever/ef',
620 'duration': 1420.791,
621 'timestamp': 1320412200,
622 'upload_date': '20111104',
623 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
624 },
625 }, {
626 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
627 'info_dict': {
628 'id': '267851',
629 'ext': 'mp4',
630 'series': '鬼灭之刃',
631 'series_id': '4358',
632 'season': '立志篇',
633 'season_id': '26801',
634 'season_number': 1,
635 'episode': '残酷',
636 'episode_id': '267851',
637 'episode_number': 1,
638 'title': '1 残酷',
639 'duration': 1425.256,
640 'timestamp': 1554566400,
641 'upload_date': '20190406',
642 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
643 },
644 'skip': 'Geo-restricted',
645 }, {
646 'note': 'a making-of which falls outside main section',
647 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
648 'info_dict': {
649 'id': '345120',
650 'ext': 'mp4',
651 'series': '鬼灭之刃',
652 'series_id': '4358',
653 'season': '立志篇',
654 'season_id': '26801',
655 'season_number': 1,
656 'episode': '炭治郎篇',
657 'episode_id': '345120',
658 'episode_number': 27,
659 'title': '#1 炭治郎篇',
660 'duration': 1922.129,
661 'timestamp': 1602853860,
662 'upload_date': '20201016',
663 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
664 },
665 }]
666
667 def _real_extract(self, url):
668 episode_id = self._match_id(url)
669 webpage = self._download_webpage(url, episode_id)
670
671 if '您所在的地区无法观看本片' in webpage:
672 raise GeoRestrictedError('This video is restricted')
673 elif '正在观看预览,大会员免费看全片' in webpage:
674 self.raise_login_required('This video is for premium members only')
675
676 headers = {'Referer': url, **self.geo_verification_headers()}
677 play_info = self._download_json(
678 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
679 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
680 headers=headers)
681 premium_only = play_info.get('code') == -10403
682 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
683
684 formats = self.extract_formats(play_info)
685 if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
686 self.raise_login_required('This video is for premium members only')
687
688 bangumi_info = self._download_json(
689 'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
690 query={'ep_id': episode_id}, headers=headers)['result']
691
692 episode_number, episode_info = next((
693 (idx, ep) for idx, ep in enumerate(traverse_obj(
694 bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
695 if str_or_none(ep.get('id')) == episode_id), (1, {}))
696
697 season_id = bangumi_info.get('season_id')
698 season_number, season_title = season_id and next((
699 (idx + 1, e.get('season_title')) for idx, e in enumerate(
700 traverse_obj(bangumi_info, ('seasons', ...)))
701 if e.get('season_id') == season_id
702 ), (None, None))
703
704 aid = episode_info.get('aid')
705
706 return {
707 'id': episode_id,
708 'formats': formats,
709 **traverse_obj(bangumi_info, {
710 'series': ('series', 'series_title', {str}),
711 'series_id': ('series', 'series_id', {str_or_none}),
712 'thumbnail': ('square_cover', {url_or_none}),
713 }),
714 **traverse_obj(episode_info, {
715 'episode': ('long_title', {str}),
716 'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
717 'timestamp': ('pub_time', {int_or_none}),
718 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
719 }),
720 'episode_id': episode_id,
721 'season': str_or_none(season_title),
722 'season_id': str_or_none(season_id),
723 'season_number': season_number,
724 'duration': float_or_none(play_info.get('timelength'), scale=1000),
725 'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
726 '__post_extractor': self.extract_comments(aid),
727 'http_headers': headers,
728 }
729
730
731 class BiliBiliBangumiMediaIE(BilibiliBaseIE):
732 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
733 _TESTS = [{
734 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
735 'info_dict': {
736 'id': '24097891',
737 'title': 'CAROLE & TUESDAY',
738 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
739 },
740 'playlist_mincount': 25,
741 }, {
742 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
743 'info_dict': {
744 'id': '1565',
745 'title': '攻壳机动队 S.A.C. 2nd GIG',
746 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
747 },
748 'playlist_count': 26,
749 'playlist': [{
750 'info_dict': {
751 'id': '68540',
752 'ext': 'mp4',
753 'series': '攻壳机动队',
754 'series_id': '1077',
755 'season': '第二季',
756 'season_id': '1565',
757 'season_number': 2,
758 'episode': '再启动 REEMBODY',
759 'episode_id': '68540',
760 'episode_number': 1,
761 'title': '1 再启动 REEMBODY',
762 'duration': 1525.777,
763 'timestamp': 1425074413,
764 'upload_date': '20150227',
765 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
766 },
767 }],
768 }]
769
770 def _real_extract(self, url):
771 media_id = self._match_id(url)
772 webpage = self._download_webpage(url, media_id)
773
774 initial_state = self._search_json(
775 r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
776 ss_id = initial_state['mediaInfo']['season_id']
777
778 return self.playlist_result(
779 self._get_episodes_from_season(ss_id, url), media_id,
780 **traverse_obj(initial_state, ('mediaInfo', {
781 'title': ('title', {str}),
782 'description': ('evaluate', {str}),
783 })))
784
785
786 class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
787 _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
788 _TESTS = [{
789 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
790 'info_dict': {
791 'id': '26801',
792 'title': '鬼灭之刃',
793 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
794 },
795 'playlist_mincount': 26
796 }, {
797 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
798 'info_dict': {
799 'id': '2251',
800 'title': '玲音',
801 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
802 },
803 'playlist_count': 13,
804 'playlist': [{
805 'info_dict': {
806 'id': '50188',
807 'ext': 'mp4',
808 'series': '玲音',
809 'series_id': '1526',
810 'season': 'TV',
811 'season_id': '2251',
812 'season_number': 1,
813 'episode': 'WEIRD',
814 'episode_id': '50188',
815 'episode_number': 1,
816 'title': '1 WEIRD',
817 'duration': 1436.992,
818 'timestamp': 1343185080,
819 'upload_date': '20120725',
820 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
821 },
822 }],
823 }]
824
825 def _real_extract(self, url):
826 ss_id = self._match_id(url)
827 webpage = self._download_webpage(url, ss_id)
828 metainfo = traverse_obj(
829 self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
830 ('itemListElement', ..., {
831 'title': ('name', {str}),
832 'description': ('description', {str}),
833 }), get_all=False)
834
835 return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
836
837
838 class BilibiliCheeseBaseIE(BilibiliBaseIE):
839 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
840
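    # "Cheese" URLs are Bilibili's paid course area (pugv API); episodes that
    # have not been purchased are reported as non-playable and need a login.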
841 def _extract_episode(self, season_info, ep_id):
842 episode_info = traverse_obj(season_info, (
843 'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
844 aid, cid = episode_info['aid'], episode_info['cid']
845
846 if traverse_obj(episode_info, 'ep_status') == -1:
847 raise ExtractorError('This course episode is not yet available.', expected=True)
848 if not traverse_obj(episode_info, 'playable'):
849 self.raise_login_required('You need to purchase the course to download this episode')
850
851 play_info = self._download_json(
852 'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
853 query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
854 headers=self._HEADERS, note='Downloading playinfo')['data']
855
856 return {
857 'id': str_or_none(ep_id),
858 'episode_id': str_or_none(ep_id),
859 'formats': self.extract_formats(play_info),
860 'extractor_key': BilibiliCheeseIE.ie_key(),
861 'extractor': BilibiliCheeseIE.IE_NAME,
862 'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
863 **traverse_obj(episode_info, {
864 'episode': ('title', {str}),
865 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
866 'alt_title': ('subtitle', {str}),
867 'duration': ('duration', {int_or_none}),
868 'episode_number': ('index', {int_or_none}),
869 'thumbnail': ('cover', {url_or_none}),
870 'timestamp': ('release_date', {int_or_none}),
871 'view_count': ('play', {int_or_none}),
872 }),
873 **traverse_obj(season_info, {
874 'uploader': ('up_info', 'uname', {str}),
875 'uploader_id': ('up_info', 'mid', {str_or_none}),
876 }),
877 'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
878 '__post_extractor': self.extract_comments(aid),
879 'http_headers': self._HEADERS,
880 }
881
882 def _download_season_info(self, query_key, video_id):
883 return self._download_json(
884 f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
885 headers=self._HEADERS, note='Downloading season info')['data']
886
887
888 class BilibiliCheeseIE(BilibiliCheeseBaseIE):
889 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
890 _TESTS = [{
891 'url': 'https://www.bilibili.com/cheese/play/ep229832',
892 'info_dict': {
893 'id': '229832',
894 'ext': 'mp4',
895 'title': '1 - 课程先导片',
896 'alt_title': '视频课 · 3分41秒',
897 'uploader': '马督工',
898 'uploader_id': '316568752',
899 'episode': '课程先导片',
900 'episode_id': '229832',
901 'episode_number': 1,
902 'duration': 221,
903 'timestamp': 1695549606,
904 'upload_date': '20230924',
905 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
906 'view_count': int,
907 }
908 }]
909
910 def _real_extract(self, url):
911 ep_id = self._match_id(url)
912 return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
913
914
915 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
916 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
917 _TESTS = [{
918 'url': 'https://www.bilibili.com/cheese/play/ss5918',
919 'info_dict': {
920 'id': '5918',
921 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
922 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
923 },
924 'playlist': [{
925 'info_dict': {
926 'id': '229832',
927 'ext': 'mp4',
928 'title': '1 - 课程先导片',
929 'alt_title': '视频课 · 3分41秒',
930 'uploader': '马督工',
931 'uploader_id': '316568752',
932 'episode': '课程先导片',
933 'episode_id': '229832',
934 'episode_number': 1,
935 'duration': 221,
936 'timestamp': 1695549606,
937 'upload_date': '20230924',
938 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
939 'view_count': int,
940 }
941 }],
942 'params': {'playlist_items': '1'},
943 }, {
944 'url': 'https://www.bilibili.com/cheese/play/ss5918',
945 'info_dict': {
946 'id': '5918',
947 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
948 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
949 },
950 'playlist_mincount': 5,
951 'skip': 'paid video in list',
952 }]
953
954 def _get_cheese_entries(self, season_info):
955 for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
956 yield self._extract_episode(season_info, ep_id)
957
958 def _real_extract(self, url):
959 season_id = self._match_id(url)
960 season_info = self._download_season_info('season_id', season_id)
961
962 return self.playlist_result(
963 self._get_cheese_entries(season_info), season_id,
964 **traverse_obj(season_info, {
965 'title': ('title', {str}),
966 'description': ('subtitle', {str}),
967 }))
968
969
970 class BilibiliSpaceBaseIE(InfoExtractor):
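    # Shared paging helper: download the first page to learn page_count and
    # page_size from get_metadata(), then expose all pages lazily through an
    # InAdvancePagedList (page 0 reuses the already-downloaded first page).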
971 def _extract_playlist(self, fetch_page, get_metadata, get_entries):
972 first_page = fetch_page(0)
973 metadata = get_metadata(first_page)
974
975 paged_list = InAdvancePagedList(
976 lambda idx: get_entries(fetch_page(idx) if idx else first_page),
977 metadata['page_count'], metadata['page_size'])
978
979 return metadata, paged_list
980
981
982 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
983 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
984 _TESTS = [{
985 'url': 'https://space.bilibili.com/3985676/video',
986 'info_dict': {
987 'id': '3985676',
988 },
989 'playlist_mincount': 178,
990 }, {
991 'url': 'https://space.bilibili.com/313580179/video',
992 'info_dict': {
993 'id': '313580179',
994 },
995 'playlist_mincount': 92,
996 }]
997
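    # WBI request signing: a "mixin key" is derived by re-ordering the characters
    # of img_key + sub_key (taken from the filenames in the nav API's wbi_img
    # URLs) according to a fixed position table and keeping the first 32 chars.
    # Hard-coded fallback keys are used when the nav API cannot be reached.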
998 def _extract_signature(self, playlist_id):
999 session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
1000
1001 key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
1002 img_key = traverse_obj(
1003 session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
1004 sub_key = traverse_obj(
1005 session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
1006
1007 session_key = img_key + sub_key
1008
1009 signature_values = []
1010 for position in (
1011 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
1012 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
1013 57, 62, 11, 36, 20, 34, 44, 52
1014 ):
1015 char_at_position = try_call(lambda: session_key[position])
1016 if char_at_position:
1017 signature_values.append(char_at_position)
1018
1019 return ''.join(signature_values)[:32]
1020
1021 def _real_extract(self, url):
1022 playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
1023 if not is_video_url:
1024 self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1025 'To download audio, add "/audio" to the URL')
1026
1027 signature = self._extract_signature(playlist_id)
1028
1029 def fetch_page(page_idx):
1030 query = {
1031 'keyword': '',
1032 'mid': playlist_id,
1033 'order': 'pubdate',
1034 'order_avoided': 'true',
1035 'platform': 'web',
1036 'pn': page_idx + 1,
1037 'ps': 30,
1038 'tid': 0,
1039 'web_location': 1550101,
1040 'wts': int(time.time()),
1041 }
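            # w_rid is the md5 of the url-encoded query concatenated with the WBI mixin key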
1042 query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
1043
1044 try:
1045 response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
1046 playlist_id, note=f'Downloading page {page_idx}', query=query)
1047 except ExtractorError as e:
1048 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
1049 raise ExtractorError(
1050 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
1051 raise
1052 if response['code'] == -401:
1053 raise ExtractorError(
1054 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
1055 return response['data']
1056
1057 def get_metadata(page_data):
1058 page_size = page_data['page']['ps']
1059 entry_count = page_data['page']['count']
1060 return {
1061 'page_count': math.ceil(entry_count / page_size),
1062 'page_size': page_size,
1063 }
1064
1065 def get_entries(page_data):
1066 for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
1067 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
1068
1069 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1070 return self.playlist_result(paged_list, playlist_id)
1071
1072
1073 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
1074 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
1075 _TESTS = [{
1076 'url': 'https://space.bilibili.com/313580179/audio',
1077 'info_dict': {
1078 'id': '313580179',
1079 },
1080 'playlist_mincount': 1,
1081 }]
1082
1083 def _real_extract(self, url):
1084 playlist_id = self._match_id(url)
1085
1086 def fetch_page(page_idx):
1087 return self._download_json(
1088 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
1089 note=f'Downloading page {page_idx}',
1090 query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
1091
1092 def get_metadata(page_data):
1093 return {
1094 'page_count': page_data['pageCount'],
1095 'page_size': page_data['pageSize'],
1096 }
1097
1098 def get_entries(page_data):
1099 for entry in page_data.get('data', []):
1100 yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
1101
1102 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1103 return self.playlist_result(paged_list, playlist_id)
1104
1105
1106 class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
1107 def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
1108 for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
1109 yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
1110
1111 def _get_uploader(self, uid, playlist_id):
1112 webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
1113 return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
1114
1115 def _extract_playlist(self, fetch_page, get_metadata, get_entries):
1116 metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
1117 metadata.pop('page_count', None)
1118 metadata.pop('page_size', None)
1119 return metadata, page_list
1120
1121
1122 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
1123 _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
1124 _TESTS = [{
1125 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
1126 'info_dict': {
1127 'id': '2142762_57445',
1128 'title': '【完结】《底特律 变人》全结局流程解说',
1129 'description': '',
1130 'uploader': '老戴在此',
1131 'uploader_id': '2142762',
1132 'timestamp': int,
1133 'upload_date': str,
1134 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
1135 },
1136 'playlist_mincount': 31,
1137 }]
1138
1139 def _real_extract(self, url):
1140 mid, sid = self._match_valid_url(url).group('mid', 'sid')
1141 playlist_id = f'{mid}_{sid}'
1142
1143 def fetch_page(page_idx):
1144 return self._download_json(
1145 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
1146 playlist_id, note=f'Downloading page {page_idx}',
1147 query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
1148
1149 def get_metadata(page_data):
1150 page_size = page_data['page']['page_size']
1151 entry_count = page_data['page']['total']
1152 return {
1153 'page_count': math.ceil(entry_count / page_size),
1154 'page_size': page_size,
1155 'uploader': self._get_uploader(mid, playlist_id),
1156 **traverse_obj(page_data, {
1157 'title': ('meta', 'name', {str}),
1158 'description': ('meta', 'description', {str}),
1159 'uploader_id': ('meta', 'mid', {str_or_none}),
1160 'timestamp': ('meta', 'ptime', {int_or_none}),
1161 'thumbnail': ('meta', 'cover', {url_or_none}),
1162 })
1163 }
1164
1165 def get_entries(page_data):
1166 return self._get_entries(page_data, 'archives')
1167
1168 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1169 return self.playlist_result(paged_list, playlist_id, **metadata)
1170
1171
1172 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
1173 _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
1174 _TESTS = [{
1175 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
1176 'info_dict': {
1177 'id': '1958703906_547718',
1178 'title': '直播回放',
1179 'description': '直播回放',
1180 'uploader': '靡烟miya',
1181 'uploader_id': '1958703906',
1182 'timestamp': 1637985853,
1183 'upload_date': '20211127',
1184 'modified_timestamp': int,
1185 'modified_date': str,
1186 },
1187 'playlist_mincount': 513,
1188 }]
1189
1190 def _real_extract(self, url):
1191 mid, sid = self._match_valid_url(url).group('mid', 'sid')
1192 playlist_id = f'{mid}_{sid}'
1193 playlist_meta = traverse_obj(self._download_json(
1194 f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
1195 ), {
1196 'title': ('data', 'meta', 'name', {str}),
1197 'description': ('data', 'meta', 'description', {str}),
1198 'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
1199 'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
1200 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
1201 })
1202
1203 def fetch_page(page_idx):
1204 return self._download_json(
1205 'https://api.bilibili.com/x/series/archives',
1206 playlist_id, note=f'Downloading page {page_idx}',
1207 query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
1208
1209 def get_metadata(page_data):
1210 page_size = page_data['page']['size']
1211 entry_count = page_data['page']['total']
1212 return {
1213 'page_count': math.ceil(entry_count / page_size),
1214 'page_size': page_size,
1215 'uploader': self._get_uploader(mid, playlist_id),
1216 **playlist_meta
1217 }
1218
1219 def get_entries(page_data):
1220 return self._get_entries(page_data, 'archives')
1221
1222 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1223 return self.playlist_result(paged_list, playlist_id, **metadata)
1224
1225
1226 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
1227 _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
1228 _TESTS = [{
1229 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
1230 'info_dict': {
1231 'id': '1103407912',
1232 'title': '【V2】(旧)',
1233 'description': '',
1234 'uploader': '晓月春日',
1235 'uploader_id': '84912',
1236 'timestamp': 1604905176,
1237 'upload_date': '20201109',
1238 'modified_timestamp': int,
1239 'modified_date': str,
1240 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
1241 'view_count': int,
1242 'like_count': int,
1243 },
1244 'playlist_mincount': 22,
1245 }, {
1246 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
1247 'only_matching': True,
1248 }]
1249
1250 def _real_extract(self, url):
1251 fid = self._match_id(url)
1252
1253 list_info = self._download_json(
1254 f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
1255 fid, note='Downloading favlist metadata')
1256 if list_info['code'] == -403:
1257 self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
1258
1259 entries = self._get_entries(self._download_json(
1260 f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
1261 fid, note='Download favlist entries'), 'data')
1262
1263 return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
1264 'title': ('title', {str}),
1265 'description': ('intro', {str}),
1266 'uploader': ('upper', 'name', {str}),
1267 'uploader_id': ('upper', 'mid', {str_or_none}),
1268 'timestamp': ('ctime', {int_or_none}),
1269 'modified_timestamp': ('mtime', {int_or_none}),
1270 'thumbnail': ('cover', {url_or_none}),
1271 'view_count': ('cnt_info', 'play', {int_or_none}),
1272 'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
1273 })))
1274
1275
1276 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
1277 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
1278 _TESTS = [{
1279 'url': 'https://www.bilibili.com/watchlater/#/list',
1280 'info_dict': {'id': 'watchlater'},
1281 'playlist_mincount': 0,
1282 'skip': 'login required',
1283 }]
1284
1285 def _real_extract(self, url):
1286 list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
1287 watchlater_info = self._download_json(
1288 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
1289 if watchlater_info['code'] == -101:
1290 self.raise_login_required(msg='You need to login to access your watchlater list')
1291 entries = self._get_entries(watchlater_info, ('data', 'list'))
1292 return self.playlist_result(entries, id=list_id, title='稍后再看')
1293
1294
1295 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
1296 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
1297 _TESTS = [{
1298 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
1299 'info_dict': {
1300 'id': '5_547718',
1301 'title': '直播回放',
1302 'uploader': '靡烟miya',
1303 'uploader_id': '1958703906',
1304 'timestamp': 1637985853,
1305 'upload_date': '20211127',
1306 },
1307 'playlist_mincount': 513,
1308 }, {
1309 'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
1310 'info_dict': {
1311 'id': 'BV1DU4y1r7tz',
1312 'ext': 'mp4',
1313 'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
1314 'upload_date': '20220820',
1315 'description': '',
1316 'timestamp': 1661016330,
1317 'uploader_id': '1958703906',
1318 'uploader': '靡烟miya',
1319 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1320 'duration': 9552.903,
1321 'tags': list,
1322 'comment_count': int,
1323 'view_count': int,
1324 'like_count': int,
1325 '_old_archive_ids': ['bilibili 687146339_part1'],
1326 },
1327 'params': {'noplaylist': True},
1328 }, {
1329 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
1330 'info_dict': {
1331 'id': '5_547718',
1332 },
1333 'playlist_mincount': 513,
1334 'skip': 'redirect url',
1335 }, {
1336 'url': 'https://www.bilibili.com/list/ml1103407912',
1337 'info_dict': {
1338 'id': '3_1103407912',
1339 'title': '【V2】(旧)',
1340 'uploader': '晓月春日',
1341 'uploader_id': '84912',
1342 'timestamp': 1604905176,
1343 'upload_date': '20201109',
1344 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
1345 },
1346 'playlist_mincount': 22,
1347 }, {
1348 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
1349 'info_dict': {
1350 'id': '3_1103407912',
1351 },
1352 'playlist_mincount': 22,
1353 'skip': 'redirect url',
1354 }, {
1355 'url': 'https://www.bilibili.com/list/watchlater',
1356 'info_dict': {'id': 'watchlater'},
1357 'playlist_mincount': 0,
1358 'skip': 'login required',
1359 }, {
1360 'url': 'https://www.bilibili.com/medialist/play/watchlater',
1361 'info_dict': {'id': 'watchlater'},
1362 'playlist_mincount': 0,
1363 'skip': 'login required',
1364 }]
1365
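    # The medialist resource API is cursor-paged: each request passes the id of
    # the previous page's last item as "oid" and paging stops once has_more is false.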
1366 def _extract_medialist(self, query, list_id):
1367 for page_num in itertools.count(1):
1368 page_data = self._download_json(
1369 'https://api.bilibili.com/x/v2/medialist/resource/list',
1370 list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
1371 )['data']
1372 yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
1373 query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
1374 if not page_data.get('has_more', False):
1375 break
1376
1377 def _real_extract(self, url):
1378 list_id = self._match_id(url)
1379
1380 bvid = traverse_obj(parse_qs(url), ('bvid', 0))
1381 if not self._yes_playlist(list_id, bvid):
1382 return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
1383
1384 webpage = self._download_webpage(url, list_id)
1385 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
1386 if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
1387 error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
1388 error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
1389 if error_code == -400 and list_id == 'watchlater':
1390 self.raise_login_required('You need to login to access your watchlater playlist')
1391 elif error_code == -403:
1392 self.raise_login_required('This is a private playlist. You need to login as its owner')
1393 elif error_code == 11010:
1394 raise ExtractorError('Playlist is no longer available', expected=True)
1395 raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
1396
1397 query = {
1398 'ps': 20,
1399 'with_current': False,
1400 **traverse_obj(initial_state, {
1401 'type': ('playlist', 'type', {int_or_none}),
1402 'biz_id': ('playlist', 'id', {int_or_none}),
1403 'tid': ('tid', {int_or_none}),
1404 'sort_field': ('sortFiled', {int_or_none}),
1405 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
1406 })
1407 }
1408 metadata = {
1409 'id': f'{query["type"]}_{query["biz_id"]}',
1410 **traverse_obj(initial_state, ('mediaListInfo', {
1411 'title': ('title', {str}),
1412 'uploader': ('upper', 'name', {str}),
1413 'uploader_id': ('upper', 'mid', {str_or_none}),
1414 'timestamp': ('ctime', {int_or_none}),
1415 'thumbnail': ('cover', {url_or_none}),
1416 })),
1417 }
1418 return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
1419
1420
1421 class BilibiliCategoryIE(InfoExtractor):
1422 IE_NAME = 'Bilibili category extractor'
1423 _MAX_RESULTS = 1000000
1424 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
1425 _TESTS = [{
1426 'url': 'https://www.bilibili.com/v/kichiku/mad',
1427 'info_dict': {
1428 'id': 'kichiku: mad',
1429 'title': 'kichiku: mad'
1430 },
1431 'playlist_mincount': 45,
1432 'params': {
1433 'playlistend': 45
1434 }
1435 }]
1436
1437 def _fetch_page(self, api_url, num_pages, query, page_num):
1438 parsed_json = self._download_json(
1439 api_url, query, query={'Search_key': query, 'pn': page_num},
1440 note='Extracting results from page %s of %s' % (page_num, num_pages))
1441
1442 video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
1443 if not video_list:
1444 raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
1445
1446 for video in video_list:
1447 yield self.url_result(
1448 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
1449
1450 def _entries(self, category, subcategory, query):
1451 # map of categories : subcategories : RIDs
1452 rid_map = {
1453 'kichiku': {
1454 'mad': 26,
1455 'manual_vocaloid': 126,
1456 'guide': 22,
1457 'theatre': 216,
1458 'course': 127
1459 },
1460 }
1461
1462 if category not in rid_map:
1463 raise ExtractorError(
1464 f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1465 if subcategory not in rid_map[category]:
1466 raise ExtractorError(
1467 f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1468 rid_value = rid_map[category][subcategory]
1469
1470 api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1471 page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
1472 page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
1473 count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
1474 if count is None or not size:
1475 raise ExtractorError('Failed to calculate either page count or size')
1476
1477 num_pages = math.ceil(count / size)
1478
1479 return OnDemandPagedList(functools.partial(
1480 self._fetch_page, api_url, num_pages, query), size)
1481
1482 def _real_extract(self, url):
1483 category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
1484 query = '%s: %s' % (category, subcategory)
1485
1486 return self.playlist_result(self._entries(category, subcategory, query), query, query)
1487
1488
1489 class BiliBiliSearchIE(SearchInfoExtractor):
1490 IE_DESC = 'Bilibili video search'
1491 _MAX_RESULTS = 100000
1492 _SEARCH_KEY = 'bilisearch'
1493 _TESTS = [{
1494 'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1495 'playlist_count': 3,
1496 'info_dict': {
1497 'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1498 'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1499 },
1500 'playlist': [{
1501 'info_dict': {
1502 'id': 'BV1n44y1Q7sc',
1503 'ext': 'mp4',
1504 'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
1505 'timestamp': 1669889987,
1506 'upload_date': '20221201',
1507 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
1508 'tags': list,
1509 'uploader': '靡烟miya',
1510 'duration': 123.156,
1511 'uploader_id': '1958703906',
1512 'comment_count': int,
1513 'view_count': int,
1514 'like_count': int,
1515 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1516 '_old_archive_ids': ['bilibili 988222410_part1'],
1517 },
1518 }],
1519 }]
1520
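    # The search API expects a buvid3 device cookie; when the user has none, a
    # random UUID-based placeholder is set before querying.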
1521 def _search_results(self, query):
1522 if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
1523 self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
1524 for page_num in itertools.count(1):
1525 videos = self._download_json(
1526 'https://api.bilibili.com/x/web-interface/search/type', query,
1527 note=f'Extracting results from page {page_num}', query={
1528 'Search_key': query,
1529 'keyword': query,
1530 'page': page_num,
1531 'context': '',
1532 'duration': 0,
1533 'tids_2': '',
1534 '__refresh__': 'true',
1535 'search_type': 'video',
1536 'tids': 0,
1537 'highlight': 1,
1538 })['data'].get('result')
1539 if not videos:
1540 break
1541 for video in videos:
1542 yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
1543
1544
1545 class BilibiliAudioBaseIE(InfoExtractor):
1546 def _call_api(self, path, sid, query=None):
1547 if not query:
1548 query = {'sid': sid}
1549 return self._download_json(
1550 'https://www.bilibili.com/audio/music-service-c/web/' + path,
1551 sid, query=query)['data']
1552
1553
1554 class BilibiliAudioIE(BilibiliAudioBaseIE):
1555 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1556 _TEST = {
1557 'url': 'https://www.bilibili.com/audio/au1003142',
1558 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1559 'info_dict': {
1560 'id': '1003142',
1561 'ext': 'm4a',
1562 'title': '【tsukimi】YELLOW / 神山羊',
1563 'artist': 'tsukimi',
1564 'comment_count': int,
1565 'description': 'YELLOW的mp3版!',
1566 'duration': 183,
1567 'subtitles': {
1568 'origin': [{
1569 'ext': 'lrc',
1570 }],
1571 },
1572 'thumbnail': r're:^https?://.+\.jpg',
1573 'timestamp': 1564836614,
1574 'upload_date': '20190803',
1575 'uploader': 'tsukimi-つきみぐー',
1576 'view_count': int,
1577 },
1578 }
1579
1580 def _real_extract(self, url):
1581 au_id = self._match_id(url)
1582
1583 play_data = self._call_api('url', au_id)
1584 formats = [{
1585 'url': play_data['cdns'][0],
1586 'filesize': int_or_none(play_data.get('size')),
1587 'vcodec': 'none'
1588 }]
1589
1590 for a_format in formats:
1591 a_format.setdefault('http_headers', {}).update({
1592 'Referer': url,
1593 })
1594
1595 song = self._call_api('song/info', au_id)
1596 title = song['title']
1597 statistic = song.get('statistic') or {}
1598
1599 subtitles = None
1600 lyric = song.get('lyric')
1601 if lyric:
1602 subtitles = {
1603 'origin': [{
1604 'url': lyric,
1605 }]
1606 }
1607
1608 return {
1609 'id': au_id,
1610 'title': title,
1611 'formats': formats,
1612 'artist': song.get('author'),
1613 'comment_count': int_or_none(statistic.get('comment')),
1614 'description': song.get('intro'),
1615 'duration': int_or_none(song.get('duration')),
1616 'subtitles': subtitles,
1617 'thumbnail': song.get('cover'),
1618 'timestamp': int_or_none(song.get('passtime')),
1619 'uploader': song.get('uname'),
1620 'view_count': int_or_none(statistic.get('play')),
1621 }
1622
1623
1624 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
1625 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1626 _TEST = {
1627 'url': 'https://www.bilibili.com/audio/am10624',
1628 'info_dict': {
1629 'id': '10624',
1630 'title': '每日新曲推荐(每日11:00更新)',
1631 'description': '每天11:00更新,为你推送最新音乐',
1632 },
1633 'playlist_count': 19,
1634 }
1635
1636 def _real_extract(self, url):
1637 am_id = self._match_id(url)
1638
1639 songs = self._call_api(
1640 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
1641
1642 entries = []
1643 for song in songs:
1644 sid = str_or_none(song.get('id'))
1645 if not sid:
1646 continue
1647 entries.append(self.url_result(
1648 'https://www.bilibili.com/audio/au' + sid,
1649 BilibiliAudioIE.ie_key(), sid))
1650
1651 if entries:
1652 album_data = self._call_api('menu/info', am_id) or {}
1653 album_title = album_data.get('title')
1654 if album_title:
1655 for entry in entries:
1656 entry['album'] = album_title
1657 return self.playlist_result(
1658 entries, am_id, album_title, album_data.get('intro'))
1659
1660 return self.playlist_result(entries, am_id)
1661
1662
1663 class BiliBiliPlayerIE(InfoExtractor):
1664 _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1665 _TEST = {
1666 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1667 'only_matching': True,
1668 }
1669
1670 def _real_extract(self, url):
1671 video_id = self._match_id(url)
1672 return self.url_result(
1673 'http://www.bilibili.tv/video/av%s/' % video_id,
1674 ie=BiliBiliIE.ie_key(), video_id=video_id)
1675
1676
1677 class BiliIntlBaseIE(InfoExtractor):
1678 _API_URL = 'https://api.bilibili.tv/intl/gateway'
1679 _NETRC_MACHINE = 'biliintl'
1680 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
1681
1682 def _call_api(self, endpoint, *args, **kwargs):
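# A non-zero "code" signals an API error: 10004004/10004005/10023006 require login,
# 10004001 indicates a geo-restriction; anything else is reported as a generic error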
1683 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
1684 if json.get('code'):
1685 if json['code'] in (10004004, 10004005, 10023006):
1686 self.raise_login_required()
1687 elif json['code'] == 10004001:
1688 self.raise_geo_restricted()
1689 else:
1690 if json.get('message') and str(json['code']) != json['message']:
1691 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1692 else:
1693 errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
1694 if kwargs.get('fatal'):
1695 raise ExtractorError(errmsg)
1696 else:
1697 self.report_warning(errmsg)
1698 return json.get('data')
1699
1700 def json2srt(self, json):
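# Convert Bilibili's JSON subtitle body into SRT cues ("index\nstart --> end\ntext"),
# skipping entries with missing text or timing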
1701 data = '\n\n'.join(
1702 f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1703 for i, line in enumerate(traverse_obj(json, (
1704 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
1705 return data
1706
1707 def _get_subtitles(self, *, ep_id=None, aid=None):
1708 sub_json = self._call_api(
1709 '/web/v2/subtitle', ep_id or aid, fatal=False,
1710 note='Downloading subtitles list', errnote='Unable to download subtitles list',
1711 query=filter_dict({
1712 'platform': 'web',
1713 's_locale': 'en_US',
1714 'episode_id': ep_id,
1715 'aid': aid,
1716 })) or {}
1717 subtitles = {}
1718 fetched_urls = set()
1719 for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
1720 for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
1721 if url in fetched_urls:
1722 continue
1723 fetched_urls.add(url)
1724 sub_ext = determine_ext(url)
1725 sub_lang = sub.get('lang_key') or 'en'
1726
1727 if sub_ext == 'ass':
1728 subtitles.setdefault(sub_lang, []).append({
1729 'ext': 'ass',
1730 'url': url,
1731 })
1732 elif sub_ext == 'json':
1733 sub_data = self._download_json(
1734 url, ep_id or aid, fatal=False,
1735 note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
1736 errnote='Unable to download subtitles')
1737
1738 if sub_data:
1739 subtitles.setdefault(sub_lang, []).append({
1740 'ext': 'srt',
1741 'data': self.json2srt(sub_data),
1742 })
1743 else:
1744 self.report_warning('Unexpected subtitle extension', ep_id or aid)
1745
1746 return subtitles
1747
1748 def _get_formats(self, *, ep_id=None, aid=None):
1749 video_json = self._call_api(
1750 '/web/playurl', ep_id or aid, note='Downloading video formats',
1751 errnote='Unable to download video formats', query=filter_dict({
1752 'platform': 'web',
1753 'ep_id': ep_id,
1754 'aid': aid,
1755 }))
1756 video_json = video_json['playurl']
1757 formats = []
1758 for vid in video_json.get('video') or []:
1759 video_res = vid.get('video_resource') or {}
1760 video_info = vid.get('stream_info') or {}
1761 if not video_res.get('url'):
1762 continue
1763 formats.append({
1764 'url': video_res['url'],
1765 'ext': 'mp4',
1766 'format_note': video_info.get('desc_words'),
1767 'width': video_res.get('width'),
1768 'height': video_res.get('height'),
1769 'vbr': video_res.get('bandwidth'),
1770 'acodec': 'none',
1771 'vcodec': video_res.get('codecs'),
1772 'filesize': video_res.get('size'),
1773 })
1774 for aud in video_json.get('audio_resource') or []:
1775 if not aud.get('url'):
1776 continue
1777 formats.append({
1778 'url': aud['url'],
1779 'ext': 'mp4',
1780 'abr': aud.get('bandwidth'),
1781 'acodec': aud.get('codecs'),
1782 'vcodec': 'none',
1783 'filesize': aud.get('size'),
1784 })
1785
1786 return formats
1787
1788 def _parse_video_metadata(self, video_data):
1789 return {
1790 'title': video_data.get('title_display') or video_data.get('title'),
1791 'description': video_data.get('desc'),
1792 'thumbnail': video_data.get('cover'),
1793 'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
1794 'episode_number': int_or_none(self._search_regex(
1795 r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
1796 }
1797
1798 def _perform_login(self, username, password):
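# Login flow: fetch an RSA public key and salt ("hash") from the passport API,
# encrypt salt+password with PKCS#1 v1.5, then POST the base64-encoded result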
1799 if not Cryptodome.RSA:
1800 raise ExtractorError('pycryptodomex not found. Please install', expected=True)
1801
1802 key_data = self._download_json(
1803 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1804 note='Downloading login key', errnote='Unable to download login key')['data']
1805
1806 public_key = Cryptodome.RSA.importKey(key_data['key'])
1807 password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
1808 login_post = self._download_json(
1809 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
1810 'username': username,
1811 'password': base64.b64encode(password_hash).decode('ascii'),
1812 'keep_me': 'true',
1813 's_locale': 'en_US',
1814 'isTrusted': 'true'
1815 }), note='Logging in', errnote='Unable to log in')
1816 if login_post.get('code'):
1817 if login_post.get('message'):
1818 raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
1819 else:
1820 raise ExtractorError('Unable to log in')
1821
1822
1823 class BiliIntlIE(BiliIntlBaseIE):
1824 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1825 _TESTS = [{
1826 # Bstation page
1827 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1828 'info_dict': {
1829 'id': '341736',
1830 'ext': 'mp4',
1831 'title': 'E2 - The First Night',
1832 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1833 'episode_number': 2,
1834 'upload_date': '20201009',
1835 'episode': 'Episode 2',
1836 'timestamp': 1602259500,
1837 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1838 'chapters': [{
1839 'start_time': 0,
1840 'end_time': 76.242,
1841 'title': '<Untitled Chapter 1>'
1842 }, {
1843 'start_time': 76.242,
1844 'end_time': 161.161,
1845 'title': 'Intro'
1846 }, {
1847 'start_time': 1325.742,
1848 'end_time': 1403.903,
1849 'title': 'Outro'
1850 }],
1851 }
1852 }, {
1853 # Non-Bstation page
1854 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1855 'info_dict': {
1856 'id': '11005006',
1857 'ext': 'mp4',
1858 'title': 'E3 - Who?',
1859 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1860 'episode_number': 3,
1861 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1862 'episode': 'Episode 3',
1863 'upload_date': '20211219',
1864 'timestamp': 1639928700,
1865 'chapters': [{
1866 'start_time': 0,
1867 'end_time': 88.0,
1868 'title': '<Untitled Chapter 1>'
1869 }, {
1870 'start_time': 88.0,
1871 'end_time': 156.0,
1872 'title': 'Intro'
1873 }, {
1874 'start_time': 1173.0,
1875 'end_time': 1259.535,
1876 'title': 'Outro'
1877 }],
1878 }
1879 }, {
1880 # Subtitle with empty content
1881 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1882 'info_dict': {
1883 'id': '10131790',
1884 'ext': 'mp4',
1885 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1886 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1887 'episode_number': 140,
1888 },
1889 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1890 }, {
1891 # episode comment extraction
1892 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1893 'info_dict': {
1894 'id': '340317',
1895 'ext': 'mp4',
1896 'timestamp': 1604057820,
1897 'upload_date': '20201030',
1898 'episode_number': 5,
1899 'title': 'E5 - My Own Steel',
1900 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1901 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1902 'episode': 'Episode 5',
1903 'comment_count': int,
1904 'chapters': [{
1905 'start_time': 0,
1906 'end_time': 61.0,
1907 'title': '<Untitled Chapter 1>'
1908 }, {
1909 'start_time': 61.0,
1910 'end_time': 134.0,
1911 'title': 'Intro'
1912 }, {
1913 'start_time': 1290.0,
1914 'end_time': 1379.0,
1915 'title': 'Outro'
1916 }],
1917 },
1918 'params': {
1919 'getcomments': True
1920 }
1921 }, {
1922 # user generated content comment extraction
1923 'url': 'https://www.bilibili.tv/en/video/2045730385',
1924 'info_dict': {
1925 'id': '2045730385',
1926 'ext': 'mp4',
1927 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1928 'timestamp': 1667891924,
1929 'upload_date': '20221108',
1930 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
1931 'comment_count': int,
1932 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
1933 },
1934 'params': {
1935 'getcomments': True
1936 }
1937 }, {
1938 # episode id without intro and outro
1939 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1940 'info_dict': {
1941 'id': '11246489',
1942 'ext': 'mp4',
1943 'title': 'E1 - Operation \'Strix\' <Owl>',
1944 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1945 'timestamp': 1649516400,
1946 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1947 'episode': 'Episode 1',
1948 'episode_number': 1,
1949 'upload_date': '20220409',
1950 },
1951 }, {
1952 'url': 'https://www.biliintl.com/en/play/34613/341736',
1953 'only_matching': True,
1954 }, {
1955 # User-generated content (as opposed to a series licensed from a studio)
1956 'url': 'https://bilibili.tv/en/video/2019955076',
1957 'only_matching': True,
1958 }, {
1959 # No language in URL
1960 'url': 'https://www.bilibili.tv/video/2019955076',
1961 'only_matching': True,
1962 }, {
1963 # Uppercase language in URL
1964 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1965 'only_matching': True,
1966 }]
1967
1968 @staticmethod
1969 def _make_url(video_id, series_id=None):
1970 if series_id:
1971 return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1972 return f'https://www.bilibili.tv/en/video/{video_id}'
1973
1974 def _extract_video_metadata(self, url, video_id, season_id):
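# BiliIntlSeriesIE smuggles already-parsed episode metadata into the URL,
# which lets us return early without downloading the webpage again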
1975 url, smuggled_data = unsmuggle_url(url, {})
1976 if smuggled_data.get('title'):
1977 return smuggled_data
1978
1979 webpage = self._download_webpage(url, video_id)
1980 # Bstation layout
1981 initial_data = (
1982 self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
1983 or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
1984 video_data = traverse_obj(
1985 initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
1986
1987 if season_id and not video_data:
1988 # Non-Bstation layout, read through episode list
1989 season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
1990 video_data = traverse_obj(season_json, (
1991 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
1992 ), expected_type=dict, get_all=False)
1993
1994 # XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
1994 # XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
1995 return merge_dicts(
1996 self._parse_video_metadata(video_data), {
1997 'title': get_element_by_class(
1998 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
1999 'description': get_element_by_class(
2000 'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
2001 }, self._search_json_ld(webpage, video_id, default={}))
2002
2003 def _get_comments_reply(self, root_id, next_id=0, display_id=None):
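# Replies are paginated with a cursor; keep recursing with data.cursor.next
# until data.cursor.is_end is truthy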
2004 comment_api_raw_data = self._download_json(
2005 'https://api.bilibili.tv/reply/web/detail', display_id,
2006 note=f'Downloading reply comment of {root_id} - {next_id}',
2007 query={
2008 'platform': 'web',
2009 'ps': 20, # replies per page (default: 3)
2010 'root': root_id,
2011 'next': next_id,
2012 })
2013
2014 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2015 yield {
2016 'author': traverse_obj(replies, ('member', 'name')),
2017 'author_id': traverse_obj(replies, ('member', 'mid')),
2018 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2019 'text': traverse_obj(replies, ('content', 'message')),
2020 'id': replies.get('rpid'),
2021 'like_count': int_or_none(replies.get('like_count')),
2022 'parent': replies.get('parent'),
2023 'timestamp': unified_timestamp(replies.get('ctime_text'))
2024 }
2025
2026 if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2027 yield from self._get_comments_reply(
2028 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
2029
2030 def _get_comments(self, video_id, ep_id):
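# Top-level comments are fetched page by page; any comment with a non-zero
# reply count also has its nested replies extracted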
2031 for i in itertools.count(0):
2032 comment_api_raw_data = self._download_json(
2033 'https://api.bilibili.tv/reply/web/root', video_id,
2034 note=f'Downloading comment page {i + 1}',
2035 query={
2036 'platform': 'web',
2037 'pn': i, # page number
2038 'ps': 20, # comments per page (default: 20)
2039 'oid': video_id,
2040 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
2041 'sort_type': 1, # 1: best, 2: recent
2042 })
2043
2044 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2045 yield {
2046 'author': traverse_obj(replies, ('member', 'name')),
2047 'author_id': traverse_obj(replies, ('member', 'mid')),
2048 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2049 'text': traverse_obj(replies, ('content', 'message')),
2050 'id': replies.get('rpid'),
2051 'like_count': int_or_none(replies.get('like_count')),
2052 'timestamp': unified_timestamp(replies.get('ctime_text')),
2053 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
2054 }
2055 if replies.get('count'):
2056 yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
2057
2058 if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2059 break
2060
2061 def _real_extract(self, url):
2062 season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
2063 video_id = ep_id or aid
2064 chapters = None
2065
2066 if ep_id:
2067 intro_ending_json = self._call_api(
2068 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2069 video_id, fatal=False) or {}
2070 if intro_ending_json.get('skip'):
2071 # FIXME: start_time and end_time seem to be off by a few seconds even though they are correct according to ogv.*.js
2072 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2073 chapters = [{
2074 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
2075 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
2076 'title': 'Intro'
2077 }, {
2078 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
2079 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
2080 'title': 'Outro'
2081 }]
2082
2083 return {
2084 'id': video_id,
2085 **self._extract_video_metadata(url, video_id, season_id),
2086 'formats': self._get_formats(ep_id=ep_id, aid=aid),
2087 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
2088 'chapters': chapters,
2089 '__post_extractor': self.extract_comments(video_id, ep_id),
2090 'http_headers': self._HEADERS,
2091 }
2092
2093
2094 class BiliIntlSeriesIE(BiliIntlBaseIE):
2095 IE_NAME = 'biliIntl:series'
2096 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
2097 _TESTS = [{
2098 'url': 'https://www.bilibili.tv/en/play/34613',
2099 'playlist_mincount': 15,
2100 'info_dict': {
2101 'id': '34613',
2102 'title': 'TONIKAWA: Over the Moon For You',
2103 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2104 'categories': ['Slice of life', 'Comedy', 'Romance'],
2105 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2106 'view_count': int,
2107 },
2108 'params': {
2109 'skip_download': True,
2110 },
2111 }, {
2112 'url': 'https://www.bilibili.tv/en/media/1048837',
2113 'info_dict': {
2114 'id': '1048837',
2115 'title': 'SPY×FAMILY',
2116 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2117 'categories': ['Adventure', 'Action', 'Comedy'],
2118 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
2119 'view_count': int,
2120 },
2121 'playlist_mincount': 25,
2122 }, {
2123 'url': 'https://www.biliintl.com/en/play/34613',
2124 'only_matching': True,
2125 }, {
2126 'url': 'https://www.biliintl.com/EN/play/34613',
2127 'only_matching': True,
2128 }]
2129
2130 def _entries(self, series_id):
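# smuggle per-episode metadata into each result URL so BiliIntlIE can reuse it
# without re-downloading the episode page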
2131 series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
2132 for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
2133 episode_id = str(episode['episode_id'])
2134 yield self.url_result(smuggle_url(
2135 BiliIntlIE._make_url(episode_id, series_id),
2136 self._parse_video_metadata(episode)
2137 ), BiliIntlIE, episode_id)
2138
2139 def _real_extract(self, url):
2140 series_id = self._match_id(url)
2141 series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
2142 return self.playlist_result(
2143 self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
2144 categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
2145 thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2146
2147
2148 class BiliLiveIE(InfoExtractor):
2149 _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2150
2151 _TESTS = [{
2152 'url': 'https://live.bilibili.com/196',
2153 'info_dict': {
2154 'id': '33989',
2155 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
2156 'ext': 'flv',
2157 'title': "太空狼人杀联动,不被爆杀就算赢",
2158 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
2159 'timestamp': 1650802769,
2160 },
2161 'skip': 'not live'
2162 }, {
2163 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2164 'only_matching': True
2165 }, {
2166 'url': 'https://live.bilibili.com/blanc/196',
2167 'only_matching': True
2168 }]
2169
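# Live quality numbers (qn) mapped to format metadata; the Chinese notes are roughly:
# 流畅 = smooth, 高清 = HD, 超清 = super HD, 蓝光 = Blu-ray, 原画 = source/original, 杜比 = Dolby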
2170 _FORMATS = {
2171 80: {'format_id': 'low', 'format_note': '流畅'},
2172 150: {'format_id': 'high_res', 'format_note': '高清'},
2173 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
2174 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
2175 10000: {'format_id': 'source', 'format_note': '原画'},
2176 20000: {'format_id': '4K', 'format_note': '4K'},
2177 30000: {'format_id': 'dolby', 'format_note': '杜比'},
2178 }
2179
2180 _quality = staticmethod(qualities(list(_FORMATS)))
2181
2182 def _call_api(self, path, room_id, query):
2183 api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
2184 if api_result.get('code') != 0:
2185 raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
2186 return api_result.get('data') or {}
2187
2188 def _parse_formats(self, qn, fmt):
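# Each codec entry lists mirror hosts in url_info; the final stream URL is
# host + base_url + extra, and only entries matching the requested qn are used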
2189 for codec in fmt.get('codec') or []:
2190 if codec.get('current_qn') != qn:
2191 continue
2192 for url_info in codec['url_info']:
2193 yield {
2194 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2195 'ext': fmt.get('format_name'),
2196 'vcodec': codec.get('codec_name'),
2197 'quality': self._quality(qn),
2198 **self._FORMATS[qn],
2199 }
2200
2201 def _real_extract(self, url):
2202 room_id = self._match_id(url)
2203 room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2204 if room_data.get('live_status') == 0:
2205 raise ExtractorError('Streamer is not live', expected=True)
2206
2207 formats = []
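# the play-info endpoint appears to return streams only for the requested quality (qn),
# so query it once per known quality and collect all resulting formats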
2208 for qn in self._FORMATS.keys():
2209 stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2210 'room_id': room_id,
2211 'qn': qn,
2212 'codec': '0,1',
2213 'format': '0,2',
2214 'mask': '0',
2215 'no_playurl': '0',
2216 'platform': 'web',
2217 'protocol': '0,1',
2218 })
2219 for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2220 formats.extend(self._parse_formats(qn, fmt))
2221
2222 return {
2223 'id': room_id,
2224 'title': room_data.get('title'),
2225 'description': room_data.get('description'),
2226 'thumbnail': room_data.get('user_cover'),
2227 'timestamp': stream_data.get('live_time'),
2228 'formats': formats,
2229 'is_live': True,
2230 'http_headers': {
2231 'Referer': url,
2232 },
2233 }