]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/bilibili.py
[ie/bilibili] Support courses and interactive videos (#8343)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
1 import base64
2 import functools
3 import hashlib
4 import itertools
5 import json
6 import math
7 import re
8 import time
9 import urllib.parse
10
11 from .common import InfoExtractor, SearchInfoExtractor
12 from ..dependencies import Cryptodome
13 from ..networking.exceptions import HTTPError
14 from ..utils import (
15 ExtractorError,
16 GeoRestrictedError,
17 InAdvancePagedList,
18 OnDemandPagedList,
19 bool_or_none,
20 clean_html,
21 filter_dict,
22 float_or_none,
23 format_field,
24 get_element_by_class,
25 int_or_none,
26 join_nonempty,
27 make_archive_id,
28 merge_dicts,
29 mimetype2ext,
30 parse_count,
31 parse_qs,
32 qualities,
33 smuggle_url,
34 srt_subtitles_timecode,
35 str_or_none,
36 traverse_obj,
37 try_call,
38 unified_timestamp,
39 unsmuggle_url,
40 url_or_none,
41 urlencode_postdata,
42 variadic,
43 )
44
45
class BilibiliBaseIE(InfoExtractor):
    """Shared helpers for the Bilibili extractor family: DASH format extraction,
    subtitles, chapters, paginated comments, season episode listing, and the
    edge-graph traversal used by interactive ("stein gate") videos."""

    # Matches the numeric format id embedded in DASH segment URLs, e.g. "-30280.m4s?"
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')

    def extract_formats(self, play_info):
        """Build yt-dlp format dicts from a Bilibili "playurl" API response."""
        # Map quality id -> human-readable name (e.g. 80 -> "1080P 高清")
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        # Regular and Dolby audio tracks; FLAC (if any) lives under a separate key
        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            # Video streams are video-only whenever separate audio tracks exist
            'acodec': 'none' if audios else None,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            # Prefer the id embedded in the segment URL; fall back to the quality id
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        # Qualities advertised by the API but absent from the DASH data are
        # typically login/premium-gated; inform the user instead of staying silent
        missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
        if missing_formats:
            self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
                           f'you have to login or become premium member to download them. {self._login_hint()}')

        return formats

    def _download_playinfo(self, video_id, cid):
        """Fetch the "playurl" JSON for one video part (cid)."""
        # fnval=4048 requests the full DASH feature set — TODO confirm flag meaning
        return self._download_json(
            'https://api.bilibili.com/x/player/playurl', video_id,
            query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
            note=f'Downloading video formats for cid {cid}')['data']

    def json2srt(self, json_data):
        """Convert Bilibili's JSON subtitle payload into SRT text."""
        srt_data = ''
        for idx, line in enumerate(json_data.get('body') or []):
            srt_data += (f'{idx + 1}\n'
                         f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
                         f'{line["content"]}\n\n')
        return srt_data

    def _get_subtitles(self, video_id, cid, aid=None):
        """Return subtitles for a part: always the danmaku XML, plus any CC tracks."""
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }]
        }

        subtitle_info = traverse_obj(self._download_json(
            'https://api.bilibili.com/x/player/v2', video_id,
            query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
            note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
        subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
        if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
            # CC subtitles may exist but be hidden from anonymous sessions
            if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'):  # no login session cookie
                self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
        for s in subs_list:
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
            })
        return subtitles

    def _get_chapters(self, aid, cid):
        """Return view-point chapters for a part, or None when unavailable."""
        # Short-circuits to a falsy value when aid/cid is missing, skipping the request
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

    def _get_comments(self, aid):
        """Yield all comments (and nested replies) page by page until exhausted."""
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
            for children in map(self._get_all_children, replies):
                yield from children

    def _get_all_children(self, reply):
        """Recursively yield a comment followed by all of its descendants."""
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
            yield from children

    def _get_episodes_from_season(self, ss_id, url):
        """Yield url_result entries for every main-section episode of a season."""
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))

    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
        """Depth-first walk of an interactive video's choice graph.

        Mutates ``edges`` (edge_id -> metadata) and returns ``cid_edges``,
        a mapping of cid -> {edge_id: edge metadata} for deduplication.
        """
        cid_edges = cid_edges or {}
        division_data = self._download_json(
            'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
            query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
            note=f'Extracting divisions from edge {edge_id}')
        edges.setdefault(edge_id, {}).update(
            traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
                'title': ('title', {str}),
                'cid': ('cid', {int_or_none}),
            }), get_all=False))

        edges[edge_id].update(traverse_obj(division_data, ('data', {
            'title': ('title', {str}),
            'choices': ('edges', 'questions', ..., 'choices', ..., {
                'edge_id': ('id', {int_or_none}),
                'cid': ('cid', {int_or_none}),
                'text': ('option', {str}),
            }),
        })))
        # use dict to combine edges that use the same video section (same cid)
        cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
        for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
            # Only recurse into unseen edges to terminate on cyclic graphs
            if choice['edge_id'] not in edges:
                edges[choice['edge_id']] = {'cid': choice['cid']}
                self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
        return cid_edges

    def _get_interactive_entries(self, video_id, cid, metainfo):
        """Yield one playlist entry per distinct video section of an interactive video."""
        graph_version = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/wbi/v2', video_id,
                'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
            ('data', 'interaction', 'graph_version', {int_or_none}))
        # Edge id 1 is the graph's entry point seeded with the root cid
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
            play_info = self._download_playinfo(video_id, cid)
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
                'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
                'formats': self.extract_formats(play_info),
                # Embed the raw edge graph so the choice structure is preserved
                'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'subtitles': self.extract_subtitles(video_id, cid),
            }
222
223
class BiliBiliIE(BilibiliBaseIE):
    """Extractor for regular bilibili.com videos (BV/av ids), including
    anthologies (multi-part videos), festival pages and interactive videos."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
        },
    }, {
        'note': 'old av URL version',
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            'ext': 'mp4',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'id': 'BV11x411K7CN',
            'title': '【金坷垃】金泡沫',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 18,
        'playlist': [{
            'info_dict': {
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:10',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
            }
        }]
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:10',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
        }
    }, {
        'note': 'video has subtitles',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫太渴',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2'
        },
        'params': {'listsubtitles': True},
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'video redirects to festival page',
        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
        'info_dict': {
            'id': 'BV1wP4y1P72h',
            'ext': 'mp4',
            'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
            'timestamp': 1643947497,
            'upload_date': '20220204',
            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
            'uploader': '叨叨冯聊音乐',
            'duration': 246.719,
            'uploader_id': '528182630',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
        'info_dict': {
            'id': 'BV1ay4y1d77f',
            'ext': 'mp4',
            'title': '【崩坏3新春剧场】为特别的你送上祝福!',
            'timestamp': 1674273600,
            'upload_date': '20230121',
            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
            'uploader': '果蝇轰',
            'duration': 1111.722,
            'uploader_id': '8469526',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'interactive/split-path video',
        'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
        'info_dict': {
            'id': 'BV1af4y1H7ga',
            'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
            'timestamp': 1630500414,
            'upload_date': '20210901',
            'description': 'md5:01113e39ab06e28042d74ac356a08786',
            'tags': list,
            'uploader': '钉宫妮妮Ninico',
            'duration': 1503,
            'uploader_id': '8881297',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'playlist_count': 33,
        'playlist': [{
            'info_dict': {
                'id': 'BV1af4y1H7ga_400950101',
                'ext': 'mp4',
                'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
                'timestamp': 1630500414,
                'upload_date': '20210901',
                'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
                'tags': list,
                'uploader': '钉宫妮妮Ninico',
                'duration': 11.605,
                'uploader_id': '8881297',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }, {
        'note': '301 redirect to bangumi link',
        'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
        'info_dict': {
            'id': '288525',
            'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
            'ext': 'mp4',
            'series': '我和我的祖国',
            'series_id': '4780',
            'season': '幕后纪实',
            'season_id': '28609',
            'season_number': 1,
            'episode': '钱学森弹道和乘波体飞行器是什么?',
            'episode_id': '288525',
            'episode_number': 105,
            'duration': 1183.957,
            'timestamp': 1571648124,
            'upload_date': '20191021',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
        'info_dict': {
            'id': 'BV1jL41167ZG',
            'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
            'ext': 'mp4',
        },
        'skip': 'supporter-only video',
    }, {
        'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
        'info_dict': {
            'id': 'BV1Ks411f7aQ',
            'title': '【BD1080P】狼与香辛料I【华盟】',
            'ext': 'mp4',
        },
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
        'info_dict': {
            'id': 'BV1GJ411x7h7',
            'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
            'ext': 'mp4',
        },
        'skip': 'geo-restricted',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, video_id)
        # Some videos 301-redirect to bangumi (or other) pages — hand those off
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        # Festival pages carry their metadata under 'videoInfo' instead of 'videoData'
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info_obj = self._search_json(
                r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
            if not play_info_obj:
                # Distinguish login-required (-403) from deleted/geo-blocked (-404)
                if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
                    self.raise_login_required()
                if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
                    raise ExtractorError(
                        'This video may be deleted or geo-restricted. '
                        'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
            play_info = traverse_obj(play_info_obj, ('data', {dict}))
            if not play_info:
                # Code 87007 marks supporter-only (充电专属) videos; surface the
                # page's own toast message to explain which tier is required
                if traverse_obj(play_info_obj, 'code') == 87007:
                    toast = get_element_by_class('tips-toast', webpage) or ''
                    msg = clean_html(
                        f'{get_element_by_class("belongs-to", toast) or ""},'
                        + (get_element_by_class('level', toast) or ''))
                    raise ExtractorError(
                        f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
                raise ExtractorError('Failed to extract play info')
            video_data = initial_state['videoData']

        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology'),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        # 'p' query parameter selects a specific part of an anthology
        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        # Legacy av-id based archive id (e.g. "1074402_part1") for --download-archive continuity
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        festival_info = {}
        if is_festival:
            # Festival pages embed no __playinfo__, so fetch formats explicitly
            play_info = self._download_playinfo(video_id, cid)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        # Interactive ("stein gate") videos expand into one entry per graph section
        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
        if is_interactive:
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
                    'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                    '__post_extractor': self.extract_comments(aid),
                })
        else:
            return {
                **metainfo,
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'chapters': self._get_chapters(aid, cid),
                'subtitles': self.extract_subtitles(video_id, cid),
                'formats': self.extract_formats(play_info),
                '__post_extractor': self.extract_comments(aid),
            }
599
600
class BiliBiliBangumiIE(BilibiliBaseIE):
    """Extractor for bangumi (series/anime) episode pages (ep ids)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
        'info_dict': {
            'id': '21495',
            'ext': 'mp4',
            'series': '悠久之翼',
            'series_id': '774',
            'season': '第二季',
            'season_id': '1182',
            'season_number': 2,
            'episode': 'forever/ef',
            'episode_id': '21495',
            'episode_number': 12,
            'title': '12 forever/ef',
            'duration': 1420.791,
            'timestamp': 1320412200,
            'upload_date': '20111104',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
        'info_dict': {
            'id': '267851',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '残酷',
            'episode_id': '267851',
            'episode_number': 1,
            'title': '1 残酷',
            'duration': 1425.256,
            'timestamp': 1554566400,
            'upload_date': '20190406',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
        'skip': 'Geo-restricted',
    }, {
        'note': 'a making-of which falls outside main section',
        'url': 'https://www.bilibili.com/bangumi/play/ep345120',
        'info_dict': {
            'id': '345120',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '炭治郎篇',
            'episode_id': '345120',
            'episode_number': 27,
            'title': '#1 炭治郎篇',
            'duration': 1922.129,
            'timestamp': 1602853860,
            'upload_date': '20201016',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
    }]

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)

        # Page-level markers for geo restriction / premium preview
        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers = {'Referer': url, **self.geo_verification_headers()}
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
            headers=headers)
        # -10403 indicates a premium-only episode
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        # Locate this episode (main section or extra sections) to get its
        # 1-based position as a fallback episode number
        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        # Fix: the previous `season_id and next(...)` form raised TypeError on
        # tuple-unpacking when season_id was falsy; guard explicitly instead
        season_number, season_title = next((
            (idx + 1, e.get('season_title')) for idx, e in enumerate(
                traverse_obj(bangumi_info, ('seasons', ...)))
            if e.get('season_id') == season_id
        ), (None, None)) if season_id else (None, None)

        aid = episode_info.get('aid')

        return {
            'id': episode_id,
            'formats': formats,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            **traverse_obj(episode_info, {
                'episode': ('long_title', {str}),
                # Numeric 'title' is the canonical episode number; otherwise
                # fall back to the enumeration position computed above
                'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
                'timestamp': ('pub_time', {int_or_none}),
                'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
            }),
            'episode_id': episode_id,
            'season': str_or_none(season_title),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': headers,
        }
727
728
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    """Extractor for bangumi media (md) overview pages, expanded to all episodes."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)
        initial_state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)

        # The media page maps onto exactly one season id
        media_info = initial_state['mediaInfo']
        playlist_meta = traverse_obj(media_info, {
            'title': ('title', {str}),
            'description': ('evaluate', {str}),
        })
        return self.playlist_result(
            self._get_episodes_from_season(media_info['season_id'], url),
            media_id, **playlist_meta)
782
783
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    """Extractor for bangumi season (ss) pages, expanded to all episodes."""

    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        ss_id = self._match_id(url)
        webpage = self._download_webpage(url, ss_id)

        # Season title/description come from the page's JSON-LD payload
        ld_json = self._search_json(
            r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id)
        metainfo = traverse_obj(ld_json, ('itemListElement', ..., {
            'title': ('name', {str}),
            'description': ('description', {str}),
        }), get_all=False)

        return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
834
835
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Shared logic for Bilibili paid-course ("cheese") extractors."""

    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for one course episode from the season payload."""
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        aid, cid = episode_info['aid'], episode_info['cid']

        # ep_status == -1 marks an announced-but-unreleased episode
        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Attribute results to BilibiliCheeseIE even when reached via the season extractor
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                # Title combines the episode index and name, e.g. "1 - 课程先导片"
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Fetch course season info, keyed by either 'ep_id' or 'season_id'."""
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
884
885
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    """Extractor for a single Bilibili course ("cheese") episode page."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        }
    }]

    def _real_extract(self, url):
        ep_id = self._match_id(url)
        # The season endpoint accepts an episode id directly
        season_info = self._download_season_info('ep_id', ep_id)
        return self._extract_episode(season_info, ep_id)
911
912
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    """Extractor for a full Bilibili course ("cheese") season (ss) page."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            }
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        # Only episodes flagged viewable (free preview or purchased) are yielded
        viewable_ids = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        for ep_id in viewable_ids:
            yield self._extract_episode(season_info, ep_id)

    def _real_extract(self, url):
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)

        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })
        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id, **playlist_meta)
966
967
class BilibiliSpaceBaseIE(InfoExtractor):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Build (metadata, lazy paged list) from the three page callbacks."""
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def resolve_page(page_idx):
            # Reuse the already-downloaded first page instead of refetching it
            return get_entries(initial_page if page_idx == 0 else fetch_page(page_idx))

        paged_list = InAdvancePagedList(
            resolve_page, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
978
979
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Extracts all videos of a user space (channel) as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
    }]

    def _extract_signature(self, playlist_id):
        """Derive the WBI "mixin" signing key used to sign space API queries.

        Key material comes from the two image URLs on the nav endpoint
        (hardcoded fallbacks are used when the request fails); it is then
        shuffled through a fixed permutation table and truncated to 32 chars.
        """
        session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

        # Key material is the filename (without extension) of each image URL
        key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
        img_key = traverse_obj(
            session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
        sub_key = traverse_obj(
            session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'

        session_key = img_key + sub_key

        signature_values = []
        # Fixed shuffle table; the order must not be altered or signing breaks
        for position in (
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
            57, 62, 11, 36, 20, 34, 44, 52
        ):
            # try_call swallows IndexError for positions beyond the key length
            char_at_position = try_call(lambda: session_key[position])
            if char_at_position:
                signature_values.append(char_at_position)

        return ''.join(signature_values)[:32]

    def _real_extract(self, url):
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        signature = self._extract_signature(playlist_id)

        def fetch_page(page_idx):
            # NOTE: keys are inserted in sorted order so that urlencode()
            # produces the canonical string expected by the WBI signature
            query = {
                'keyword': '',
                'mid': playlist_id,
                'order': 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
                'wts': int(time.time()),
            }
            # w_rid = md5(urlencoded_query + mixin_key), appended after signing
            query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()

            try:
                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
            except ExtractorError as e:
                # 412 is the server-side rate-limit/anti-bot response
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
            if response['code'] == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
1069
1070
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Extracts all audios uploaded by a user as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        uploader_id = self._match_id(url)

        def fetch_page(page_idx):
            page_query = {'uid': uploader_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', uploader_id,
                note=f'Downloading page {page_idx}', query=page_query)['data']

        def get_metadata(page_data):
            return {'page_count': page_data['pageCount'], 'page_size': page_data['pageSize']}

        def get_entries(page_data):
            for song in page_data.get('data', []):
                yield self.url_result(
                    f'https://www.bilibili.com/audio/au{song["id"]}', BilibiliAudioIE, song['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, uploader_id)
1102
1103
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        # Collect BV ids at the given traversal path and hand each to BiliBiliIE
        traversal_path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, traversal_path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        # The uploader name is scraped from the space page's <title> element
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        # Paging keys are consumed by the base class and not part of the result
        for paging_key in ('page_count', 'page_size'):
            metadata.pop(paging_key, None)
        return metadata, page_list
1118
1119
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Extracts a user's "collection" (season archive) as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            page_query = {'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}', query=page_query)['data']

        def get_metadata(page_data):
            page_info = page_data['page']
            return {
                'page_count': math.ceil(page_info['total'] / page_info['page_size']),
                'page_size': page_info['page_size'],
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                })
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1168
1169
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Extracts a user's "series" channel as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'
        # Series-level metadata comes from a dedicated endpoint
        playlist_meta = traverse_obj(self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
        ), {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            page_query = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
            return self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}', query=page_query)['data']

        def get_metadata(page_data):
            page_info = page_data['page']
            return {
                'page_count': math.ceil(page_info['total'] / page_info['size']),
                'page_size': page_info['size'],
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1222
1223
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Extracts a (public or owned) favorites list as a playlist."""
    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The ids endpoint returns every entry at once (no paging needed)
        ids_json = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(ids_json, 'data')

        playlist_meta = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **playlist_meta)
1272
1273
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Extracts the logged-in user's "watch later" list as a playlist."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        # Use the logged-in user's id as playlist id when available
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = user_cookie.value if user_cookie is not None else 'watchlater'
        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')
        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')), id=list_id, title='稍后再看')
1291
1292
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extracts /list/ and /medialist/play/ playlists (series, favlists, watchlater)."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Page through the medialist API, yielding an entry per video."""
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            # Cursor-based paging: the next request resumes after the last item
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
        if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {
            'ps': 20,
            'with_current': False,
            **traverse_obj(initial_state, {
                'type': ('playlist', 'type', {int_or_none}),
                'biz_id': ('playlist', 'id', {int_or_none}),
                'tid': ('tid', {int_or_none}),
                # 'sortFiled' [sic] is the key name the site itself uses
                'sort_field': ('sortFiled', {int_or_none}),
                # bool -> 'True'/'False' -> 'true'/'false' as expected by the API
                'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
            })
        }
        metadata = {
            # playlist type (e.g. favlist vs series) is part of the display id
            'id': f'{query["type"]}_{query["biz_id"]}',
            **traverse_obj(initial_state, ('mediaListInfo', {
                'title': ('title', {str}),
                'uploader': ('upper', 'name', {str}),
                'uploader_id': ('upper', 'mid', {str_or_none}),
                'timestamp': ('ctime', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
            })),
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
1392
1393
class BilibiliCategoryIE(InfoExtractor):
    """Extracts a Bilibili category/subcategory browse page as a playlist."""
    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad'
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45
        }
    }]

    # map of categories : subcategories : RIDs (Bilibili region ids);
    # hoisted to a class constant so it is not rebuilt on every call
    _RID_MAP = {
        'kichiku': {
            'mad': 26,
            'manual_vocaloid': 126,
            'guide': 22,
            'theatre': 216,
            'course': 127
        },
    }

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Download one listing page and yield a url_result per video.

        Raises ExtractorError when the page has no video list.
        """
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note=f'Extracting results from page {page_num} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return a lazily-fetched page list for the given (sub)category.

        Raises ExtractorError for unsupported categories/subcategories or
        when the total page count cannot be determined.
        """
        if category not in self._RID_MAP:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(self._RID_MAP.keys())}')
        if subcategory not in self._RID_MAP[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(self._RID_MAP[category].keys())}')
        rid_value = self._RID_MAP[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        # Path is /v/<category>/<subcategory>
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
1460
1461
class BiliBiliSearchIE(SearchInfoExtractor):
    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'

    def _search_results(self, query):
        """Yield search results page by page until the API returns none."""
        for page_num in itertools.count(1):
            search_query = {
                'Search_key': query,
                'keyword': query,
                'page': page_num,
                'context': '',
                'duration': 0,
                'tids_2': '',
                '__refresh__': 'true',
                'search_type': 'video',
                'tids': 0,
                'highlight': 1,
            }
            videos = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}',
                query=search_query)['data'].get('result')
            if not videos:
                break
            yield from (
                self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
                for video in videos)
1487
1488
class BilibiliAudioBaseIE(InfoExtractor):
    def _call_api(self, path, sid, query=None):
        """Call the audio web API and return its 'data' payload."""
        # Fall back to a plain sid query when none (or an empty one) is given
        query = query or {'sid': sid}
        response = self._download_json(
            f'https://www.bilibili.com/audio/music-service-c/web/{path}', sid, query=query)
        return response['data']
1496
1497
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extracts a single audio track (au<id>) from Bilibili's music service."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            # CDN requires the page as referer
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        statistic = song.get('statistic') or {}

        # The lyric, when present, is exposed as an LRC subtitle track
        lyric_url = song.get('lyric')
        subtitles = {'origin': [{'url': lyric_url}]} if lyric_url else None

        return {
            'id': au_id,
            'title': song['title'],
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
1566
1567
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extracts an audio album/menu (am<id>) as a playlist of audio tracks."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Skip songs without a usable id
        song_ids = filter(None, (str_or_none(song.get('id')) for song in songs))
        entries = [
            self.url_result(
                f'https://www.bilibili.com/audio/au{sid}', BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids]

        if not entries:
            return self.playlist_result(entries, am_id)

        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if album_title:
            # Propagate the album title to every entry
            for entry in entries:
                entry['album'] = album_title
            return self.playlist_result(
                entries, am_id, album_title, album_data.get('intro'))

        return self.playlist_result(entries, am_id)
1605
1606
class BiliBiliPlayerIE(InfoExtractor):
    """Resolves embedded player URLs (player.bilibili.com) to the video page."""
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Delegate to BiliBiliIE via the legacy av-number URL
        return self.url_result(
            f'http://www.bilibili.tv/video/av{video_id}/',
            ie=BiliBiliIE.ie_key(), video_id=video_id)
1619
1620
class BiliIntlBaseIE(InfoExtractor):
    """Shared helpers for bilibili.tv / biliintl.com (international) extractors."""
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'

    def _call_api(self, endpoint, *args, **kwargs):
        """Call the intl gateway API and return its 'data' payload.

        Extra args/kwargs are forwarded to _download_json; 'errnote' and
        'fatal' are additionally inspected here to shape API-level error
        handling. Raises login/geo errors for the corresponding API codes.
        """
        json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if json.get('code'):
            # Non-zero code signals an API-level error
            if json['code'] in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif json['code'] == 10004001:
                self.raise_geo_restricted()
            else:
                if json.get('message') and str(json['code']) != json['message']:
                    errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
                else:
                    errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return json.get('data')

    def json2srt(self, json):
        """Convert Bilibili's JSON subtitle body into SRT text.

        Cues with a falsy content, start or end time are dropped.
        """
        data = '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(traverse_obj(json, (
                'body', lambda _, l: l['content'] and l['from'] and l['to']))))
        return data

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Download and convert all available subtitle tracks for an episode/video."""
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        for sub in sub_json.get('subtitles') or []:
            sub_url = sub.get('url')
            if not sub_url:
                continue
            sub_data = self._download_json(
                sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
                # Parenthesized so the conditional only selects the suffix;
                # previously the whole note collapsed to '' when 'lang' was missing
                note='Downloading subtitles%s' % (f' for {sub["lang"]}' if sub.get('lang') else ''))
            if not sub_data:
                continue
            subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
                'ext': 'srt',
                'data': self.json2srt(sub_data)
            })
        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Return the split video+audio formats for an episode or video."""
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Map common fields; episode number is parsed from 'E<n> - ...' titles."""
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'thumbnail': video_data.get('cover'),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in to bilibili.tv; the password is RSA-encrypted with a server key."""
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.RSA.importKey(key_data['key'])
        # The server-supplied hash is prepended to the password before encryption
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true'
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')
1747
1748
1749 class BiliIntlIE(BiliIntlBaseIE):
1750 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1751 _TESTS = [{
1752 # Bstation page
1753 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1754 'info_dict': {
1755 'id': '341736',
1756 'ext': 'mp4',
1757 'title': 'E2 - The First Night',
1758 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1759 'episode_number': 2,
1760 'upload_date': '20201009',
1761 'episode': 'Episode 2',
1762 'timestamp': 1602259500,
1763 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1764 'chapters': [{
1765 'start_time': 0,
1766 'end_time': 76.242,
1767 'title': '<Untitled Chapter 1>'
1768 }, {
1769 'start_time': 76.242,
1770 'end_time': 161.161,
1771 'title': 'Intro'
1772 }, {
1773 'start_time': 1325.742,
1774 'end_time': 1403.903,
1775 'title': 'Outro'
1776 }],
1777 }
1778 }, {
1779 # Non-Bstation page
1780 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1781 'info_dict': {
1782 'id': '11005006',
1783 'ext': 'mp4',
1784 'title': 'E3 - Who?',
1785 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1786 'episode_number': 3,
1787 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1788 'episode': 'Episode 3',
1789 'upload_date': '20211219',
1790 'timestamp': 1639928700,
1791 'chapters': [{
1792 'start_time': 0,
1793 'end_time': 88.0,
1794 'title': '<Untitled Chapter 1>'
1795 }, {
1796 'start_time': 88.0,
1797 'end_time': 156.0,
1798 'title': 'Intro'
1799 }, {
1800 'start_time': 1173.0,
1801 'end_time': 1259.535,
1802 'title': 'Outro'
1803 }],
1804 }
1805 }, {
1806 # Subtitle with empty content
1807 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1808 'info_dict': {
1809 'id': '10131790',
1810 'ext': 'mp4',
1811 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1812 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1813 'episode_number': 140,
1814 },
1815 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1816 }, {
1817 'url': 'https://www.bilibili.tv/en/video/2041863208',
1818 'info_dict': {
1819 'id': '2041863208',
1820 'ext': 'mp4',
1821 'timestamp': 1670874843,
1822 'description': 'Scheduled for April 2023.\nStudio: ufotable',
1823 'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
1824 'upload_date': '20221212',
1825 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
1826 },
1827 }, {
1828 # episode comment extraction
1829 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1830 'info_dict': {
1831 'id': '340317',
1832 'ext': 'mp4',
1833 'timestamp': 1604057820,
1834 'upload_date': '20201030',
1835 'episode_number': 5,
1836 'title': 'E5 - My Own Steel',
1837 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1838 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1839 'episode': 'Episode 5',
1840 'comment_count': int,
1841 'chapters': [{
1842 'start_time': 0,
1843 'end_time': 61.0,
1844 'title': '<Untitled Chapter 1>'
1845 }, {
1846 'start_time': 61.0,
1847 'end_time': 134.0,
1848 'title': 'Intro'
1849 }, {
1850 'start_time': 1290.0,
1851 'end_time': 1379.0,
1852 'title': 'Outro'
1853 }],
1854 },
1855 'params': {
1856 'getcomments': True
1857 }
1858 }, {
1859 # user generated content comment extraction
1860 'url': 'https://www.bilibili.tv/en/video/2045730385',
1861 'info_dict': {
1862 'id': '2045730385',
1863 'ext': 'mp4',
1864 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1865 'timestamp': 1667891924,
1866 'upload_date': '20221108',
1867 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
1868 'comment_count': int,
1869 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
1870 },
1871 'params': {
1872 'getcomments': True
1873 }
1874 }, {
1875 # episode id without intro and outro
1876 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1877 'info_dict': {
1878 'id': '11246489',
1879 'ext': 'mp4',
1880 'title': 'E1 - Operation \'Strix\' <Owl>',
1881 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1882 'timestamp': 1649516400,
1883 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1884 'episode': 'Episode 1',
1885 'episode_number': 1,
1886 'upload_date': '20220409',
1887 },
1888 }, {
1889 'url': 'https://www.biliintl.com/en/play/34613/341736',
1890 'only_matching': True,
1891 }, {
1892 # User-generated content (as opposed to a series licensed from a studio)
1893 'url': 'https://bilibili.tv/en/video/2019955076',
1894 'only_matching': True,
1895 }, {
1896 # No language in URL
1897 'url': 'https://www.bilibili.tv/video/2019955076',
1898 'only_matching': True,
1899 }, {
1900 # Uppercase language in URL
1901 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1902 'only_matching': True,
1903 }]
1904
1905 def _make_url(video_id, series_id=None):
1906 if series_id:
1907 return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1908 return f'https://www.bilibili.tv/en/video/{video_id}'
1909
    def _extract_video_metadata(self, url, video_id, season_id):
        """Collect metadata for an episode or video.

        Resolution order: metadata smuggled into the URL by the series extractor,
        then webpage JSON (Bstation layout), then the season episode-list API
        (non-Bstation layout); webpage JSON-LD and og: tags are merged in last
        as a fallback.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        # Metadata smuggled by BiliIntlSeriesIE wins outright — saves a webpage download
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        # OGV (licensed series), UGC (user uploads) and legacy 'ugc' layouts each
        # nest the video data differently; first matching path wins
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            # Pick the episode entry whose id matches the requested video
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False)

        # XXX: webpage metadata may be inaccurate; it is only merged in so extraction
        # does not crash when video_data could not be found
        return merge_dicts(
            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
                'title': self._html_search_meta('og:title', webpage),
                'description': self._html_search_meta('og:description', webpage)
            })
1936
1937 def _get_comments_reply(self, root_id, next_id=0, display_id=None):
1938 comment_api_raw_data = self._download_json(
1939 'https://api.bilibili.tv/reply/web/detail', display_id,
1940 note=f'Downloading reply comment of {root_id} - {next_id}',
1941 query={
1942 'platform': 'web',
1943 'ps': 20, # comment's reply per page (default: 3)
1944 'root': root_id,
1945 'next': next_id,
1946 })
1947
1948 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
1949 yield {
1950 'author': traverse_obj(replies, ('member', 'name')),
1951 'author_id': traverse_obj(replies, ('member', 'mid')),
1952 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
1953 'text': traverse_obj(replies, ('content', 'message')),
1954 'id': replies.get('rpid'),
1955 'like_count': int_or_none(replies.get('like_count')),
1956 'parent': replies.get('parent'),
1957 'timestamp': unified_timestamp(replies.get('ctime_text'))
1958 }
1959
1960 if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
1961 yield from self._get_comments_reply(
1962 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
1963
1964 def _get_comments(self, video_id, ep_id):
1965 for i in itertools.count(0):
1966 comment_api_raw_data = self._download_json(
1967 'https://api.bilibili.tv/reply/web/root', video_id,
1968 note=f'Downloading comment page {i + 1}',
1969 query={
1970 'platform': 'web',
1971 'pn': i, # page number
1972 'ps': 20, # comment per page (default: 20)
1973 'oid': video_id,
1974 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
1975 'sort_type': 1, # 1: best, 2: recent
1976 })
1977
1978 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
1979 yield {
1980 'author': traverse_obj(replies, ('member', 'name')),
1981 'author_id': traverse_obj(replies, ('member', 'mid')),
1982 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
1983 'text': traverse_obj(replies, ('content', 'message')),
1984 'id': replies.get('rpid'),
1985 'like_count': int_or_none(replies.get('like_count')),
1986 'timestamp': unified_timestamp(replies.get('ctime_text')),
1987 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
1988 }
1989 if replies.get('count'):
1990 yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
1991
1992 if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
1993 break
1994
1995 def _real_extract(self, url):
1996 season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
1997 video_id = ep_id or aid
1998 chapters = None
1999
2000 if ep_id:
2001 intro_ending_json = self._call_api(
2002 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2003 video_id, fatal=False) or {}
2004 if intro_ending_json.get('skip'):
2005 # FIXME: start time and end time seems a bit off a few second even it corrext based on ogv.*.js
2006 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2007 chapters = [{
2008 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
2009 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
2010 'title': 'Intro'
2011 }, {
2012 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
2013 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
2014 'title': 'Outro'
2015 }]
2016
2017 return {
2018 'id': video_id,
2019 **self._extract_video_metadata(url, video_id, season_id),
2020 'formats': self._get_formats(ep_id=ep_id, aid=aid),
2021 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
2022 'chapters': chapters,
2023 '__post_extractor': self.extract_comments(video_id, ep_id)
2024 }
2025
2026
class BiliIntlSeriesIE(BiliIntlBaseIE):
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield a url_result for every episode in every section of the season."""
        season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            ep_id = str(episode['episode_id'])
            # Smuggle the already-fetched metadata so the episode extractor can
            # skip its own webpage download
            episode_url = smuggle_url(
                BiliIntlIE._make_url(ep_id, series_id), self._parse_video_metadata(episode))
            yield self.url_result(episode_url, BiliIntlIE, ep_id)

    def _real_extract(self, url):
        series_id = self._match_id(url)
        season = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
        return self.playlist_result(
            self._entries(series_id), series_id, season.get('title'), season.get('description'),
            categories=traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(season.get('horizontal_cover')),
            view_count=parse_count(season.get('view')))
2079
2080
class BiliLiveIE(InfoExtractor):
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
            'ext': 'flv',
            'title': "太空狼人杀联动,不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live'
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True
    }]

    # qn (quality number) -> yt-dlp format metadata
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Preference order follows insertion order of _FORMATS (lowest to highest)
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Call a live.bilibili.com API endpoint and return its `data` payload.

        Raises ExtractorError when the API reports failure via a non-zero `code`.
        """
        api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if api_result.get('code') != 0:
            raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
        return api_result.get('data') or {}

    def _parse_formats(self, qn, fmt):
        """Yield format dicts from `fmt`'s codec entries that match quality `qn`."""
        for codec in fmt.get('codec') or []:
            # The API echoes streams for other qualities too; keep only the requested one
            if codec.get('current_qn') != qn:
                continue
            for url_info in codec['url_info']:
                yield {
                    'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        stream_data = {}  # defensively bound; also read after the loop for `live_time`
        # Request play info once per known quality level, since the API only
        # returns streams for the qn that was asked for
        for qn in self._FORMATS:  # iterate the dict directly; `.keys()` was redundant
            stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            })
            for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            'timestamp': stream_data.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }