10 from .common
import InfoExtractor
, SearchInfoExtractor
11 from ..dependencies
import Cryptodome
12 from ..networking
.exceptions
import HTTPError
31 srt_subtitles_timecode
,
43 class BilibiliBaseIE(InfoExtractor
):
44 _FORMAT_ID_RE
= re
.compile(r
'-(\d+)\.m4s\?')
46 def extract_formats(self
, play_info
):
48 r
['quality']: traverse_obj(r
, 'new_description', 'display_desc')
49 for r
in traverse_obj(play_info
, ('support_formats', lambda _
, v
: v
['quality']))
52 audios
= traverse_obj(play_info
, ('dash', 'audio', ...))
53 flac_audio
= traverse_obj(play_info
, ('dash', 'flac', 'audio'))
55 audios
.append(flac_audio
)
57 'url': traverse_obj(audio
, 'baseUrl', 'base_url', 'url'),
58 'ext': mimetype2ext(traverse_obj(audio
, 'mimeType', 'mime_type')),
59 'acodec': audio
.get('codecs'),
61 'tbr': float_or_none(audio
.get('bandwidth'), scale
=1000),
62 'filesize': int_or_none(audio
.get('size')),
63 'format_id': str_or_none(audio
.get('id')),
64 } for audio
in audios
]
67 'url': traverse_obj(video
, 'baseUrl', 'base_url', 'url'),
68 'ext': mimetype2ext(traverse_obj(video
, 'mimeType', 'mime_type')),
69 'fps': float_or_none(traverse_obj(video
, 'frameRate', 'frame_rate')),
70 'width': int_or_none(video
.get('width')),
71 'height': int_or_none(video
.get('height')),
72 'vcodec': video
.get('codecs'),
73 'acodec': 'none' if audios
else None,
74 'tbr': float_or_none(video
.get('bandwidth'), scale
=1000),
75 'filesize': int_or_none(video
.get('size')),
76 'quality': int_or_none(video
.get('id')),
77 'format_id': traverse_obj(
78 video
, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}
, 1),
79 ('id', {str_or_none}
), get_all
=False),
80 'format': format_names
.get(video
.get('id')),
81 } for video
in traverse_obj(play_info
, ('dash', 'video', ...)))
83 missing_formats
= format_names
.keys() - set(traverse_obj(formats
, (..., 'quality')))
85 self
.to_screen(f
'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
86 f
'you have to login or become premium member to download them. {self._login_hint()}')
90 def json2srt(self
, json_data
):
92 for idx
, line
in enumerate(json_data
.get('body') or []):
93 srt_data
+= (f
'{idx + 1}\n'
94 f
'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
95 f
'{line["content"]}\n\n')
98 def _get_subtitles(self
, video_id
, aid
, cid
):
102 'url': f
'https://comment.bilibili.com/{cid}.xml',
106 video_info_json
= self
._download
_json
(f
'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id
)
107 for s
in traverse_obj(video_info_json
, ('data', 'subtitle', 'subtitles', ...)):
108 subtitles
.setdefault(s
['lan'], []).append({
110 'data': self
.json2srt(self
._download
_json
(s
['subtitle_url'], video_id
))
114 def _get_chapters(self
, aid
, cid
):
115 chapters
= aid
and cid
and self
._download
_json
(
116 'https://api.bilibili.com/x/player/v2', aid
, query
={'aid': aid, 'cid': cid}
,
117 note
='Extracting chapters', fatal
=False)
118 return traverse_obj(chapters
, ('data', 'view_points', ..., {
120 'start_time': 'from',
124 def _get_comments(self
, aid
):
125 for idx
in itertools
.count(1):
126 replies
= traverse_obj(
128 f
'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
129 aid
, note
=f
'Extracting comments from page {idx}', fatal
=False),
133 for children
in map(self
._get
_all
_children
, replies
):
136 def _get_all_children(self
, reply
):
138 'author': traverse_obj(reply
, ('member', 'uname')),
139 'author_id': traverse_obj(reply
, ('member', 'mid')),
140 'id': reply
.get('rpid'),
141 'text': traverse_obj(reply
, ('content', 'message')),
142 'timestamp': reply
.get('ctime'),
143 'parent': reply
.get('parent') or 'root',
145 for children
in map(self
._get
_all
_children
, traverse_obj(reply
, ('replies', ...))):
148 def _get_episodes_from_season(self
, ss_id
, url
):
149 season_info
= self
._download
_json
(
150 'https://api.bilibili.com/pgc/web/season/section', ss_id
,
151 note
='Downloading season info', query
={'season_id': ss_id}
,
152 headers
={'Referer': url, **self.geo_verification_headers()}
)
154 for entry
in traverse_obj(season_info
, (
155 'result', 'main_section', 'episodes',
156 lambda _
, v
: url_or_none(v
['share_url']) and v
['id'])):
157 yield self
.url_result(entry
['share_url'], BiliBiliBangumiIE
, f
'ep{entry["id"]}')
160 class BiliBiliIE(BilibiliBaseIE
):
161 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
164 'url': 'https://www.bilibili.com/video/BV13x41117TL',
166 'id': 'BV13x41117TL',
167 'title': '阿滴英文|英文歌分享#6 "Closer',
169 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
170 'uploader_id': '65880958',
172 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
175 'comment_count': int,
176 'upload_date': '20170301',
177 'timestamp': 1488353834,
183 'url': 'http://www.bilibili.com/video/av1074402/',
185 'thumbnail': r
're:^https?://.*\.(jpg|jpeg)$',
188 'uploader_id': '156160',
189 'id': 'BV11x411K7CN',
192 'upload_date': '20140420',
193 'timestamp': 1397983878,
194 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
196 'comment_count': int,
200 'params': {'skip_download': True}
,
203 'url': 'https://www.bilibili.com/video/BV1bK411W797',
205 'id': 'BV1bK411W797',
206 'title': '物语中的人物是如何吐槽自己的OP的'
208 'playlist_count': 18,
211 'id': 'BV1bK411W797_p1',
213 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
215 'timestamp': 1589601697,
216 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
217 'uploader': '打牌还是打桩',
218 'uploader_id': '150259984',
220 'comment_count': int,
221 'upload_date': '20200516',
223 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
228 'note': 'Specific page of Anthology',
229 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
231 'id': 'BV1bK411W797_p1',
233 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
235 'timestamp': 1589601697,
236 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
237 'uploader': '打牌还是打桩',
238 'uploader_id': '150259984',
240 'comment_count': int,
241 'upload_date': '20200516',
243 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
247 'note': 'video has subtitles',
248 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
250 'id': 'BV12N4y1M7rh',
252 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
254 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
256 'upload_date': '20220709',
258 'timestamp': 1657347907,
259 'uploader_id': '1326814124',
260 'comment_count': int,
263 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
264 'subtitles': 'count:2'
266 'params': {'listsubtitles': True}
,
268 'url': 'https://www.bilibili.com/video/av8903802/',
270 'id': 'BV13x41117TL',
272 'title': '阿滴英文|英文歌分享#6 "Closer',
273 'upload_date': '20170301',
274 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
275 'timestamp': 1488353834,
276 'uploader_id': '65880958',
278 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
281 'comment_count': int,
286 'skip_download': True,
289 'note': 'video has chapter',
290 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
292 'id': 'BV1vL411G7N7',
294 'title': '如何为你的B站视频添加进度条分段',
295 'timestamp': 1634554558,
296 'upload_date': '20211018',
297 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
299 'uploader': '爱喝咖啡的当麻',
301 'uploader_id': '1680903',
302 'chapters': 'count:6',
303 'comment_count': int,
306 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
308 'params': {'skip_download': True}
,
310 'note': 'video redirects to festival page',
311 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
313 'id': 'BV1wP4y1P72h',
315 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
316 'timestamp': 1643947497,
317 'upload_date': '20220204',
318 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
319 'uploader': '叨叨冯聊音乐',
321 'uploader_id': '528182630',
324 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
326 'params': {'skip_download': True}
,
328 'note': 'newer festival video',
329 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
331 'id': 'BV1ay4y1d77f',
333 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
334 'timestamp': 1674273600,
335 'upload_date': '20230121',
336 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
338 'duration': 1111.722,
339 'uploader_id': '8469526',
342 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
344 'params': {'skip_download': True}
,
347 def _real_extract(self
, url
):
348 video_id
= self
._match
_id
(url
)
349 webpage
= self
._download
_webpage
(url
, video_id
)
350 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)
352 is_festival
= 'videoData' not in initial_state
354 video_data
= initial_state
['videoInfo']
356 play_info
= self
._search
_json
(r
'window\.__playinfo__\s*=', webpage
, 'play info', video_id
)['data']
357 video_data
= initial_state
['videoData']
359 video_id
, title
= video_data
['bvid'], video_data
.get('title')
361 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
362 page_list_json
= not is_festival
and traverse_obj(
364 'https://api.bilibili.com/x/player/pagelist', video_id
,
365 fatal
=False, query
={'bvid': video_id, 'jsonp': 'jsonp'}
,
366 note
='Extracting videos in anthology'),
367 'data', expected_type
=list) or []
368 is_anthology
= len(page_list_json
) > 1
370 part_id
= int_or_none(parse_qs(url
).get('p', [None])[-1])
371 if is_anthology
and not part_id
and self
._yes
_playlist
(video_id
, video_id
):
372 return self
.playlist_from_matches(
373 page_list_json
, video_id
, title
, ie
=BiliBiliIE
,
374 getter
=lambda entry
: f
'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
377 part_id
= part_id
or 1
378 title
+= f
' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
380 aid
= video_data
.get('aid')
381 old_video_id
= format_field(aid
, None, f
'%s_part{part_id or 1}')
383 cid
= traverse_obj(video_data
, ('pages', part_id
- 1, 'cid')) if part_id
else video_data
.get('cid')
387 play_info
= self
._download
_json
(
388 'https://api.bilibili.com/x/player/playurl', video_id
,
389 query
={'bvid': video_id, 'cid': cid, 'fnval': 4048}
,
390 note
='Extracting festival video formats')['data']
392 festival_info
= traverse_obj(initial_state
, {
393 'uploader': ('videoInfo', 'upName'),
394 'uploader_id': ('videoInfo', 'upMid', {str_or_none}
),
395 'like_count': ('videoStatus', 'like', {int_or_none}
),
396 'thumbnail': ('sectionEpisodes', lambda _
, v
: v
['bvid'] == video_id
, 'cover'),
400 **traverse_obj(initial_state
, {
401 'uploader': ('upData', 'name'),
402 'uploader_id': ('upData', 'mid', {str_or_none}
),
403 'like_count': ('videoData', 'stat', 'like', {int_or_none}
),
404 'tags': ('tags', ..., 'tag_name'),
405 'thumbnail': ('videoData', 'pic', {url_or_none}
),
408 **traverse_obj(video_data
, {
409 'description': 'desc',
410 'timestamp': ('pubdate', {int_or_none}
),
411 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}
),
412 'comment_count': ('stat', 'reply', {int_or_none}
),
414 'id': f
'{video_id}{format_field(part_id, None, "_p%d")}',
415 'formats': self
.extract_formats(play_info
),
416 '_old_archive_ids': [make_archive_id(self
, old_video_id
)] if old_video_id
else None,
418 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
419 'chapters': self
._get
_chapters
(aid
, cid
),
420 'subtitles': self
.extract_subtitles(video_id
, aid
, cid
),
421 '__post_extractor': self
.extract_comments(aid
),
422 'http_headers': {'Referer': url}
,
426 class BiliBiliBangumiIE(BilibiliBaseIE
):
427 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
430 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
437 'season_id': '26801',
440 'episode_id': '267851',
443 'duration': 1425.256,
444 'timestamp': 1554566400,
445 'upload_date': '20190406',
446 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
448 'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
451 def _real_extract(self
, url
):
452 video_id
= self
._match
_id
(url
)
453 episode_id
= video_id
[2:]
454 webpage
= self
._download
_webpage
(url
, video_id
)
456 if '您所在的地区无法观看本片' in webpage
:
457 raise GeoRestrictedError('This video is restricted')
458 elif '正在观看预览,大会员免费看全片' in webpage
:
459 self
.raise_login_required('This video is for premium members only')
461 headers
= {'Referer': url, **self.geo_verification_headers()}
462 play_info
= self
._download
_json
(
463 'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id
,
464 'Extracting episode', query
={'fnval': '4048', 'ep_id': episode_id}
,
466 premium_only
= play_info
.get('code') == -10403
467 play_info
= traverse_obj(play_info
, ('result', 'video_info', {dict}
)) or {}
469 formats
= self
.extract_formats(play_info
)
470 if not formats
and (premium_only
or '成为大会员抢先看' in webpage
or '开通大会员观看' in webpage
):
471 self
.raise_login_required('This video is for premium members only')
473 bangumi_info
= self
._download
_json
(
474 'https://api.bilibili.com/pgc/view/web/season', video_id
, 'Get episode details',
475 query
={'ep_id': episode_id}
, headers
=headers
)['result']
477 episode_number
, episode_info
= next((
478 (idx
, ep
) for idx
, ep
in enumerate(traverse_obj(
479 bangumi_info
, ('episodes', ..., {dict}
)), 1)
480 if str_or_none(ep
.get('id')) == episode_id
), (1, {}))
482 season_id
= bangumi_info
.get('season_id')
483 season_number
= season_id
and next((
484 idx
+ 1 for idx
, e
in enumerate(
485 traverse_obj(bangumi_info
, ('seasons', ...)))
486 if e
.get('season_id') == season_id
489 aid
= episode_info
.get('aid')
494 **traverse_obj(bangumi_info
, {
495 'series': ('series', 'series_title', {str}
),
496 'series_id': ('series', 'series_id', {str_or_none}
),
497 'thumbnail': ('square_cover', {url_or_none}
),
499 'title': join_nonempty('title', 'long_title', delim
=' ', from_dict
=episode_info
),
500 'episode': episode_info
.get('long_title'),
501 'episode_id': episode_id
,
502 'episode_number': int_or_none(episode_info
.get('title')) or episode_number
,
503 'season_id': str_or_none(season_id
),
504 'season_number': season_number
,
505 'timestamp': int_or_none(episode_info
.get('pub_time')),
506 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
507 'subtitles': self
.extract_subtitles(video_id
, aid
, episode_info
.get('cid')),
508 '__post_extractor': self
.extract_comments(aid
),
509 'http_headers': headers
,
513 class BiliBiliBangumiMediaIE(BilibiliBaseIE
):
514 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
516 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
520 'playlist_mincount': 25,
523 def _real_extract(self
, url
):
524 media_id
= self
._match
_id
(url
)
525 webpage
= self
._download
_webpage
(url
, media_id
)
526 ss_id
= self
._search
_json
(
527 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial_state', media_id
)['mediaInfo']['season_id']
529 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), media_id
)
532 class BiliBiliBangumiSeasonIE(BilibiliBaseIE
):
533 _VALID_URL
= r
'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
535 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
539 'playlist_mincount': 26
542 def _real_extract(self
, url
):
543 ss_id
= self
._match
_id
(url
)
545 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), ss_id
)
class BilibiliSpaceBaseIE(InfoExtractor):
    """Shared pagination scaffolding for bilibili space (user page) extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Fetch page 0 eagerly, derive paging metadata from it, and return
        (metadata, lazily-paged entry list)."""
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def page_entries(idx):
            # Reuse the already-downloaded first page instead of fetching it twice.
            return get_entries(initial_page if idx == 0 else fetch_page(idx))

        paged_list = InAdvancePagedList(
            page_entries, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
560 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE
):
561 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
563 'url': 'https://space.bilibili.com/3985676/video',
567 'playlist_mincount': 178,
569 'url': 'https://space.bilibili.com/313580179/video',
573 'playlist_mincount': 92,
576 def _extract_signature(self
, playlist_id
):
577 session_data
= self
._download
_json
('https://api.bilibili.com/x/web-interface/nav', playlist_id
, fatal
=False)
579 key_from_url
= lambda x
: x
[x
.rfind('/') + 1:].split('.')[0]
580 img_key
= traverse_obj(
581 session_data
, ('data', 'wbi_img', 'img_url', {key_from_url}
)) or '34478ba821254d9d93542680e3b86100'
582 sub_key
= traverse_obj(
583 session_data
, ('data', 'wbi_img', 'sub_url', {key_from_url}
)) or '7e16a90d190a4355a78fd00b32a38de6'
585 session_key
= img_key
+ sub_key
587 signature_values
= []
589 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
590 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
591 57, 62, 11, 36, 20, 34, 44, 52
593 char_at_position
= try_call(lambda: session_key
[position
])
595 signature_values
.append(char_at_position
)
597 return ''.join(signature_values
)[:32]
599 def _real_extract(self
, url
):
600 playlist_id
, is_video_url
= self
._match
_valid
_url
(url
).group('id', 'video')
602 self
.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
603 'To download audios, add a "/audio" to the URL')
605 signature
= self
._extract
_signature
(playlist_id
)
607 def fetch_page(page_idx
):
612 'order_avoided': 'true',
617 'web_location': 1550101,
618 'wts': int(time
.time()),
620 query
['w_rid'] = hashlib
.md5(f
'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
623 response
= self
._download
_json
('https://api.bilibili.com/x/space/wbi/arc/search',
624 playlist_id
, note
=f
'Downloading page {page_idx}', query
=query
)
625 except ExtractorError
as e
:
626 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 412:
627 raise ExtractorError(
628 'Request is blocked by server (412), please add cookies, wait and try later.', expected
=True)
630 if response
['code'] == -401:
631 raise ExtractorError(
632 'Request is blocked by server (401), please add cookies, wait and try later.', expected
=True)
633 return response
['data']
635 def get_metadata(page_data
):
636 page_size
= page_data
['page']['ps']
637 entry_count
= page_data
['page']['count']
639 'page_count': math
.ceil(entry_count
/ page_size
),
640 'page_size': page_size
,
643 def get_entries(page_data
):
644 for entry
in traverse_obj(page_data
, ('list', 'vlist')) or []:
645 yield self
.url_result(f
'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE
, entry
['bvid'])
647 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
648 return self
.playlist_result(paged_list
, playlist_id
)
651 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE
):
652 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
654 'url': 'https://space.bilibili.com/313580179/audio',
658 'playlist_mincount': 1,
661 def _real_extract(self
, url
):
662 playlist_id
= self
._match
_id
(url
)
664 def fetch_page(page_idx
):
665 return self
._download
_json
(
666 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id
,
667 note
=f
'Downloading page {page_idx}',
668 query
={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
)['data']
670 def get_metadata(page_data
):
672 'page_count': page_data
['pageCount'],
673 'page_size': page_data
['pageSize'],
676 def get_entries(page_data
):
677 for entry
in page_data
.get('data', []):
678 yield self
.url_result(f
'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE
, entry
['id'])
680 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
681 return self
.playlist_result(paged_list
, playlist_id
)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Base for space list extractors (collections, series, favourites, watchlater)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield url_results for every bvid found under *bvid_keys* in *page_data*."""
        bvid_path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, bvid_path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Best-effort scrape of the uploader's name from their space page title."""
        space_page = self._download_webpage(
            f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(
            r'(?s)<title\b[^>]*>([^<]+)的个人空间-', space_page, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Like the base implementation, but strip internal paging keys from metadata."""
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        for paging_key in ('page_count', 'page_size'):
            metadata.pop(paging_key, None)
        return metadata, page_list
700 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE
):
701 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
703 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
705 'id': '2142762_57445',
706 'title': '【完结】《底特律 变人》全结局流程解说',
709 'uploader_id': '2142762',
712 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
714 'playlist_mincount': 31,
717 def _real_extract(self
, url
):
718 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
719 playlist_id
= f
'{mid}_{sid}'
721 def fetch_page(page_idx
):
722 return self
._download
_json
(
723 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
724 playlist_id
, note
=f
'Downloading page {page_idx}',
725 query
={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
)['data']
727 def get_metadata(page_data
):
728 page_size
= page_data
['page']['page_size']
729 entry_count
= page_data
['page']['total']
731 'page_count': math
.ceil(entry_count
/ page_size
),
732 'page_size': page_size
,
733 'uploader': self
._get
_uploader
(mid
, playlist_id
),
734 **traverse_obj(page_data
, {
735 'title': ('meta', 'name', {str}
),
736 'description': ('meta', 'description', {str}
),
737 'uploader_id': ('meta', 'mid', {str_or_none}
),
738 'timestamp': ('meta', 'ptime', {int_or_none}
),
739 'thumbnail': ('meta', 'cover', {url_or_none}
),
743 def get_entries(page_data
):
744 return self
._get
_entries
(page_data
, 'archives')
746 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
747 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
750 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE
):
751 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
753 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
755 'id': '1958703906_547718',
757 'description': '直播回放',
758 'uploader': '靡烟miya',
759 'uploader_id': '1958703906',
760 'timestamp': 1637985853,
761 'upload_date': '20211127',
762 'modified_timestamp': int,
763 'modified_date': str,
765 'playlist_mincount': 513,
768 def _real_extract(self
, url
):
769 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
770 playlist_id
= f
'{mid}_{sid}'
771 playlist_meta
= traverse_obj(self
._download
_json
(
772 f
'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id
, fatal
=False
774 'title': ('data', 'meta', 'name', {str}
),
775 'description': ('data', 'meta', 'description', {str}
),
776 'uploader_id': ('data', 'meta', 'mid', {str_or_none}
),
777 'timestamp': ('data', 'meta', 'ctime', {int_or_none}
),
778 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}
),
781 def fetch_page(page_idx
):
782 return self
._download
_json
(
783 'https://api.bilibili.com/x/series/archives',
784 playlist_id
, note
=f
'Downloading page {page_idx}',
785 query
={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
)['data']
787 def get_metadata(page_data
):
788 page_size
= page_data
['page']['size']
789 entry_count
= page_data
['page']['total']
791 'page_count': math
.ceil(entry_count
/ page_size
),
792 'page_size': page_size
,
793 'uploader': self
._get
_uploader
(mid
, playlist_id
),
797 def get_entries(page_data
):
798 return self
._get
_entries
(page_data
, 'archives')
800 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
801 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
804 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE
):
805 _VALID_URL
= r
'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
807 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
813 'uploader_id': '84912',
814 'timestamp': 1604905176,
815 'upload_date': '20201109',
816 'modified_timestamp': int,
817 'modified_date': str,
818 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
822 'playlist_mincount': 22,
824 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
825 'only_matching': True,
828 def _real_extract(self
, url
):
829 fid
= self
._match
_id
(url
)
831 list_info
= self
._download
_json
(
832 f
'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
833 fid
, note
='Downloading favlist metadata')
834 if list_info
['code'] == -403:
835 self
.raise_login_required(msg
='This is a private favorites list. You need to log in as its owner')
837 entries
= self
._get
_entries
(self
._download
_json
(
838 f
'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
839 fid
, note
='Download favlist entries'), 'data')
841 return self
.playlist_result(entries
, fid
, **traverse_obj(list_info
, ('data', 'info', {
842 'title': ('title', {str}
),
843 'description': ('intro', {str}
),
844 'uploader': ('upper', 'name', {str}
),
845 'uploader_id': ('upper', 'mid', {str_or_none}
),
846 'timestamp': ('ctime', {int_or_none}
),
847 'modified_timestamp': ('mtime', {int_or_none}
),
848 'thumbnail': ('cover', {url_or_none}
),
849 'view_count': ('cnt_info', 'play', {int_or_none}
),
850 'like_count': ('cnt_info', 'thumb_up', {int_or_none}
),
854 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE
):
855 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
857 'url': 'https://www.bilibili.com/watchlater/#/list',
858 'info_dict': {'id': 'watchlater'}
,
859 'playlist_mincount': 0,
860 'skip': 'login required',
863 def _real_extract(self
, url
):
864 list_id
= getattr(self
._get
_cookies
(url
).get('DedeUserID'), 'value', 'watchlater')
865 watchlater_info
= self
._download
_json
(
866 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id
)
867 if watchlater_info
['code'] == -101:
868 self
.raise_login_required(msg
='You need to login to access your watchlater list')
869 entries
= self
._get
_entries
(watchlater_info
, ('data', 'list'))
870 return self
.playlist_result(entries
, id=list_id
, title
='稍后再看')
873 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE
):
874 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
876 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
880 'uploader': '靡烟miya',
881 'uploader_id': '1958703906',
882 'timestamp': 1637985853,
883 'upload_date': '20211127',
885 'playlist_mincount': 513,
887 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
891 'playlist_mincount': 513,
892 'skip': 'redirect url',
894 'url': 'https://www.bilibili.com/list/ml1103407912',
896 'id': '3_1103407912',
899 'uploader_id': '84912',
900 'timestamp': 1604905176,
901 'upload_date': '20201109',
902 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
904 'playlist_mincount': 22,
906 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
908 'id': '3_1103407912',
910 'playlist_mincount': 22,
911 'skip': 'redirect url',
913 'url': 'https://www.bilibili.com/list/watchlater',
914 'info_dict': {'id': 'watchlater'}
,
915 'playlist_mincount': 0,
916 'skip': 'login required',
918 'url': 'https://www.bilibili.com/medialist/play/watchlater',
919 'info_dict': {'id': 'watchlater'}
,
920 'playlist_mincount': 0,
921 'skip': 'login required',
924 def _extract_medialist(self
, query
, list_id
):
925 for page_num
in itertools
.count(1):
926 page_data
= self
._download
_json
(
927 'https://api.bilibili.com/x/v2/medialist/resource/list',
928 list_id
, query
=query
, note
=f
'getting playlist {query["biz_id"]} page {page_num}'
930 yield from self
._get
_entries
(page_data
, 'media_list', ending_key
='bv_id')
931 query
['oid'] = traverse_obj(page_data
, ('media_list', -1, 'id'))
932 if not page_data
.get('has_more', False):
935 def _real_extract(self
, url
):
936 list_id
= self
._match
_id
(url
)
937 webpage
= self
._download
_webpage
(url
, list_id
)
938 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', list_id
)
939 if traverse_obj(initial_state
, ('error', 'code', {int_or_none}
)) != 200:
940 error_code
= traverse_obj(initial_state
, ('error', 'trueCode', {int_or_none}
))
941 error_message
= traverse_obj(initial_state
, ('error', 'message', {str_or_none}
))
942 if error_code
== -400 and list_id
== 'watchlater':
943 self
.raise_login_required('You need to login to access your watchlater playlist')
944 elif error_code
== -403:
945 self
.raise_login_required('This is a private playlist. You need to login as its owner')
946 elif error_code
== 11010:
947 raise ExtractorError('Playlist is no longer available', expected
=True)
948 raise ExtractorError(f
'Could not access playlist: {error_code} {error_message}')
952 'with_current': False,
953 **traverse_obj(initial_state
, {
954 'type': ('playlist', 'type', {int_or_none}
),
955 'biz_id': ('playlist', 'id', {int_or_none}
),
956 'tid': ('tid', {int_or_none}
),
957 'sort_field': ('sortFiled', {int_or_none}
),
958 'desc': ('desc', {bool_or_none}
, {str_or_none}
, {str.lower}
),
962 'id': f
'{query["type"]}_{query["biz_id"]}',
963 **traverse_obj(initial_state
, ('mediaListInfo', {
964 'title': ('title', {str}
),
965 'uploader': ('upper', 'name', {str}
),
966 'uploader_id': ('upper', 'mid', {str_or_none}
),
967 'timestamp': ('ctime', {int_or_none}
),
968 'thumbnail': ('cover', {url_or_none}
),
971 return self
.playlist_result(self
._extract
_medialist
(query
, list_id
), **metadata
)
974 class BilibiliCategoryIE(InfoExtractor
):
975 IE_NAME
= 'Bilibili category extractor'
976 _MAX_RESULTS
= 1000000
977 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
979 'url': 'https://www.bilibili.com/v/kichiku/mad',
981 'id': 'kichiku: mad',
982 'title': 'kichiku: mad'
984 'playlist_mincount': 45,
990 def _fetch_page(self
, api_url
, num_pages
, query
, page_num
):
991 parsed_json
= self
._download
_json
(
992 api_url
, query
, query
={'Search_key': query, 'pn': page_num}
,
993 note
='Extracting results from page %s of %s' % (page_num
, num_pages
))
995 video_list
= traverse_obj(parsed_json
, ('data', 'archives'), expected_type
=list)
997 raise ExtractorError('Failed to retrieve video list for page %d' % page_num
)
999 for video
in video_list
:
1000 yield self
.url_result(
1001 'https://www.bilibili.com/video/%s' % video
['bvid'], 'BiliBili', video
['bvid'])
1003 def _entries(self
, category
, subcategory
, query
):
1004 # map of categories : subcategories : RIDs
1008 'manual_vocaloid': 126,
1015 if category
not in rid_map
:
1016 raise ExtractorError(
1017 f
'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1018 if subcategory
not in rid_map
[category
]:
1019 raise ExtractorError(
1020 f
'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1021 rid_value
= rid_map
[category
][subcategory
]
1023 api_url
= 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1024 page_json
= self
._download
_json
(api_url
, query
, query
={'Search_key': query, 'pn': '1'}
)
1025 page_data
= traverse_obj(page_json
, ('data', 'page'), expected_type
=dict)
1026 count
, size
= int_or_none(page_data
.get('count')), int_or_none(page_data
.get('size'))
1027 if count
is None or not size
:
1028 raise ExtractorError('Failed to calculate either page count or size')
1030 num_pages
= math
.ceil(count
/ size
)
1032 return OnDemandPagedList(functools
.partial(
1033 self
._fetch
_page
, api_url
, num_pages
, query
), size
)
1035 def _real_extract(self
, url
):
1036 category
, subcategory
= urllib
.parse
.urlparse(url
).path
.split('/')[2:4]
1037 query
= '%s: %s' % (category
, subcategory
)
1039 return self
.playlist_result(self
._entries
(category
, subcategory
, query
), query
, query
)
1042 class BiliBiliSearchIE(SearchInfoExtractor
):
1043 IE_DESC
= 'Bilibili video search'
1044 _MAX_RESULTS
= 100000
1045 _SEARCH_KEY
= 'bilisearch'
1047 def _search_results(self
, query
):
1048 for page_num
in itertools
.count(1):
1049 videos
= self
._download
_json
(
1050 'https://api.bilibili.com/x/web-interface/search/type', query
,
1051 note
=f
'Extracting results from page {page_num}', query
={
1052 'Search_key': query
,
1058 '__refresh__': 'true',
1059 'search_type': 'video',
1062 })['data'].get('result')
1065 for video
in videos
:
1066 yield self
.url_result(video
['arcurl'], 'BiliBili', str(video
['aid']))
1069 class BilibiliAudioBaseIE(InfoExtractor
):
1070 def _call_api(self
, path
, sid
, query
=None):
1072 query
= {'sid': sid}
1073 return self
._download
_json
(
1074 'https://www.bilibili.com/audio/music-service-c/web/' + path
,
1075 sid
, query
=query
)['data']
1078 class BilibiliAudioIE(BilibiliAudioBaseIE
):
1079 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1081 'url': 'https://www.bilibili.com/audio/au1003142',
1082 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1086 'title': '【tsukimi】YELLOW / 神山羊',
1087 'artist': 'tsukimi',
1088 'comment_count': int,
1089 'description': 'YELLOW的mp3版!',
1096 'thumbnail': r
're:^https?://.+\.jpg',
1097 'timestamp': 1564836614,
1098 'upload_date': '20190803',
1099 'uploader': 'tsukimi-つきみぐー',
1104 def _real_extract(self
, url
):
1105 au_id
= self
._match
_id
(url
)
1107 play_data
= self
._call
_api
('url', au_id
)
1109 'url': play_data
['cdns'][0],
1110 'filesize': int_or_none(play_data
.get('size')),
1114 for a_format
in formats
:
1115 a_format
.setdefault('http_headers', {}).update({
1119 song
= self
._call
_api
('song/info', au_id
)
1120 title
= song
['title']
1121 statistic
= song
.get('statistic') or {}
1124 lyric
= song
.get('lyric')
1136 'artist': song
.get('author'),
1137 'comment_count': int_or_none(statistic
.get('comment')),
1138 'description': song
.get('intro'),
1139 'duration': int_or_none(song
.get('duration')),
1140 'subtitles': subtitles
,
1141 'thumbnail': song
.get('cover'),
1142 'timestamp': int_or_none(song
.get('passtime')),
1143 'uploader': song
.get('uname'),
1144 'view_count': int_or_none(statistic
.get('play')),
1148 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE
):
1149 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1151 'url': 'https://www.bilibili.com/audio/am10624',
1154 'title': '每日新曲推荐(每日11:00更新)',
1155 'description': '每天11:00更新,为你推送最新音乐',
1157 'playlist_count': 19,
1160 def _real_extract(self
, url
):
1161 am_id
= self
._match
_id
(url
)
1163 songs
= self
._call
_api
(
1164 'song/of-menu', am_id
, {'sid': am_id, 'pn': 1, 'ps': 100}
)['data']
1168 sid
= str_or_none(song
.get('id'))
1171 entries
.append(self
.url_result(
1172 'https://www.bilibili.com/audio/au' + sid
,
1173 BilibiliAudioIE
.ie_key(), sid
))
1176 album_data
= self
._call
_api
('menu/info', am_id
) or {}
1177 album_title
= album_data
.get('title')
1179 for entry
in entries
:
1180 entry
['album'] = album_title
1181 return self
.playlist_result(
1182 entries
, am_id
, album_title
, album_data
.get('intro'))
1184 return self
.playlist_result(entries
, am_id
)
1187 class BiliBiliPlayerIE(InfoExtractor
):
1188 _VALID_URL
= r
'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1190 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1191 'only_matching': True,
1194 def _real_extract(self
, url
):
1195 video_id
= self
._match
_id
(url
)
1196 return self
.url_result(
1197 'http://www.bilibili.tv/video/av%s/' % video_id
,
1198 ie
=BiliBiliIE
.ie_key(), video_id
=video_id
)
1201 class BiliIntlBaseIE(InfoExtractor
):
1202 _API_URL
= 'https://api.bilibili.tv/intl/gateway'
1203 _NETRC_MACHINE
= 'biliintl'
1205 def _call_api(self
, endpoint
, *args
, **kwargs
):
1206 json
= self
._download
_json
(self
._API
_URL
+ endpoint
, *args
, **kwargs
)
1207 if json
.get('code'):
1208 if json
['code'] in (10004004, 10004005, 10023006):
1209 self
.raise_login_required()
1210 elif json
['code'] == 10004001:
1211 self
.raise_geo_restricted()
1213 if json
.get('message') and str(json
['code']) != json
['message']:
1214 errmsg
= f
'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1216 errmsg
= kwargs
.get('errnote', 'Unable to download JSON metadata')
1217 if kwargs
.get('fatal'):
1218 raise ExtractorError(errmsg
)
1220 self
.report_warning(errmsg
)
1221 return json
.get('data')
1223 def json2srt(self
, json
):
1225 f
'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1226 for i
, line
in enumerate(traverse_obj(json
, (
1227 'body', lambda _
, l
: l
['content'] and l
['from'] and l
['to']))))
1230 def _get_subtitles(self
, *, ep_id
=None, aid
=None):
1231 sub_json
= self
._call
_api
(
1232 '/web/v2/subtitle', ep_id
or aid
, fatal
=False,
1233 note
='Downloading subtitles list', errnote
='Unable to download subtitles list',
1236 's_locale': 'en_US',
1237 'episode_id': ep_id
,
1241 for sub
in sub_json
.get('subtitles') or []:
1242 sub_url
= sub
.get('url')
1245 sub_data
= self
._download
_json
(
1246 sub_url
, ep_id
or aid
, errnote
='Unable to download subtitles', fatal
=False,
1247 note
='Downloading subtitles%s' % f
' for {sub["lang"]}' if sub
.get('lang') else '')
1250 subtitles
.setdefault(sub
.get('lang_key', 'en'), []).append({
1252 'data': self
.json2srt(sub_data
)
1256 def _get_formats(self
, *, ep_id
=None, aid
=None):
1257 video_json
= self
._call
_api
(
1258 '/web/playurl', ep_id
or aid
, note
='Downloading video formats',
1259 errnote
='Unable to download video formats', query
=filter_dict({
1264 video_json
= video_json
['playurl']
1266 for vid
in video_json
.get('video') or []:
1267 video_res
= vid
.get('video_resource') or {}
1268 video_info
= vid
.get('stream_info') or {}
1269 if not video_res
.get('url'):
1272 'url': video_res
['url'],
1274 'format_note': video_info
.get('desc_words'),
1275 'width': video_res
.get('width'),
1276 'height': video_res
.get('height'),
1277 'vbr': video_res
.get('bandwidth'),
1279 'vcodec': video_res
.get('codecs'),
1280 'filesize': video_res
.get('size'),
1282 for aud
in video_json
.get('audio_resource') or []:
1283 if not aud
.get('url'):
1288 'abr': aud
.get('bandwidth'),
1289 'acodec': aud
.get('codecs'),
1291 'filesize': aud
.get('size'),
1296 def _parse_video_metadata(self
, video_data
):
1298 'title': video_data
.get('title_display') or video_data
.get('title'),
1299 'thumbnail': video_data
.get('cover'),
1300 'episode_number': int_or_none(self
._search
_regex
(
1301 r
'^E(\d+)(?:$| - )', video_data
.get('title_display') or '', 'episode number', default
=None)),
1304 def _perform_login(self
, username
, password
):
1305 if not Cryptodome
.RSA
:
1306 raise ExtractorError('pycryptodomex not found. Please install', expected
=True)
1308 key_data
= self
._download
_json
(
1309 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1310 note
='Downloading login key', errnote
='Unable to download login key')['data']
1312 public_key
= Cryptodome
.RSA
.importKey(key_data
['key'])
1313 password_hash
= Cryptodome
.PKCS1_v1_5
.new(public_key
).encrypt((key_data
['hash'] + password
).encode('utf-8'))
1314 login_post
= self
._download
_json
(
1315 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data
=urlencode_postdata({
1316 'username': username
,
1317 'password': base64
.b64encode(password_hash
).decode('ascii'),
1319 's_locale': 'en_US',
1321 }), note
='Logging in', errnote
='Unable to log in')
1322 if login_post
.get('code'):
1323 if login_post
.get('message'):
1324 raise ExtractorError(f
'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected
=True)
1326 raise ExtractorError('Unable to log in')
1329 class BiliIntlIE(BiliIntlBaseIE
):
1330 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1333 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1337 'title': 'E2 - The First Night',
1338 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1339 'episode_number': 2,
1340 'upload_date': '20201009',
1341 'episode': 'Episode 2',
1342 'timestamp': 1602259500,
1343 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1347 'title': '<Untitled Chapter 1>'
1349 'start_time': 76.242,
1350 'end_time': 161.161,
1353 'start_time': 1325.742,
1354 'end_time': 1403.903,
1360 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1364 'title': 'E3 - Who?',
1365 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1366 'episode_number': 3,
1367 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1368 'episode': 'Episode 3',
1369 'upload_date': '20211219',
1370 'timestamp': 1639928700,
1374 'title': '<Untitled Chapter 1>'
1380 'start_time': 1173.0,
1381 'end_time': 1259.535,
1386 # Subtitle with empty content
1387 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1391 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1392 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1393 'episode_number': 140,
1395 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1397 'url': 'https://www.bilibili.tv/en/video/2041863208',
1401 'timestamp': 1670874843,
1402 'description': 'Scheduled for April 2023.\nStudio: ufotable',
1403 'thumbnail': r
're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
1404 'upload_date': '20221212',
1405 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
1408 # episode comment extraction
1409 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1413 'timestamp': 1604057820,
1414 'upload_date': '20201030',
1415 'episode_number': 5,
1416 'title': 'E5 - My Own Steel',
1417 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1418 'thumbnail': r
're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1419 'episode': 'Episode 5',
1420 'comment_count': int,
1424 'title': '<Untitled Chapter 1>'
1430 'start_time': 1290.0,
1439 # user generated content comment extraction
1440 'url': 'https://www.bilibili.tv/en/video/2045730385',
1444 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1445 'timestamp': 1667891924,
1446 'upload_date': '20221108',
1447 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
1448 'comment_count': int,
1449 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
1455 # episode id without intro and outro
1456 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1460 'title': 'E1 - Operation \'Strix\' <Owl>',
1461 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1462 'timestamp': 1649516400,
1463 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1464 'episode': 'Episode 1',
1465 'episode_number': 1,
1466 'upload_date': '20220409',
1469 'url': 'https://www.biliintl.com/en/play/34613/341736',
1470 'only_matching': True,
1472 # User-generated content (as opposed to a series licensed from a studio)
1473 'url': 'https://bilibili.tv/en/video/2019955076',
1474 'only_matching': True,
1476 # No language in URL
1477 'url': 'https://www.bilibili.tv/video/2019955076',
1478 'only_matching': True,
1480 # Uppercase language in URL
1481 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1482 'only_matching': True,
1485 def _make_url(video_id
, series_id
=None):
1487 return f
'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1488 return f
'https://www.bilibili.tv/en/video/{video_id}'
1490 def _extract_video_metadata(self
, url
, video_id
, season_id
):
1491 url
, smuggled_data
= unsmuggle_url(url
, {})
1492 if smuggled_data
.get('title'):
1493 return smuggled_data
1495 webpage
= self
._download
_webpage
(url
, video_id
)
1498 self
._search
_json
(r
'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage
, 'preload state', video_id
, default
={})
1499 or self
._search
_nuxt
_data
(webpage
, video_id
, '__initialState', fatal
=False, traverse
=None))
1500 video_data
= traverse_obj(
1501 initial_data
, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type
=dict) or {}
1503 if season_id
and not video_data
:
1504 # Non-Bstation layout, read through episode list
1505 season_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id
)
1506 video_data
= traverse_obj(season_json
, (
1507 'sections', ..., 'episodes', lambda _
, v
: str(v
['episode_id']) == video_id
1508 ), expected_type
=dict, get_all
=False)
1510 # XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
1512 self
._parse
_video
_metadata
(video_data
), self
._search
_json
_ld
(webpage
, video_id
, fatal
=False), {
1513 'title': self
._html
_search
_meta
('og:title', webpage
),
1514 'description': self
._html
_search
_meta
('og:description', webpage
)
1517 def _get_comments_reply(self
, root_id
, next_id
=0, display_id
=None):
1518 comment_api_raw_data
= self
._download
_json
(
1519 'https://api.bilibili.tv/reply/web/detail', display_id
,
1520 note
=f
'Downloading reply comment of {root_id} - {next_id}',
1523 'ps': 20, # comment's reply per page (default: 3)
1528 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
1530 'author': traverse_obj(replies
, ('member', 'name')),
1531 'author_id': traverse_obj(replies
, ('member', 'mid')),
1532 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
1533 'text': traverse_obj(replies
, ('content', 'message')),
1534 'id': replies
.get('rpid'),
1535 'like_count': int_or_none(replies
.get('like_count')),
1536 'parent': replies
.get('parent'),
1537 'timestamp': unified_timestamp(replies
.get('ctime_text'))
1540 if not traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
1541 yield from self
._get
_comments
_reply
(
1542 root_id
, comment_api_raw_data
['data']['cursor']['next'], display_id
)
1544 def _get_comments(self
, video_id
, ep_id
):
1545 for i
in itertools
.count(0):
1546 comment_api_raw_data
= self
._download
_json
(
1547 'https://api.bilibili.tv/reply/web/root', video_id
,
1548 note
=f
'Downloading comment page {i + 1}',
1551 'pn': i
, # page number
1552 'ps': 20, # comment per page (default: 20)
1554 'type': 3 if ep_id
else 1, # 1: user generated content, 3: series content
1555 'sort_type': 1, # 1: best, 2: recent
1558 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
1560 'author': traverse_obj(replies
, ('member', 'name')),
1561 'author_id': traverse_obj(replies
, ('member', 'mid')),
1562 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
1563 'text': traverse_obj(replies
, ('content', 'message')),
1564 'id': replies
.get('rpid'),
1565 'like_count': int_or_none(replies
.get('like_count')),
1566 'timestamp': unified_timestamp(replies
.get('ctime_text')),
1567 'author_is_uploader': bool(traverse_obj(replies
, ('member', 'type'))),
1569 if replies
.get('count'):
1570 yield from self
._get
_comments
_reply
(replies
.get('rpid'), display_id
=video_id
)
1572 if traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
1575 def _real_extract(self
, url
):
1576 season_id
, ep_id
, aid
= self
._match
_valid
_url
(url
).group('season_id', 'ep_id', 'aid')
1577 video_id
= ep_id
or aid
1581 intro_ending_json
= self
._call
_api
(
1582 f
'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
1583 video_id
, fatal
=False) or {}
1584 if intro_ending_json
.get('skip'):
1585 # FIXME: start and end times seem to be off by a few seconds even when correct per ogv.*.js
1586 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
1588 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_start_time')), 1000),
1589 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_end_time')), 1000),
1592 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_start_time')), 1000),
1593 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_end_time')), 1000),
1599 **self
._extract
_video
_metadata
(url
, video_id
, season_id
),
1600 'formats': self
._get
_formats
(ep_id
=ep_id
, aid
=aid
),
1601 'subtitles': self
.extract_subtitles(ep_id
=ep_id
, aid
=aid
),
1602 'chapters': chapters
,
1603 '__post_extractor': self
.extract_comments(video_id
, ep_id
)
1607 class BiliIntlSeriesIE(BiliIntlBaseIE
):
1608 IE_NAME
= 'biliIntl:series'
1609 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
1611 'url': 'https://www.bilibili.tv/en/play/34613',
1612 'playlist_mincount': 15,
1615 'title': 'TONIKAWA: Over the Moon For You',
1616 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1617 'categories': ['Slice of life', 'Comedy', 'Romance'],
1618 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1622 'skip_download': True,
1625 'url': 'https://www.bilibili.tv/en/media/1048837',
1628 'title': 'SPY×FAMILY',
1629 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1630 'categories': ['Adventure', 'Action', 'Comedy'],
1631 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
1634 'playlist_mincount': 25,
1636 'url': 'https://www.biliintl.com/en/play/34613',
1637 'only_matching': True,
1639 'url': 'https://www.biliintl.com/EN/play/34613',
1640 'only_matching': True,
1643 def _entries(self
, series_id
):
1644 series_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id
)
1645 for episode
in traverse_obj(series_json
, ('sections', ..., 'episodes', ...), expected_type
=dict):
1646 episode_id
= str(episode
['episode_id'])
1647 yield self
.url_result(smuggle_url(
1648 BiliIntlIE
._make
_url
(episode_id
, series_id
),
1649 self
._parse
_video
_metadata
(episode
)
1650 ), BiliIntlIE
, episode_id
)
1652 def _real_extract(self
, url
):
1653 series_id
= self
._match
_id
(url
)
1654 series_info
= self
._call
_api
(f
'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id
).get('season') or {}
1655 return self
.playlist_result(
1656 self
._entries
(series_id
), series_id
, series_info
.get('title'), series_info
.get('description'),
1657 categories
=traverse_obj(series_info
, ('styles', ..., 'title'), expected_type
=str_or_none
),
1658 thumbnail
=url_or_none(series_info
.get('horizontal_cover')), view_count
=parse_count(series_info
.get('view')))
1661 class BiliLiveIE(InfoExtractor
):
1662 _VALID_URL
= r
'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
1665 'url': 'https://live.bilibili.com/196',
1668 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
1670 'title': "太空狼人杀联动,不被爆杀就算赢",
1671 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
1672 'timestamp': 1650802769,
1676 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
1677 'only_matching': True
1679 'url': 'https://live.bilibili.com/blanc/196',
1680 'only_matching': True
1684 80: {'format_id': 'low', 'format_note': '流畅'}
,
1685 150: {'format_id': 'high_res', 'format_note': '高清'}
,
1686 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}
,
1687 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}
,
1688 10000: {'format_id': 'source', 'format_note': '原画'}
,
1689 20000: {'format_id': '4K', 'format_note': '4K'}
,
1690 30000: {'format_id': 'dolby', 'format_note': '杜比'}
,
1693 _quality
= staticmethod(qualities(list(_FORMATS
)))
1695 def _call_api(self
, path
, room_id
, query
):
1696 api_result
= self
._download
_json
(f
'https://api.live.bilibili.com/{path}', room_id
, query
=query
)
1697 if api_result
.get('code') != 0:
1698 raise ExtractorError(api_result
.get('message') or 'Unable to download JSON metadata')
1699 return api_result
.get('data') or {}
1701 def _parse_formats(self
, qn
, fmt
):
1702 for codec
in fmt
.get('codec') or []:
1703 if codec
.get('current_qn') != qn
:
1705 for url_info
in codec
['url_info']:
1707 'url': f
'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
1708 'ext': fmt
.get('format_name'),
1709 'vcodec': codec
.get('codec_name'),
1710 'quality': self
._quality
(qn
),
1711 **self
._FORMATS
[qn
],
1714 def _real_extract(self
, url
):
1715 room_id
= self
._match
_id
(url
)
1716 room_data
= self
._call
_api
('room/v1/Room/get_info', room_id
, {'id': room_id}
)
1717 if room_data
.get('live_status') == 0:
1718 raise ExtractorError('Streamer is not live', expected
=True)
1721 for qn
in self
._FORMATS
.keys():
1722 stream_data
= self
._call
_api
('xlive/web-room/v2/index/getRoomPlayInfo', room_id
, {
1732 for fmt
in traverse_obj(stream_data
, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
1733 formats
.extend(self
._parse
_formats
(qn
, fmt
))
1737 'title': room_data
.get('title'),
1738 'description': room_data
.get('description'),
1739 'thumbnail': room_data
.get('user_cover'),
1740 'timestamp': stream_data
.get('live_time'),