]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bilibili.py
[ie/bilibili] Support courses and interactive videos (#8343)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
CommitLineData
cfcf60ea 1import base64
c34f505b 2import functools
6f10cdcf 3import hashlib
ad974876 4import itertools
9f09bdcf 5import json
c34f505b 6import math
5336bf57 7import re
6f10cdcf 8import time
ad974876 9import urllib.parse
28746fbd 10
06167fbb 11from .common import InfoExtractor, SearchInfoExtractor
f6a765ce 12from ..dependencies import Cryptodome
3d2623a8 13from ..networking.exceptions import HTTPError
28746fbd 14from ..utils import (
bd8f48c7 15 ExtractorError,
ad974876 16 GeoRestrictedError,
2b9d0216
L
17 InAdvancePagedList,
18 OnDemandPagedList,
9e68747f 19 bool_or_none,
9f09bdcf 20 clean_html,
f5f15c99 21 filter_dict,
6461f2b7 22 float_or_none,
ad974876 23 format_field,
9f09bdcf 24 get_element_by_class,
2b9d0216 25 int_or_none,
bdd0b75e 26 join_nonempty,
ad974876 27 make_archive_id,
d37422f1 28 merge_dicts,
f8580bf0 29 mimetype2ext,
2b9d0216 30 parse_count,
ad974876 31 parse_qs,
b4f53662 32 qualities,
26fdfc37 33 smuggle_url,
efc947fb 34 srt_subtitles_timecode,
4bc15a68 35 str_or_none,
2b9d0216 36 traverse_obj,
6f10cdcf 37 try_call,
b093c38c 38 unified_timestamp,
26fdfc37 39 unsmuggle_url,
c62ecf0d 40 url_or_none,
ad974876 41 urlencode_postdata,
9e68747f 42 variadic,
28746fbd
PH
43)
44
45
ad974876 46class BilibiliBaseIE(InfoExtractor):
5336bf57 47 _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
48
ad974876
L
49 def extract_formats(self, play_info):
50 format_names = {
51 r['quality']: traverse_obj(r, 'new_description', 'display_desc')
52 for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
53 }
54
b84fda73 55 audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
ad974876
L
56 flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
57 if flac_audio:
58 audios.append(flac_audio)
59 formats = [{
60 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
61 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
b84fda73 62 'acodec': traverse_obj(audio, ('codecs', {str.lower})),
ad974876
L
63 'vcodec': 'none',
64 'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
5336bf57 65 'filesize': int_or_none(audio.get('size')),
66 'format_id': str_or_none(audio.get('id')),
ad974876
L
67 } for audio in audios]
68
69 formats.extend({
70 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
71 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
72 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
73 'width': int_or_none(video.get('width')),
74 'height': int_or_none(video.get('height')),
75 'vcodec': video.get('codecs'),
76 'acodec': 'none' if audios else None,
b84fda73 77 'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
ad974876
L
78 'tbr': float_or_none(video.get('bandwidth'), scale=1000),
79 'filesize': int_or_none(video.get('size')),
80 'quality': int_or_none(video.get('id')),
5336bf57 81 'format_id': traverse_obj(
82 video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
83 ('id', {str_or_none}), get_all=False),
ad974876
L
84 'format': format_names.get(video.get('id')),
85 } for video in traverse_obj(play_info, ('dash', 'video', ...)))
86
87 missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
88 if missing_formats:
89 self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
6368e2e6 90 f'you have to login or become premium member to download them. {self._login_hint()}')
ad974876 91
ad974876
L
92 return formats
93
9f09bdcf 94 def _download_playinfo(self, video_id, cid):
95 return self._download_json(
96 'https://api.bilibili.com/x/player/playurl', video_id,
97 query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
98 note=f'Downloading video formats for cid {cid}')['data']
99
ad974876
L
100 def json2srt(self, json_data):
101 srt_data = ''
102 for idx, line in enumerate(json_data.get('body') or []):
103 srt_data += (f'{idx + 1}\n'
104 f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
105 f'{line["content"]}\n\n')
106 return srt_data
107
9f09bdcf 108 def _get_subtitles(self, video_id, cid, aid=None):
ad974876
L
109 subtitles = {
110 'danmaku': [{
111 'ext': 'xml',
112 'url': f'https://comment.bilibili.com/{cid}.xml',
113 }]
114 }
115
9f09bdcf 116 subtitle_info = traverse_obj(self._download_json(
117 'https://api.bilibili.com/x/player/v2', video_id,
118 query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
119 note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
120 subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
121 if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
122 if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
123 self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
124 for s in subs_list:
ad974876
L
125 subtitles.setdefault(s['lan'], []).append({
126 'ext': 'srt',
127 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
128 })
129 return subtitles
130
c90c5b9b 131 def _get_chapters(self, aid, cid):
132 chapters = aid and cid and self._download_json(
133 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
134 note='Extracting chapters', fatal=False)
135 return traverse_obj(chapters, ('data', 'view_points', ..., {
136 'title': 'content',
137 'start_time': 'from',
138 'end_time': 'to',
139 })) or None
140
ad974876
L
141 def _get_comments(self, aid):
142 for idx in itertools.count(1):
143 replies = traverse_obj(
144 self._download_json(
145 f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
146 aid, note=f'Extracting comments from page {idx}', fatal=False),
147 ('data', 'replies'))
148 if not replies:
149 return
150 for children in map(self._get_all_children, replies):
151 yield from children
152
153 def _get_all_children(self, reply):
154 yield {
155 'author': traverse_obj(reply, ('member', 'uname')),
156 'author_id': traverse_obj(reply, ('member', 'mid')),
157 'id': reply.get('rpid'),
158 'text': traverse_obj(reply, ('content', 'message')),
159 'timestamp': reply.get('ctime'),
160 'parent': reply.get('parent') or 'root',
161 }
162 for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
163 yield from children
164
bdd0b75e
GS
165 def _get_episodes_from_season(self, ss_id, url):
166 season_info = self._download_json(
167 'https://api.bilibili.com/pgc/web/season/section', ss_id,
168 note='Downloading season info', query={'season_id': ss_id},
169 headers={'Referer': url, **self.geo_verification_headers()})
170
171 for entry in traverse_obj(season_info, (
172 'result', 'main_section', 'episodes',
173 lambda _, v: url_or_none(v['share_url']) and v['id'])):
9f09bdcf 174 yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
175
176 def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
177 cid_edges = cid_edges or {}
178 division_data = self._download_json(
179 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
180 query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
181 note=f'Extracting divisions from edge {edge_id}')
182 edges.setdefault(edge_id, {}).update(
183 traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
184 'title': ('title', {str}),
185 'cid': ('cid', {int_or_none}),
186 }), get_all=False))
187
188 edges[edge_id].update(traverse_obj(division_data, ('data', {
189 'title': ('title', {str}),
190 'choices': ('edges', 'questions', ..., 'choices', ..., {
191 'edge_id': ('id', {int_or_none}),
192 'cid': ('cid', {int_or_none}),
193 'text': ('option', {str}),
194 }),
195 })))
196 # use dict to combine edges that use the same video section (same cid)
197 cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
198 for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
199 if choice['edge_id'] not in edges:
200 edges[choice['edge_id']] = {'cid': choice['cid']}
201 self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
202 return cid_edges
203
204 def _get_interactive_entries(self, video_id, cid, metainfo):
205 graph_version = traverse_obj(
206 self._download_json(
207 'https://api.bilibili.com/x/player/wbi/v2', video_id,
208 'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
209 ('data', 'interaction', 'graph_version', {int_or_none}))
210 cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
211 for cid, edges in cid_edges.items():
212 play_info = self._download_playinfo(video_id, cid)
213 yield {
214 **metainfo,
215 'id': f'{video_id}_{cid}',
216 'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
217 'formats': self.extract_formats(play_info),
218 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
219 'duration': float_or_none(play_info.get('timelength'), scale=1000),
220 'subtitles': self.extract_subtitles(video_id, cid),
221 }
bdd0b75e 222
ad974876
L
223
224class BiliBiliIE(BilibiliBaseIE):
9e68747f 225 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
28746fbd 226
bd8f48c7 227 _TESTS = [{
ad974876
L
228 'url': 'https://www.bilibili.com/video/BV13x41117TL',
229 'info_dict': {
230 'id': 'BV13x41117TL',
231 'title': '阿滴英文|英文歌分享#6 "Closer',
232 'ext': 'mp4',
233 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
234 'uploader_id': '65880958',
235 'uploader': '阿滴英文',
236 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
237 'duration': 554.117,
238 'tags': list,
239 'comment_count': int,
240 'upload_date': '20170301',
241 'timestamp': 1488353834,
242 'like_count': int,
243 'view_count': int,
244 },
245 }, {
9f09bdcf 246 'note': 'old av URL version',
06167fbb 247 'url': 'http://www.bilibili.com/video/av1074402/',
28746fbd 248 'info_dict': {
ad974876 249 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
f8580bf0 250 'ext': 'mp4',
f8580bf0 251 'uploader': '菊子桑',
ad974876
L
252 'uploader_id': '156160',
253 'id': 'BV11x411K7CN',
254 'title': '【金坷垃】金泡沫',
255 'duration': 308.36,
f8580bf0 256 'upload_date': '20140420',
ad974876 257 'timestamp': 1397983878,
6461f2b7 258 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
ad974876
L
259 'like_count': int,
260 'comment_count': int,
261 'view_count': int,
262 'tags': list,
263 },
c90c5b9b 264 'params': {'skip_download': True},
bd8f48c7 265 }, {
ad974876
L
266 'note': 'Anthology',
267 'url': 'https://www.bilibili.com/video/BV1bK411W797',
268 'info_dict': {
269 'id': 'BV1bK411W797',
270 'title': '物语中的人物是如何吐槽自己的OP的'
271 },
272 'playlist_count': 18,
273 'playlist': [{
274 'info_dict': {
275 'id': 'BV1bK411W797_p1',
276 'ext': 'mp4',
277 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
9f09bdcf 278 'tags': 'count:10',
ad974876
L
279 'timestamp': 1589601697,
280 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
281 'uploader': '打牌还是打桩',
282 'uploader_id': '150259984',
283 'like_count': int,
284 'comment_count': int,
285 'upload_date': '20200516',
286 'view_count': int,
287 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
288 'duration': 90.314,
289 }
290 }]
06167fbb 291 }, {
ad974876
L
292 'note': 'Specific page of Anthology',
293 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
294 'info_dict': {
295 'id': 'BV1bK411W797_p1',
296 'ext': 'mp4',
297 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
9f09bdcf 298 'tags': 'count:10',
ad974876
L
299 'timestamp': 1589601697,
300 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
301 'uploader': '打牌还是打桩',
302 'uploader_id': '150259984',
303 'like_count': int,
304 'comment_count': int,
305 'upload_date': '20200516',
306 'view_count': int,
307 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
308 'duration': 90.314,
309 }
bd8f48c7 310 }, {
ad974876
L
311 'note': 'video has subtitles',
312 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
bd8f48c7 313 'info_dict': {
ad974876 314 'id': 'BV12N4y1M7rh',
bd8f48c7 315 'ext': 'mp4',
c90c5b9b 316 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
ad974876
L
317 'tags': list,
318 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
319 'duration': 313.557,
320 'upload_date': '20220709',
9e68747f 321 'uploader': '小夫太渴',
ad974876
L
322 'timestamp': 1657347907,
323 'uploader_id': '1326814124',
324 'comment_count': int,
325 'view_count': int,
326 'like_count': int,
327 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
328 'subtitles': 'count:2'
bd8f48c7 329 },
ad974876 330 'params': {'listsubtitles': True},
ca270371 331 }, {
ad974876 332 'url': 'https://www.bilibili.com/video/av8903802/',
ca270371 333 'info_dict': {
ad974876 334 'id': 'BV13x41117TL',
f8580bf0 335 'ext': 'mp4',
ca270371 336 'title': '阿滴英文|英文歌分享#6 "Closer',
f8580bf0 337 'upload_date': '20170301',
c90c5b9b 338 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
ad974876 339 'timestamp': 1488353834,
f8580bf0 340 'uploader_id': '65880958',
341 'uploader': '阿滴英文',
ad974876 342 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
89fabf11 343 'duration': 554.117,
ad974876
L
344 'tags': list,
345 'comment_count': int,
346 'view_count': int,
347 'like_count': int,
89fabf11
JN
348 },
349 'params': {
350 'skip_download': True,
351 },
c90c5b9b 352 }, {
353 'note': 'video has chapter',
354 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
355 'info_dict': {
356 'id': 'BV1vL411G7N7',
357 'ext': 'mp4',
358 'title': '如何为你的B站视频添加进度条分段',
359 'timestamp': 1634554558,
360 'upload_date': '20211018',
361 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
362 'tags': list,
363 'uploader': '爱喝咖啡的当麻',
364 'duration': 669.482,
365 'uploader_id': '1680903',
366 'chapters': 'count:6',
367 'comment_count': int,
368 'view_count': int,
369 'like_count': int,
370 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
371 },
372 'params': {'skip_download': True},
ab29e470 373 }, {
374 'note': 'video redirects to festival page',
375 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
376 'info_dict': {
377 'id': 'BV1wP4y1P72h',
378 'ext': 'mp4',
379 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
380 'timestamp': 1643947497,
381 'upload_date': '20220204',
382 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
383 'uploader': '叨叨冯聊音乐',
384 'duration': 246.719,
385 'uploader_id': '528182630',
386 'view_count': int,
387 'like_count': int,
388 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
389 },
390 'params': {'skip_download': True},
391 }, {
392 'note': 'newer festival video',
393 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
394 'info_dict': {
395 'id': 'BV1ay4y1d77f',
396 'ext': 'mp4',
397 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
398 'timestamp': 1674273600,
399 'upload_date': '20230121',
400 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
401 'uploader': '果蝇轰',
402 'duration': 1111.722,
403 'uploader_id': '8469526',
404 'view_count': int,
405 'like_count': int,
406 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
407 },
408 'params': {'skip_download': True},
9f09bdcf 409 }, {
410 'note': 'interactive/split-path video',
411 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
412 'info_dict': {
413 'id': 'BV1af4y1H7ga',
414 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
415 'timestamp': 1630500414,
416 'upload_date': '20210901',
417 'description': 'md5:01113e39ab06e28042d74ac356a08786',
418 'tags': list,
419 'uploader': '钉宫妮妮Ninico',
420 'duration': 1503,
421 'uploader_id': '8881297',
422 'comment_count': int,
423 'view_count': int,
424 'like_count': int,
425 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
426 },
427 'playlist_count': 33,
428 'playlist': [{
429 'info_dict': {
430 'id': 'BV1af4y1H7ga_400950101',
431 'ext': 'mp4',
432 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
433 'timestamp': 1630500414,
434 'upload_date': '20210901',
435 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
436 'tags': list,
437 'uploader': '钉宫妮妮Ninico',
438 'duration': 11.605,
439 'uploader_id': '8881297',
440 'comment_count': int,
441 'view_count': int,
442 'like_count': int,
443 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
444 },
445 }],
446 }, {
447 'note': '301 redirect to bangumi link',
448 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
449 'info_dict': {
450 'id': '288525',
451 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
452 'ext': 'mp4',
453 'series': '我和我的祖国',
454 'series_id': '4780',
455 'season': '幕后纪实',
456 'season_id': '28609',
457 'season_number': 1,
458 'episode': '钱学森弹道和乘波体飞行器是什么?',
459 'episode_id': '288525',
460 'episode_number': 105,
461 'duration': 1183.957,
462 'timestamp': 1571648124,
463 'upload_date': '20191021',
464 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
465 },
466 }, {
467 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
468 'info_dict': {
469 'id': 'BV1jL41167ZG',
470 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
471 'ext': 'mp4',
472 },
473 'skip': 'supporter-only video',
474 }, {
475 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
476 'info_dict': {
477 'id': 'BV1Ks411f7aQ',
478 'title': '【BD1080P】狼与香辛料I【华盟】',
479 'ext': 'mp4',
480 },
481 'skip': 'login required',
482 }, {
483 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
484 'info_dict': {
485 'id': 'BV1GJ411x7h7',
486 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
487 'ext': 'mp4',
488 },
489 'skip': 'geo-restricted',
bd8f48c7 490 }]
28746fbd 491
    def _real_extract(self, url):
        """Extract a regular, anthology, festival or interactive video page.

        Returns a url_result when the page redirects to a URL this extractor
        does not match, a playlist for anthologies (unless one part is
        selected) and for interactive videos, otherwise a single info dict.
        """
        video_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, video_id)
        # The request may have been redirected to a URL handled elsewhere
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        # Pages without 'videoData' are treated as festival pages, whose
        # metadata is under 'videoInfo' and whose play info is fetched later
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info_obj = self._search_json(
                r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
            if not play_info_obj:
                # No embedded play info: map the page's error code to an error
                if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
                    self.raise_login_required()
                if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
                    raise ExtractorError(
                        'This video may be deleted or geo-restricted. '
                        'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
            play_info = traverse_obj(play_info_obj, ('data', {dict}))
            if not play_info:
                # Code 87007 is reported here as a supporter-only video; the
                # message is assembled from the page's 'tips-toast' element
                if traverse_obj(play_info_obj, 'code') == 87007:
                    toast = get_element_by_class('tips-toast', webpage) or ''
                    msg = clean_html(
                        f'{get_element_by_class("belongs-to", toast) or ""},'
                        + (get_element_by_class('level', toast) or ''))
                    raise ExtractorError(
                        f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
                raise ExtractorError('Failed to extract play info')
            video_data = initial_state['videoData']

        # From here on video_id is the bvid reported by the page itself
        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        # The page list is not requested for festival pages
        page_list_json = not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology'),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        # `p` query parameter selects a single part; last occurrence wins
        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            # Without an explicit part, fall back to the first one
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        # Archive id in the older "<aid>_part<N>" form; empty when aid is None
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        festival_info = {}
        if is_festival:
            # Festival pages carry no embedded play info; query the API instead
            play_info = self._download_playinfo(video_id, cid)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        # Later entries override earlier ones: festival values take
        # precedence over the regular-page lookups
        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        # Interactive videos are returned as a playlist of their sections
        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
        if is_interactive:
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
                    'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                    '__post_extractor': self.extract_comments(aid),
                })
        else:
            return {
                **metainfo,
                # 'timelength' is scaled by 1000 to give seconds
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'chapters': self._get_chapters(aid, cid),
                'subtitles': self.extract_subtitles(video_id, cid),
                'formats': self.extract_formats(play_info),
                '__post_extractor': self.extract_comments(aid),
            }
599
06167fbb 600
ad974876 601class BiliBiliBangumiIE(BilibiliBaseIE):
9f09bdcf 602 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
e88d44c6 603
ad974876 604 _TESTS = [{
9f09bdcf 605 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
606 'info_dict': {
607 'id': '21495',
608 'ext': 'mp4',
609 'series': '悠久之翼',
610 'series_id': '774',
611 'season': '第二季',
612 'season_id': '1182',
613 'season_number': 2,
614 'episode': 'forever/ef',
615 'episode_id': '21495',
616 'episode_number': 12,
617 'title': '12 forever/ef',
618 'duration': 1420.791,
619 'timestamp': 1320412200,
620 'upload_date': '20111104',
621 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
622 },
623 }, {
bdd0b75e 624 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
ad974876 625 'info_dict': {
bdd0b75e 626 'id': '267851',
ad974876 627 'ext': 'mp4',
bdd0b75e
GS
628 'series': '鬼灭之刃',
629 'series_id': '4358',
9f09bdcf 630 'season': '立志篇',
bdd0b75e 631 'season_id': '26801',
ad974876 632 'season_number': 1,
bdd0b75e
GS
633 'episode': '残酷',
634 'episode_id': '267851',
635 'episode_number': 1,
636 'title': '1 残酷',
637 'duration': 1425.256,
638 'timestamp': 1554566400,
639 'upload_date': '20190406',
640 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
ad974876 641 },
9f09bdcf 642 'skip': 'Geo-restricted',
643 }, {
644 'note': 'a making-of which falls outside main section',
645 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
646 'info_dict': {
647 'id': '345120',
648 'ext': 'mp4',
649 'series': '鬼灭之刃',
650 'series_id': '4358',
651 'season': '立志篇',
652 'season_id': '26801',
653 'season_number': 1,
654 'episode': '炭治郎篇',
655 'episode_id': '345120',
656 'episode_number': 27,
657 'title': '#1 炭治郎篇',
658 'duration': 1922.129,
659 'timestamp': 1602853860,
660 'upload_date': '20201016',
661 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
662 },
ad974876 663 }]
06167fbb 664
ad974876 665 def _real_extract(self, url):
9f09bdcf 666 episode_id = self._match_id(url)
667 webpage = self._download_webpage(url, episode_id)
e88d44c6 668
ad974876
L
669 if '您所在的地区无法观看本片' in webpage:
670 raise GeoRestrictedError('This video is restricted')
bdd0b75e 671 elif '正在观看预览,大会员免费看全片' in webpage:
ad974876 672 self.raise_login_required('This video is for premium members only')
6461f2b7 673
bdd0b75e
GS
674 headers = {'Referer': url, **self.geo_verification_headers()}
675 play_info = self._download_json(
9f09bdcf 676 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
bdd0b75e
GS
677 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
678 headers=headers)
679 premium_only = play_info.get('code') == -10403
680 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
681
ad974876 682 formats = self.extract_formats(play_info)
bdd0b75e 683 if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
ad974876 684 self.raise_login_required('This video is for premium members only')
bd8f48c7 685
bdd0b75e 686 bangumi_info = self._download_json(
9f09bdcf 687 'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
bdd0b75e
GS
688 query={'ep_id': episode_id}, headers=headers)['result']
689
690 episode_number, episode_info = next((
691 (idx, ep) for idx, ep in enumerate(traverse_obj(
9f09bdcf 692 bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
bdd0b75e 693 if str_or_none(ep.get('id')) == episode_id), (1, {}))
c90c5b9b 694
bdd0b75e 695 season_id = bangumi_info.get('season_id')
9f09bdcf 696 season_number, season_title = season_id and next((
697 (idx + 1, e.get('season_title')) for idx, e in enumerate(
bdd0b75e 698 traverse_obj(bangumi_info, ('seasons', ...)))
c90c5b9b 699 if e.get('season_id') == season_id
9f09bdcf 700 ), (None, None))
06167fbb 701
bdd0b75e
GS
702 aid = episode_info.get('aid')
703
e88d44c6 704 return {
9f09bdcf 705 'id': episode_id,
ad974876 706 'formats': formats,
bdd0b75e
GS
707 **traverse_obj(bangumi_info, {
708 'series': ('series', 'series_title', {str}),
709 'series_id': ('series', 'series_id', {str_or_none}),
710 'thumbnail': ('square_cover', {url_or_none}),
711 }),
9f09bdcf 712 **traverse_obj(episode_info, {
713 'episode': ('long_title', {str}),
714 'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
715 'timestamp': ('pub_time', {int_or_none}),
716 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
717 }),
bdd0b75e 718 'episode_id': episode_id,
9f09bdcf 719 'season': str_or_none(season_title),
bdd0b75e 720 'season_id': str_or_none(season_id),
c90c5b9b 721 'season_number': season_number,
c90c5b9b 722 'duration': float_or_none(play_info.get('timelength'), scale=1000),
9f09bdcf 723 'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
bdd0b75e
GS
724 '__post_extractor': self.extract_comments(aid),
725 'http_headers': headers,
e88d44c6 726 }
bd8f48c7 727
bd8f48c7 728
bdd0b75e 729class BiliBiliBangumiMediaIE(BilibiliBaseIE):
9e68747f 730 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
bd8f48c7 731 _TESTS = [{
ad974876 732 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
bd8f48c7 733 'info_dict': {
ad974876 734 'id': '24097891',
9f09bdcf 735 'title': 'CAROLE & TUESDAY',
736 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
bd8f48c7 737 },
ad974876 738 'playlist_mincount': 25,
9f09bdcf 739 }, {
740 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
741 'info_dict': {
742 'id': '1565',
743 'title': '攻壳机动队 S.A.C. 2nd GIG',
744 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
745 },
746 'playlist_count': 26,
747 'playlist': [{
748 'info_dict': {
749 'id': '68540',
750 'ext': 'mp4',
751 'series': '攻壳机动队',
752 'series_id': '1077',
753 'season': '第二季',
754 'season_id': '1565',
755 'season_number': 2,
756 'episode': '再启动 REEMBODY',
757 'episode_id': '68540',
758 'episode_number': 1,
759 'title': '1 再启动 REEMBODY',
760 'duration': 1525.777,
761 'timestamp': 1425074413,
762 'upload_date': '20150227',
763 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
764 },
765 }],
bd8f48c7
YCH
766 }]
767
bd8f48c7 768 def _real_extract(self, url):
ad974876
L
769 media_id = self._match_id(url)
770 webpage = self._download_webpage(url, media_id)
bdd0b75e 771
9f09bdcf 772 initial_state = self._search_json(
773 r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
774 ss_id = initial_state['mediaInfo']['season_id']
775
776 return self.playlist_result(
777 self._get_episodes_from_season(ss_id, url), media_id,
778 **traverse_obj(initial_state, ('mediaInfo', {
779 'title': ('title', {str}),
780 'description': ('evaluate', {str}),
781 })))
bdd0b75e 782
bd8f48c7 783
bdd0b75e 784class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
9e68747f 785 _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
bdd0b75e
GS
786 _TESTS = [{
787 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
788 'info_dict': {
9f09bdcf 789 'id': '26801',
790 'title': '鬼灭之刃',
791 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
bdd0b75e
GS
792 },
793 'playlist_mincount': 26
9f09bdcf 794 }, {
795 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
796 'info_dict': {
797 'id': '2251',
798 'title': '玲音',
799 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
800 },
801 'playlist_count': 13,
802 'playlist': [{
803 'info_dict': {
804 'id': '50188',
805 'ext': 'mp4',
806 'series': '玲音',
807 'series_id': '1526',
808 'season': 'TV',
809 'season_id': '2251',
810 'season_number': 1,
811 'episode': 'WEIRD',
812 'episode_id': '50188',
813 'episode_number': 1,
814 'title': '1 WEIRD',
815 'duration': 1436.992,
816 'timestamp': 1343185080,
817 'upload_date': '20120725',
818 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
819 },
820 }],
bdd0b75e
GS
821 }]
822
823 def _real_extract(self, url):
824 ss_id = self._match_id(url)
9f09bdcf 825 webpage = self._download_webpage(url, ss_id)
826 metainfo = traverse_obj(
827 self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
828 ('itemListElement', ..., {
829 'title': ('name', {str}),
830 'description': ('description', {str}),
831 }), get_all=False)
832
833 return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
834
835
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Helpers shared by the Bilibili "cheese" (course) extractors."""

    # Sent with every API request below and handed to the downloader via 'http_headers'
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict of a single course episode.

        @param season_info  `data` object as returned by _download_season_info
        @param ep_id        episode ID; compared against the episodes' integer `id`
        @raises ExtractorError  if the episode is not listed in `season_info`
                                or its `ep_status` is -1 (not yet available)

        Episodes that are not `playable` are reported through raise_login_required.
        """
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        if not episode_info:
            # Without this guard the subscripting below fails with an opaque TypeError
            raise ExtractorError(f'Unable to find episode {ep_id} in the season info')
        aid, cid = episode_info['aid'], episode_info['cid']

        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Entries yielded by the season extractor are attributed to the episode extractor
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Download the season metadata (`data` object) that contains `video_id`.

        @param query_key  name of the query parameter carrying `video_id`
                          (callers in this file use 'ep_id' and 'season_id')
        """
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
bd8f48c7 884
9f09bdcf 885
886class BilibiliCheeseIE(BilibiliCheeseBaseIE):
887 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
888 _TESTS = [{
889 'url': 'https://www.bilibili.com/cheese/play/ep229832',
890 'info_dict': {
891 'id': '229832',
892 'ext': 'mp4',
893 'title': '1 - 课程先导片',
894 'alt_title': '视频课 · 3分41秒',
895 'uploader': '马督工',
896 'uploader_id': '316568752',
897 'episode': '课程先导片',
898 'episode_id': '229832',
899 'episode_number': 1,
900 'duration': 221,
901 'timestamp': 1695549606,
902 'upload_date': '20230924',
903 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
904 'view_count': int,
905 }
906 }]
907
908 def _real_extract(self, url):
909 ep_id = self._match_id(url)
910 return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
911
912
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    """Extractor for a whole course (`/cheese/play/ss<id>`) as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            },
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        """Lazily yield an info dict for every episode flagged `episode_can_view`."""
        viewable_ids = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        for episode_id in viewable_ids:
            yield self._extract_episode(season_info, episode_id)

    def _real_extract(self, url):
        """Download the season info and wrap its viewable episodes in a playlist."""
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)
        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })

        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id, **playlist_meta)
4bc15a68
RA
966
967
2b9d0216
L
class BilibiliSpaceBaseIE(InfoExtractor):
    """Base class providing paged playlist extraction for user-space listings."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Return `(metadata, paged_list)` for a paginated listing.

        @param fetch_page    callable taking a 0-based page index, returning page data
        @param get_metadata  callable mapping the first page's data to a dict that
                             contains at least 'page_count' and 'page_size'
        @param get_entries   callable mapping page data to that page's entries
        """
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def page_entries(idx):
            # Page 0 was already downloaded for the metadata; do not request it twice
            page_data = fetch_page(idx) if idx else initial_page
            return get_entries(page_data)

        return metadata, InAdvancePagedList(
            page_entries, metadata['page_count'], metadata['page_size'])
978
979
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Extractor for the video listing of a user space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
    }]

    def _extract_signature(self, playlist_id):
        """Derive the 32-character key appended to the query before hashing `w_rid`.

        The key is a fixed permutation of the concatenated `img_url`/`sub_url`
        basenames from the nav API; hard-coded fallbacks are used when the
        API data is unavailable.
        """
        nav_data = self._download_json(
            'https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

        def key_from_url(x):
            # Basename of the URL up to its first dot
            return x[x.rfind('/') + 1:].split('.')[0]

        img_key = traverse_obj(
            nav_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
        sub_key = traverse_obj(
            nav_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
        combined_key = img_key + sub_key

        permutation = (
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
            57, 62, 11, 36, 20, 34, 44, 52,
        )
        # Positions beyond the end of the key are skipped rather than raising
        picked = (try_call(lambda: combined_key[position]) for position in permutation)
        return ''.join(filter(None, picked))[:32]

    def _real_extract(self, url):
        """Return a playlist of all videos uploaded by the user."""
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        signature = self._extract_signature(playlist_id)

        def fetch_page(page_idx):
            # NOTE: the key order matters, since the urlencoded query is what gets hashed
            query = {
                'keyword': '',
                'mid': playlist_id,
                'order': 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
                'wts': int(time.time()),
            }
            signed_payload = f'{urllib.parse.urlencode(query)}{signature}'
            query['w_rid'] = hashlib.md5(signed_payload.encode()).hexdigest()

            try:
                response = self._download_json(
                    'https://api.bilibili.com/x/space/wbi/arc/search',
                    playlist_id, note=f'Downloading page {page_idx}', query=query)
            except ExtractorError as e:
                if not (isinstance(e.cause, HTTPError) and e.cause.status == 412):
                    raise
                raise ExtractorError(
                    'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
            if response['code'] == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            return response['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['ps']
            entry_count = paging['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            videos = traverse_obj(page_data, ('list', 'vlist')) or []
            for video in videos:
                yield self.url_result(
                    f'https://www.bilibili.com/video/{video["bvid"]}', BiliBiliIE, video['bvid'])

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
6efb0711 1069
6efb0711 1070
2b9d0216
L
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Extractor for the audio listing of a user space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Return a playlist of all audio tracks uploaded by the user."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            page_query = {
                'uid': playlist_id,
                'pn': page_idx + 1,  # the API parameter is 1-based
                'ps': 30,
                'order': 1,
                'jsonp': 'jsonp',
            }
            response = self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}', query=page_query)
            return response['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for song in page_data.get('data', []):
                yield self.url_result(
                    f'https://www.bilibili.com/audio/au{song["id"]}', BilibiliAudioIE, song['id'])

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
1102
1103
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Base class for list-like playlists (collections, series, favorites, ...)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a url_result for every video ID found in `page_data`.

        @param bvid_keys   key or path (tuple of keys) leading to the list of items
        @param ending_key  key of each item that holds the video ID
        """
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Return the uploader name parsed from the space page title, or None."""
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        if not isinstance(webpage, str):
            # The download is non-fatal, so the result may not be a page
            # (presumably a falsy value on failure - confirm against InfoExtractor);
            # searching a non-string would raise TypeError
            return None
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Like the parent implementation, but strip the paging keys from the metadata.

        This leaves only keys that callers in this file pass on to playlist_result.
        """
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
1118
1119
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Extractor for `collectiondetail` pages of a user space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        """Return the collection as a playlist with ID `<mid>_<sid>`."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            page_query = {
                'mid': mid,
                'season_id': sid,
                'page_num': page_idx + 1,  # the API parameter is 1-based
                'page_size': 30,
            }
            response = self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}', query=page_query)
            return response['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['page_size']
            entry_count = paging['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, ('meta', {
                    'title': ('name', {str}),
                    'description': ('description', {str}),
                    'uploader_id': ('mid', {str_or_none}),
                    'timestamp': ('ptime', {int_or_none}),
                    'thumbnail': ('cover', {url_or_none}),
                })),
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1168
1169
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Extractor for `seriesdetail` pages of a user space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        """Return the series as a playlist with ID `<mid>_<sid>`."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        # Series metadata is optional; a failed download only loses these fields
        series_json = self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False)
        playlist_meta = traverse_obj(series_json, {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            page_query = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
            response = self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}', query=page_query)
            return response['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['size']
            entry_count = paging['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1222
1223
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Extractor for favorites lists (`favlist?fid=` and `medialist/detail/ml`)."""

    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the favorites list as a playlist; private lists require login."""
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The `ids` endpoint is requested once, without any paging parameters
        id_list = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(id_list, 'data')

        playlist_meta = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **playlist_meta)
1272
1273
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Extractor for the logged-in user's "watch later" list."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Return the watch-later list as a playlist."""
        # The playlist ID is the `DedeUserID` cookie value, or 'watchlater' without that cookie
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = getattr(user_cookie, 'value', 'watchlater')

        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')

        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')), id=list_id, title='稍后再看')
1291
1292
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extractor for `/list/<id>` and `/medialist/play/<id>` playlist pages."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield all entries of a media list, following `has_more` across pages.

        NOTE: `query` is mutated; its 'oid' is set to the ID of the last item
        of each downloaded page before the next request.
        """
        page_num, has_more = 0, True
        while has_more:
            page_num += 1
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            has_more = page_data.get('has_more', False)

    def _real_extract(self, url):
        """Read the page's `__INITIAL_STATE__` and return the media list as a playlist."""
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)

        status_code = traverse_obj(initial_state, ('error', 'code', {int_or_none}))
        if status_code != 200:
            true_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if true_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif true_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif true_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {true_code} {message}')

        state_params = traverse_obj(initial_state, {
            'type': ('playlist', 'type', {int_or_none}),
            'biz_id': ('playlist', 'id', {int_or_none}),
            'tid': ('tid', {int_or_none}),
            'sort_field': ('sortFiled', {int_or_none}),
            'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
        })
        query = {
            'ps': 20,
            'with_current': False,
            **state_params,
        }

        list_meta = traverse_obj(initial_state, ('mediaListInfo', {
            'title': ('title', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
        }))
        metadata = {
            'id': f'{query["type"]}_{query["biz_id"]}',
            **list_meta,
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
06167fbb 1392
1393
class BilibiliCategoryIE(InfoExtractor):
    """Extractor for category listing pages (`/v/<category>/<subcategory>`)."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad'
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45
        }
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield a url_result for every video on page `page_num` of the listing.

        @raises ExtractorError  if the page contains no video list
        """
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note=f'Extracting results from page {page_num} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page_num:d}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return an OnDemandPagedList over the videos of a subcategory.

        @raises ExtractorError  for unsupported (sub)categories or when the
                                paging information cannot be determined
        """
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value:d}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # traverse_obj gives None when `data.page` is missing or not a dict; use an
        # empty dict so the check below raises ExtractorError instead of AttributeError
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        """Return the category listing as a playlist titled `<category>: <subcategory>`."""
        # The path is `/v/<category>/<subcategory>`, i.e. components 2 and 3 after splitting
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
1460
1461
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor backed by the `search/type` web API."""

    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'

    def _search_results(self, query):
        """Yield url_results page by page until a page has no results."""
        for page_num in itertools.count(1):
            search_query = {
                'Search_key': query,
                'keyword': query,
                'page': page_num,
                'context': '',
                'duration': 0,
                'tids_2': '',
                '__refresh__': 'true',
                'search_type': 'video',
                'tids': 0,
                'highlight': 1,
            }
            response = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query=search_query)
            videos = response['data'].get('result')
            if not videos:
                return
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
06167fbb 1487
1488
4bc15a68
RA
class BilibiliAudioBaseIE(InfoExtractor):
    """Base class for the audio extractors."""

    def _call_api(self, path, sid, query=None):
        """Request `path` of the audio web API and return the response's `data`.

        When `query` is empty or omitted, `{'sid': sid}` is sent instead.
        """
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        response = self._download_json(api_url, sid, query=query or {'sid': sid})
        return response['data']
1496
1497
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extractor for a single audio track (`/audio/au<id>`)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Return the info dict of the track; only the first CDN URL is used."""
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            # The page URL is sent as Referer when downloading the media
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        lyric = song.get('lyric')
        subtitles = {'origin': [{'url': lyric}]} if lyric else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
1566
1567
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extractor for an audio album/menu (`/audio/am<id>`)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Return the album as a playlist; a single page of up to 100 songs is requested."""
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Songs without an ID are skipped
        song_ids = filter(None, (str_or_none(song.get('id')) for song in songs))
        entries = [
            self.url_result(
                'https://www.bilibili.com/audio/au' + song_id,
                BilibiliAudioIE.ie_key(), song_id)
            for song_id in song_ids
        ]

        # The album info is only requested when there is at least one entry
        album_data = (self._call_api('menu/info', am_id) or {}) if entries else {}
        album_title = album_data.get('title')
        if not album_title:
            return self.playlist_result(entries, am_id)

        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
63dce309
S
1605
1606
class BiliBiliPlayerIE(InfoExtractor):
    """Extractor for embedded player URLs; delegates to BiliBiliIE by `aid`."""

    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Hand the `aid` from the player URL over to BiliBiliIE as an `av` video URL."""
        video_id = self._match_id(url)
        video_url = 'http://www.bilibili.tv/video/av%s/' % video_id
        return self.url_result(video_url, ie=BiliBiliIE.ie_key(), video_id=video_id)
16f7e6be
AG
1619
1620
class BiliIntlBaseIE(InfoExtractor):
    """Base class for the bilibili.tv / biliintl.com extractors."""

    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'

    def _call_api(self, endpoint, *args, **kwargs):
        """Call an API endpoint and return the `data` member of its response.

        Positional and keyword arguments are forwarded to _download_json.
        A non-zero `code` in the response is handled as follows: known codes
        are reported as login-required or geo-restricted, any other code
        raises ExtractorError if a truthy `fatal` keyword was passed and is
        reported as a warning otherwise.

        Returns None when no JSON object was obtained.
        """
        response = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if not isinstance(response, dict):
            # Callers pass fatal=False, so the download may not give a JSON object
            # (presumably a falsy value on failure - confirm against InfoExtractor);
            # calling .get() on it would raise AttributeError
            return None
        code = response.get('code')
        if code:
            if code in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif code == 10004001:
                self.raise_geo_restricted()
            else:
                message = response.get('message')
                base_errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                # Some responses merely repeat the code as message; that adds no information
                if message and str(code) != message:
                    errmsg = f'{base_errmsg}: {self.IE_NAME} said: {message}'
                else:
                    errmsg = base_errmsg
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return response.get('data')

    def json2srt(self, json):
        """Convert the subtitle JSON (cues under `body`) into SRT-formatted text.

        Cues lacking `content`, `from` or `to` are skipped.
        """
        # `from` is compared against None rather than tested for truthiness,
        # so that a cue starting at 0 is not dropped
        cues = traverse_obj(json, (
            'body', lambda _, l: l['content'] and l['from'] is not None and l['to']))
        return '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(cues))

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Download all available subtitles and return them as SRT data keyed by `lang_key`.

        Exactly which of `ep_id`/`aid` is given decides the query sent to the API;
        the parameter that is None is filtered out of the query.
        """
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        for sub in sub_json.get('subtitles') or []:
            sub_url = sub.get('url')
            if not sub_url:
                continue
            # Built separately: `'...%s' % a if cond else ''` would parse as
            # `('...%s' % a) if cond else ''` and yield an empty note without a language
            lang_note = f' for {sub["lang"]}' if sub.get('lang') else ''
            sub_data = self._download_json(
                sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
                note=f'Downloading subtitles{lang_note}')
            if not sub_data:
                continue
            subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
                'ext': 'srt',
                'data': self.json2srt(sub_data)
            })
        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Return the video-only and audio-only formats listed by the playurl API."""
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Map API video data to title, thumbnail and episode number.

        The episode number is parsed from a `title_display` of the form `E<number>[ - ...]`.
        """
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'thumbnail': video_data.get('cover'),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in with username and password.

        The password is prefixed with the server-provided hash, encrypted
        with the server-provided RSA key (PKCS#1 v1.5) and sent base64-encoded.

        @raises ExtractorError  if pycryptodomex is unavailable or the login is rejected
        """
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true'
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')
1747
16f7e6be
AG
1748
class BiliIntlIE(BiliIntlBaseIE):
    """Extractor for single bilibili.tv / biliintl.com episodes and videos."""

    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro'
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro'
            }],
        }
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro'
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro'
            }],
        }
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        'url': 'https://www.bilibili.tv/en/video/2041863208',
        'info_dict': {
            'id': '2041863208',
            'ext': 'mp4',
            'timestamp': 1670874843,
            'description': 'Scheduled for April 2023.\nStudio: ufotable',
            'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
            'upload_date': '20221212',
            'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
        },
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro'
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro'
            }],
        },
        'params': {
            'getcomments': True
        }
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
            'comment_count': int,
            'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
        },
        'params': {
            'getcomments': True
        }
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Build the English-locale watch URL for a video.

        With a ``series_id`` the ``/play/<series>/<video>`` form is returned,
        otherwise the ``/video/<video>`` form. Declared as a static method so
        that it behaves the same whether it is called on the class or on an
        instance.
        """
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Return title/thumbnail/episode metadata for a video.

        Smuggled metadata that already carries a title is returned unchanged.
        Otherwise the webpage's preload state is read, falling back to the
        season's episode list when ``season_id`` is given and the page
        yielded nothing, and finally to JSON-LD and OpenGraph tags.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            # get_all=False yields None when no episode matches; keep a dict
            # so that _parse_video_metadata() does not fail on None.get()
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False) or {}

        # XXX: webpage metadata may not be accurate; it is only used so that
        # extraction does not crash when video_data is not found
        return merge_dicts(
            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
                'title': self._html_search_meta('og:title', webpage),
                'description': self._html_search_meta('og:description', webpage)
            })

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Yield every reply below the root comment ``root_id``.

        Pages are fetched iteratively, starting at cursor ``next_id``.
        Paging stops when the response marks the end, when a page holds no
        replies, or when no follow-up cursor is provided, so a malformed
        response ends the generator instead of raising or looping.
        """
        while True:
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/detail', display_id,
                note=f'Downloading reply comment of {root_id} - {next_id}',
                query={
                    'platform': 'web',
                    'ps': 20,  # comment's reply per page (default: 3)
                    'root': root_id,
                    'next': next_id,
                })

            replies = traverse_obj(comment_api_raw_data, ('data', 'replies', ...))
            for reply in replies:
                yield {
                    'author': traverse_obj(reply, ('member', 'name')),
                    'author_id': traverse_obj(reply, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(reply, ('member', 'face')),
                    'text': traverse_obj(reply, ('content', 'message')),
                    'id': reply.get('rpid'),
                    'like_count': int_or_none(reply.get('like_count')),
                    'parent': reply.get('parent'),
                    'timestamp': unified_timestamp(reply.get('ctime_text'))
                }

            cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor'), expected_type=dict) or {}
            next_id = cursor.get('next')
            if not replies or cursor.get('is_end') or next_id is None:
                return

    def _get_comments(self, video_id, ep_id):
        """Yield root comments of a video, each followed by its replies.

        ``ep_id`` only selects the comment type sent to the API (3 when set,
        1 otherwise). Paging stops when the response marks the end or when a
        page holds no comments, so a response without a cursor cannot cause
        an endless request loop.
        """
        for page in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {page + 1}',
                query={
                    'platform': 'web',
                    'pn': page,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            comments = traverse_obj(comment_api_raw_data, ('data', 'replies', ...))
            for comment in comments:
                yield {
                    'author': traverse_obj(comment, ('member', 'name')),
                    'author_id': traverse_obj(comment, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(comment, ('member', 'face')),
                    'text': traverse_obj(comment, ('content', 'message')),
                    'id': comment.get('rpid'),
                    'like_count': int_or_none(comment.get('like_count')),
                    'timestamp': unified_timestamp(comment.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(comment, ('member', 'type'))),
                }
                if comment.get('count'):
                    yield from self._get_comments_reply(comment.get('rpid'), display_id=video_id)

            if not comments or traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break

    def _real_extract(self, url):
        """Extract one episode (``ep_id``) or standalone video (``aid``)."""
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            # Non-fatal: chapters are optional
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start and end times seem to be off by a few seconds
                # even though they are correct according to ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro'
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro'
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id)
        }
16f7e6be
AG
2025
2026
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Playlist extractor for bilibili.tv / biliintl.com series pages."""

    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield one url_result per episode listed for the series.

        The metadata parsed from the episode list is smuggled into each
        episode URL.
        """
        episode_list = self._call_api(
            f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        episodes = traverse_obj(episode_list, ('sections', ..., 'episodes', ...), expected_type=dict)
        for episode in episodes:
            episode_id = str(episode['episode_id'])
            episode_url = smuggle_url(
                BiliIntlIE._make_url(episode_id, series_id),
                self._parse_video_metadata(episode))
            yield self.url_result(episode_url, BiliIntlIE, episode_id)

    def _real_extract(self, url):
        """Return the series as a playlist with season-level metadata."""
        series_id = self._match_id(url)
        season = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}

        return self.playlist_result(
            self._entries(series_id), series_id, season.get('title'), season.get('description'),
            categories=traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(season.get('horizontal_cover')),
            view_count=parse_count(season.get('view')))
b4f53662
H
2079
2080
class BiliLiveIE(InfoExtractor):
    """Extractor for live.bilibili.com live rooms."""

    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
            'ext': 'flv',
            'title': "太空狼人杀联动,不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live'
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True
    }]

    # Quality number (the API's ``qn``) -> format id and display note
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Ranks a qn by its position in _FORMATS
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Fetch ``path`` from the live API and return its ``data`` payload.

        Raises ExtractorError when the response ``code`` is not 0; an absent
        or empty ``data`` is returned as an empty dict.
        """
        response = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if response.get('code') == 0:
            return response.get('data') or {}
        raise ExtractorError(response.get('message') or 'Unable to download JSON metadata')

    def _parse_formats(self, qn, fmt):
        """Yield a format dict per URL of every codec currently served at ``qn``."""
        matching_codecs = (
            codec for codec in fmt.get('codec') or []
            if codec.get('current_qn') == qn)
        for codec in matching_codecs:
            for url_info in codec['url_info']:
                yield {
                    'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        """Extract the running live stream of a room.

        Raises an expected ExtractorError when the room's ``live_status``
        is 0.
        """
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        # One play-info request per known quality number
        for qn in self._FORMATS:
            stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            })
            stream_formats = traverse_obj(
                stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []
            for fmt in stream_formats:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            # Taken from the response of the last quality requested above
            'timestamp': stream_data.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }