]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bilibili.py
[ie/bilibili] Add referer header and fix metadata extraction (#8832)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
CommitLineData
cfcf60ea 1import base64
c34f505b 2import functools
6f10cdcf 3import hashlib
ad974876 4import itertools
9f09bdcf 5import json
c34f505b 6import math
5336bf57 7import re
6f10cdcf 8import time
ad974876 9import urllib.parse
28746fbd 10
06167fbb 11from .common import InfoExtractor, SearchInfoExtractor
f6a765ce 12from ..dependencies import Cryptodome
3d2623a8 13from ..networking.exceptions import HTTPError
28746fbd 14from ..utils import (
bd8f48c7 15 ExtractorError,
ad974876 16 GeoRestrictedError,
2b9d0216
L
17 InAdvancePagedList,
18 OnDemandPagedList,
9e68747f 19 bool_or_none,
9f09bdcf 20 clean_html,
cf6413e8 21 determine_ext,
f5f15c99 22 filter_dict,
6461f2b7 23 float_or_none,
ad974876 24 format_field,
9f09bdcf 25 get_element_by_class,
2b9d0216 26 int_or_none,
bdd0b75e 27 join_nonempty,
ad974876 28 make_archive_id,
d37422f1 29 merge_dicts,
f8580bf0 30 mimetype2ext,
2b9d0216 31 parse_count,
ad974876 32 parse_qs,
b4f53662 33 qualities,
26fdfc37 34 smuggle_url,
efc947fb 35 srt_subtitles_timecode,
4bc15a68 36 str_or_none,
2b9d0216 37 traverse_obj,
6f10cdcf 38 try_call,
b093c38c 39 unified_timestamp,
26fdfc37 40 unsmuggle_url,
c62ecf0d 41 url_or_none,
ad974876 42 urlencode_postdata,
9e68747f 43 variadic,
28746fbd
PH
44)
45
46
class BilibiliBaseIE(InfoExtractor):
    """Helpers shared by the Bilibili extractors in this module."""

    # Captures the digits between '-' and '.m4s?' in a stream URL; used as the video format_id
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')

    def extract_formats(self, play_info):
        """Build the list of format dicts from a play-info dict.

        Audio streams are read from play_info['dash'] (the plain 'audio' list,
        the 'dolby' audio list and the single 'flac' entry); video streams from
        play_info['dash']['video']. Every quality listed in 'support_formats'
        that did not yield a video format is reported to the user.
        """
        # quality id -> human readable name (may be None if the API gives no description)
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        # Filter by type like the entries above so that .get() below is always safe
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio', {dict}))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            # Only mark the stream as video-only when separate audio streams exist
            'acodec': 'none' if audios else None,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            # Prefer the id embedded in the stream URL, fall back to the stream's own id
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
        if missing_formats:
            # A name may be None; fall back to the quality id so that join() cannot raise
            missing_names = ", ".join(str(format_names[i] or i) for i in missing_formats)
            self.to_screen(f'Format(s) {missing_names} are missing; '
                           f'you have to login or become premium member to download them. {self._login_hint()}')

        return formats

    def _download_playinfo(self, video_id, cid):
        """Download the playurl API response for (video_id, cid) and return its 'data' member."""
        return self._download_json(
            'https://api.bilibili.com/x/player/playurl', video_id,
            query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
            note=f'Downloading video formats for cid {cid}')['data']

    def json2srt(self, json_data):
        """Convert a subtitle JSON object (a 'body' list of from/to/content items) to SRT text."""
        return ''.join(
            f'{idx}\n'
            f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
            f'{line["content"]}\n\n'
            for idx, line in enumerate(json_data.get('body') or [], 1))

    def _get_subtitles(self, video_id, cid, aid=None):
        """Return a subtitles dict: the danmaku XML plus one SRT entry per listed language.

        The player API is queried by aid/cid when aid is given, otherwise by bvid/cid.
        """
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }]
        }

        subtitle_info = traverse_obj(self._download_json(
            'https://api.bilibili.com/x/player/v2', video_id,
            query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
            note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
        subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
        if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
            if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'):  # no login session cookie
                self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
        for s in subs_list:
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
            })
        return subtitles

    def _get_chapters(self, aid, cid):
        """Return chapter dicts built from the player API 'view_points', or None if there are none."""
        # Skip the request entirely when either id is missing
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

    def _get_comments(self, aid):
        """Yield comment dicts page by page until a page has no replies."""
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
            for children in map(self._get_all_children, replies):
                yield from children

    def _get_all_children(self, reply):
        """Yield the comment dict for `reply`, then recursively those of its nested replies."""
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
            yield from children

    def _get_episodes_from_season(self, ss_id, url):
        """Yield a url_result for each main-section episode of season `ss_id`."""
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        # Only episodes with a valid share URL and an id are emitted
        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))

    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
        """Recursively collect the edges of an interactive video, grouped by cid.

        `edges` maps edge_id -> edge info and is filled in place; it must already
        hold an entry with a 'cid' for `edge_id`. Returns `cid_edges`, a mapping
        cid -> {edge_id: edge info}, shared across the recursive calls.
        """
        if cid_edges is None:
            cid_edges = {}
        division_data = self._download_json(
            'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
            query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
            note=f'Extracting divisions from edge {edge_id}')
        # traverse_obj returns None when nothing matches; dict.update(None) would raise TypeError
        edges.setdefault(edge_id, {}).update(
            traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
                'title': ('title', {str}),
                'cid': ('cid', {int_or_none}),
            }), get_all=False) or {})

        edges[edge_id].update(traverse_obj(division_data, ('data', {
            'title': ('title', {str}),
            'choices': ('edges', 'questions', ..., 'choices', ..., {
                'edge_id': ('id', {int_or_none}),
                'cid': ('cid', {int_or_none}),
                'text': ('option', {str}),
            }),
        })) or {})
        # use dict to combine edges that use the same video section (same cid)
        cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
        for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
            # Visit each edge only once; this also guards against cycles in the graph
            if choice['edge_id'] not in edges:
                edges[choice['edge_id']] = {'cid': choice['cid']}
                self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
        return cid_edges

    def _get_interactive_entries(self, video_id, cid, metainfo):
        """Yield one info dict per distinct cid reachable in an interactive video.

        Each entry starts from `metainfo` and overrides id, title, formats,
        description, duration and subtitles for that section.
        """
        graph_version = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/wbi/v2', video_id,
                'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
            ('data', 'interaction', 'graph_version', {int_or_none}))
        # Start the walk at edge 1, seeded with the cid that was passed in
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
            play_info = self._download_playinfo(video_id, cid)
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
                # Title suffix comes from the first edge recorded for this cid
                'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
                'formats': self.extract_formats(play_info),
                'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'subtitles': self.extract_subtitles(video_id, cid),
            }
bdd0b75e 223
ad974876
L
224
class BiliBiliIE(BilibiliBaseIE):
    """Extractor for bilibili.com video pages and festival pages that carry a bvid parameter."""

    # Matches /video/<av|BV id> and /festival/<name>?...bvid=<BV id>; the captured id excludes the av/BV prefix
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
        },
    }, {
        'note': 'old av URL version',
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            'ext': 'mp4',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'id': 'BV11x411K7CN',
            'title': '【金坷垃】金泡沫',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 18,
        'playlist': [{
            'info_dict': {
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:10',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
            }
        }]
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:10',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
        }
    }, {
        'note': 'video has subtitles',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫太渴',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2'
        },
        'params': {'listsubtitles': True},
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'video redirects to festival page',
        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
        'info_dict': {
            'id': 'BV1wP4y1P72h',
            'ext': 'mp4',
            'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
            'timestamp': 1643947497,
            'upload_date': '20220204',
            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
            'uploader': '叨叨冯聊音乐',
            'duration': 246.719,
            'uploader_id': '528182630',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
        'info_dict': {
            'id': 'BV1ay4y1d77f',
            'ext': 'mp4',
            'title': '【崩坏3新春剧场】为特别的你送上祝福!',
            'timestamp': 1674273600,
            'upload_date': '20230121',
            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
            'uploader': '果蝇轰',
            'duration': 1111.722,
            'uploader_id': '8469526',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'interactive/split-path video',
        'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
        'info_dict': {
            'id': 'BV1af4y1H7ga',
            'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
            'timestamp': 1630500414,
            'upload_date': '20210901',
            'description': 'md5:01113e39ab06e28042d74ac356a08786',
            'tags': list,
            'uploader': '钉宫妮妮Ninico',
            'duration': 1503,
            'uploader_id': '8881297',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'playlist_count': 33,
        'playlist': [{
            'info_dict': {
                'id': 'BV1af4y1H7ga_400950101',
                'ext': 'mp4',
                'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
                'timestamp': 1630500414,
                'upload_date': '20210901',
                'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
                'tags': list,
                'uploader': '钉宫妮妮Ninico',
                'duration': 11.605,
                'uploader_id': '8881297',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }, {
        'note': '301 redirect to bangumi link',
        'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
        'info_dict': {
            'id': '288525',
            'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
            'ext': 'mp4',
            'series': '我和我的祖国',
            'series_id': '4780',
            'season': '幕后纪实',
            'season_id': '28609',
            'season_number': 1,
            'episode': '钱学森弹道和乘波体飞行器是什么?',
            'episode_id': '288525',
            'episode_number': 105,
            'duration': 1183.957,
            'timestamp': 1571648124,
            'upload_date': '20191021',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
        'info_dict': {
            'id': 'BV1jL41167ZG',
            'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
            'ext': 'mp4',
        },
        'skip': 'supporter-only video',
    }, {
        'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
        'info_dict': {
            'id': 'BV1Ks411f7aQ',
            'title': '【BD1080P】狼与香辛料I【华盟】',
            'ext': 'mp4',
        },
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
        'info_dict': {
            'id': 'BV1GJ411x7h7',
            'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
            'ext': 'mp4',
        },
        'skip': 'geo-restricted',
    }]

    def _real_extract(self, url):
        """Extract a single video, an anthology playlist, or an interactive-video playlist.

        Returns a url_result when the page redirected to a URL this extractor
        does not match, a playlist for anthologies (when no part is selected and
        playlists are wanted) and for interactive videos, otherwise a video info dict.
        Raises ExtractorError when play info cannot be found on a regular video page.
        """
        video_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, video_id)
        # The request may have been redirected to a page handled by another extractor
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        # Pages whose initial state lacks 'videoData' are treated as festival pages
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info_obj = self._search_json(
                r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
            if not play_info_obj:
                # No play info in the page: report the error code from the initial state, if recognised
                if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
                    self.raise_login_required()
                if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
                    raise ExtractorError(
                        'This video may be deleted or geo-restricted. '
                        'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
            play_info = traverse_obj(play_info_obj, ('data', {dict}))
            if not play_info:
                if traverse_obj(play_info_obj, 'code') == 87007:
                    # Build the error detail from the 'tips-toast' element of the page
                    toast = get_element_by_class('tips-toast', webpage) or ''
                    msg = clean_html(
                        f'{get_element_by_class("belongs-to", toast) or ""},'
                        + (get_element_by_class('level', toast) or ''))
                    raise ExtractorError(
                        f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
                raise ExtractorError('Failed to extract play info')
            video_data = initial_state['videoData']

        # From here on video_id is the bvid reported by the page, not the id matched from the URL
        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology'),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        # Last 'p' query parameter of the URL, if any
        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            # Default to the first part and append the part number and name to the title
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        # Id in the '<aid>_part<N>' form, stored below under '_old_archive_ids'
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        festival_info = {}
        if is_festival:
            # Festival pages do not go through the __playinfo__ branch above, so fetch play info via the API
            play_info = self._download_playinfo(video_id, cid)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        # Later entries override earlier ones: festival_info wins over the regular-page fields
        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
        if is_interactive:
            # Interactive videos become a playlist with one entry per video section
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
                    'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                    '__post_extractor': self.extract_comments(aid),
                })
        else:
            return {
                **metainfo,
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'chapters': self._get_chapters(aid, cid),
                'subtitles': self.extract_subtitles(video_id, cid),
                'formats': self.extract_formats(play_info),
                '__post_extractor': self.extract_comments(aid),
            }
600
06167fbb 601
class BiliBiliBangumiIE(BilibiliBaseIE):
    """Extractor for single bangumi episodes (bilibili.com/bangumi/play/ep<id>)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
        'info_dict': {
            'id': '21495',
            'ext': 'mp4',
            'series': '悠久之翼',
            'series_id': '774',
            'season': '第二季',
            'season_id': '1182',
            'season_number': 2,
            'episode': 'forever/ef',
            'episode_id': '21495',
            'episode_number': 12,
            'title': '12 forever/ef',
            'duration': 1420.791,
            'timestamp': 1320412200,
            'upload_date': '20111104',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
        'info_dict': {
            'id': '267851',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '残酷',
            'episode_id': '267851',
            'episode_number': 1,
            'title': '1 残酷',
            'duration': 1425.256,
            'timestamp': 1554566400,
            'upload_date': '20190406',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
        'skip': 'Geo-restricted',
    }, {
        'note': 'a making-of which falls outside main section',
        'url': 'https://www.bilibili.com/bangumi/play/ep345120',
        'info_dict': {
            'id': '345120',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '炭治郎篇',
            'episode_id': '345120',
            'episode_number': 27,
            'title': '#1 炭治郎篇',
            'duration': 1922.129,
            'timestamp': 1602853860,
            'upload_date': '20201016',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
    }]

    def _real_extract(self, url):
        """Extract one bangumi episode.

        Raises GeoRestrictedError when the page carries the region-block notice,
        and requires login when the page or the playurl response indicates
        premium-only content.
        """
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers = {'Referer': url, **self.geo_verification_headers()}
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
            headers=headers)
        # Remember the response code before play_info is narrowed to the video info dict
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        # 1-based position of this episode among the main and section episodes; (1, {}) if not found
        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        # Without a season id there is nothing to look up; unpacking `season_id and next(...)`
        # would raise TypeError in that case, so handle it explicitly
        season_number, season_title = None, None
        if season_id:
            season_number, season_title = next((
                (idx + 1, e.get('season_title')) for idx, e in enumerate(
                    traverse_obj(bangumi_info, ('seasons', ...)))
                if e.get('season_id') == season_id
            ), (None, None))

        aid = episode_info.get('aid')

        return {
            'id': episode_id,
            'formats': formats,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            **traverse_obj(episode_info, {
                'episode': ('long_title', {str}),
                'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
                'timestamp': ('pub_time', {int_or_none}),
                'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
            }),
            'episode_id': episode_id,
            'season': str_or_none(season_title),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': headers,
        }
bd8f48c7 728
bd8f48c7 729
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    """Playlist extractor for bangumi media pages (bilibili.com/bangumi/media/md<id>)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        """Return a playlist of the season's episodes, titled and described from the page state."""
        media_id = self._match_id(url)
        page = self._download_webpage(url, media_id)

        state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', page, 'initial_state', media_id)
        # The media page only references the season; the episodes come from the season API
        season_id = state['mediaInfo']['season_id']

        playlist_meta = traverse_obj(state, ('mediaInfo', {
            'title': ('title', {str}),
            'description': ('evaluate', {str}),
        }))
        episodes = self._get_episodes_from_season(season_id, url)
        return self.playlist_result(episodes, media_id, **playlist_meta)
bdd0b75e 783
bd8f48c7 784
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    """Playlist extractor for bangumi season pages (bilibili.com/bangumi/play/ss<id>)."""

    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        """Return a playlist of the season's episodes, with title/description from the page's JSON-LD."""
        ss_id = self._match_id(url)
        webpage = self._download_webpage(url, ss_id)
        # With get_all=False traverse_obj returns None when no 'itemListElement' entry matches;
        # fall back to an empty dict so that the ** expansion below cannot raise TypeError
        metainfo = traverse_obj(
            self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
            ('itemListElement', ..., {
                'title': ('name', {str}),
                'description': ('description', {str}),
            }), get_all=False) or {}

        return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
835
836
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Helpers shared by the 'cheese' (course) extractors."""

    # Sent with every API request made here and passed on as the download headers
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for episode `ep_id` from an already downloaded `season_info`.

        Raises ExtractorError if the episode is not part of the season or is not
        yet available, and requires login if the episode is not playable.
        """
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        if not episode_info:
            # Without this check the lookups below fail with an opaque TypeError on None
            raise ExtractorError(f'Unable to find episode {ep_id} in season info')

        # Check availability first so the user gets these messages even if aid/cid are absent
        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        aid, cid = episode_info['aid'], episode_info['cid']

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Always attribute the entry to the single-episode extractor and its canonical URL
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Download season info, looked up by `query_key`=`video_id`, and return its 'data' member."""
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
bd8f48c7 885
9f09bdcf 886
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    """Extractor for a single course episode (``/cheese/play/ep<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        }
    }]

    def _real_extract(self, url):
        """Look the season up by episode id, then extract that one episode from it."""
        episode_id = self._match_id(url)
        season_info = self._download_season_info('ep_id', episode_id)
        return self._extract_episode(season_info, episode_id)
912
913
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    """Extractor for a whole course (``/cheese/play/ss<id>``), returned as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            }
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        """Lazily yield an info dict for every episode flagged ``episode_can_view``."""
        viewable_ids = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        for episode_id in viewable_ids:
            yield self._extract_episode(season_info, episode_id)

    def _real_extract(self, url):
        """Download the season info once and expose its episodes as a playlist."""
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)
        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })

        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id, **playlist_meta)
4bc15a68
RA
967
968
2b9d0216
L
class BilibiliSpaceBaseIE(InfoExtractor):
    """Base for user-space extractors that page through an API listing."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Return ``(metadata, paged_list)`` for a paginated listing.

        :param fetch_page: callable taking a 0-based page index and returning the page data
        :param get_metadata: callable turning the first page into a dict that
            contains at least ``page_count`` and ``page_size``
        :param get_entries: callable turning one page's data into its entries
        """
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        def entries_for(page_idx):
            # Page 0 has already been downloaded for the metadata; reuse it
            page_data = fetch_page(page_idx) if page_idx else first_page
            return get_entries(page_data)

        paged_list = InAdvancePagedList(
            entries_for, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
979
980
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Extractor for the video listing of a user space (``space.bilibili.com/<mid>[/video]``)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
    }]

    def _extract_signature(self, playlist_id):
        """Derive the 32-character key that is appended to the query before hashing ``w_rid``.

        The key is built by picking characters of ``img_key + sub_key`` in a
        fixed order. Both keys come from the ``nav`` endpoint, with hard-coded
        fallbacks when that request fails or lacks the fields.
        """
        session_data = self._download_json(
            'https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

        def key_from_url(wbi_url):
            # Basename of the URL without its extension(s)
            return wbi_url.rpartition('/')[2].split('.')[0]

        img_key = traverse_obj(session_data, ('data', 'wbi_img', 'img_url', {key_from_url}))
        sub_key = traverse_obj(session_data, ('data', 'wbi_img', 'sub_url', {key_from_url}))
        session_key = (img_key or '34478ba821254d9d93542680e3b86100') + (
            sub_key or '7e16a90d190a4355a78fd00b32a38de6')

        pick_order = (
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
            57, 62, 11, 36, 20, 34, 44, 52
        )
        # Positions beyond the end of the key are skipped rather than treated as errors
        key_length = len(session_key)
        picked = ''.join(session_key[pos] for pos in pick_order if pos < key_length)
        return picked[:32]

    def _real_extract(self, url):
        """Return a playlist of every video uploaded by the user."""
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        signature = self._extract_signature(playlist_id)

        def fetch_page(page_idx):
            # Key order matters: the urlencoded query is what gets hashed into w_rid
            query = {
                'keyword': '',
                'mid': playlist_id,
                'order': 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
                'wts': int(time.time()),
            }
            signed_payload = f'{urllib.parse.urlencode(query)}{signature}'
            query['w_rid'] = hashlib.md5(signed_payload.encode()).hexdigest()

            try:
                response = self._download_json(
                    'https://api.bilibili.com/x/space/wbi/arc/search',
                    playlist_id, note=f'Downloading page {page_idx}', query=query)
            except ExtractorError as e:
                blocked = isinstance(e.cause, HTTPError) and e.cause.status == 412
                if not blocked:
                    raise
                raise ExtractorError(
                    'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
            if response['code'] == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            page_count = math.ceil(entry_count / page_size)
            return {'page_count': page_count, 'page_size': page_size}

        def get_entries(page_data):
            videos = traverse_obj(page_data, ('list', 'vlist')) or []
            for video in videos:
                bvid = video['bvid']
                yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
6efb0711 1070
6efb0711 1071
2b9d0216
L
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Extractor for the audio listing of a user space (``space.bilibili.com/<mid>/audio``)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Return a playlist of every song uploaded by the user."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # The API is 1-based while the paged list is 0-based
            page_query = {'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
            response = self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}', query=page_query)
            return response['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for song in page_data.get('data', []):
                song_id = song['id']
                yield self.url_result(
                    f'https://www.bilibili.com/audio/au{song_id}', BilibiliAudioIE, song_id)

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
1103
1104
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Base for list-like extractors (collections, series, favorites, ...)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a url_result for every video id found under ``bvid_keys`` in ``page_data``.

        :param bvid_keys: a single key or a tuple of keys leading to the list of videos
        :param ending_key: name of the field holding the BV id inside each video object
        """
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Best-effort lookup of the uploader name from the title of the user's space page.

        Returns ``None`` when the page cannot be downloaded or the title does not match.
        """
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        if not webpage:
            # A non-fatal download failure yields a falsy value; passing that
            # on to the regex search would raise instead of staying best-effort.
            return None
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Like the parent implementation, but strips the paging keys from the metadata
        so that callers can pass the rest straight to ``playlist_result``."""
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
1119
1120
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Extractor for a user's collection (``.../channel/collectiondetail?sid=<id>``)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        """Return the collection as a playlist, with metadata taken from its first page."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            page_query = {'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
            response = self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}', query=page_query)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            collection_meta = {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
            }
            collection_meta.update(traverse_obj(page_data, {
                'title': ('meta', 'name', {str}),
                'description': ('meta', 'description', {str}),
                'uploader_id': ('meta', 'mid', {str_or_none}),
                'timestamp': ('meta', 'ptime', {int_or_none}),
                'thumbnail': ('meta', 'cover', {url_or_none}),
            }))
            return collection_meta

        metadata, paged_list = self._extract_playlist(
            fetch_page, get_metadata, lambda page_data: self._get_entries(page_data, 'archives'))
        return self.playlist_result(paged_list, playlist_id, **metadata)
1169
1170
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Extractor for a user's series (``.../channel/seriesdetail?sid=<id>``)."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        """Return the series as a playlist; series metadata is fetched separately and is optional."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        # Non-fatal: the playlist is still usable without title/description
        series_json = self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False)
        playlist_meta = traverse_obj(series_json, {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            page_query = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
            response = self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}', query=page_query)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['size']
            entry_count = page_data['page']['total']
            series_meta = {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
            }
            series_meta.update(playlist_meta)
            return series_meta

        metadata, paged_list = self._extract_playlist(
            fetch_page, get_metadata, lambda page_data: self._get_entries(page_data, 'archives'))
        return self.playlist_result(paged_list, playlist_id, **metadata)
1223
1224
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Extractor for a favorites list (``favlist?fid=<id>`` or ``medialist/detail/ml<id>``)."""

    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the favorites list as a playlist.

        Metadata comes from the ``resource/list`` endpoint and the entries
        from the ``resource/ids`` endpoint.
        """
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        ids_json = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(ids_json, 'data')

        favlist_meta = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **favlist_meta)
1273
1274
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Extractor for the logged-in user's "watch later" list."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Return the watch-later list; the playlist id is the ``DedeUserID`` cookie
        value when that cookie is present, else the literal ``watchlater``."""
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = getattr(user_cookie, 'value', 'watchlater')

        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')

        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')), id=list_id, title='稍后再看')
1292
1293
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extractor for generic list pages (``/list/<id>`` and ``/medialist/play/<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield entries page by page until the API reports ``has_more`` as false.

        ``query`` is updated in place: after each page ``oid`` is set to the
        id of the last item so the next request continues from there.
        """
        page_num = 0
        while True:
            page_num += 1
            response = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
            )
            page_data = response['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                return

    def _real_extract(self, url):
        """Read the list parameters from the page's initial state and return the playlist."""
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)

        status_code = traverse_obj(initial_state, ('error', 'code', {int_or_none}))
        if status_code != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {'ps': 20, 'with_current': False}
        query.update(traverse_obj(initial_state, {
            'type': ('playlist', 'type', {int_or_none}),
            'biz_id': ('playlist', 'id', {int_or_none}),
            'tid': ('tid', {int_or_none}),
            'sort_field': ('sortFiled', {int_or_none}),
            'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
        }))

        metadata = {'id': f'{query["type"]}_{query["biz_id"]}'}
        metadata.update(traverse_obj(initial_state, ('mediaListInfo', {
            'title': ('title', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
        })))
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
06167fbb 1393
1394
class BilibiliCategoryIE(InfoExtractor):
    """Extractor for category listing pages (``bilibili.com/v/<category>/<subcategory>``)."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad'
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45
        }
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url_results for the videos on one page of the listing.

        :raises ExtractorError: if the response contains no video list
        """
        # NOTE(review): page_num is passed through to the API unchanged; confirm
        # whether the paged list's index base matches what the ``pn`` parameter expects.
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note=f'Extracting results from page {page_num} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return a lazily paged list of all videos in the given subcategory.

        :raises ExtractorError: for unsupported (sub)categories, or when the
            paging information is missing from the first response
        """
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # traverse_obj yields None when the paging object is absent; default to
        # an empty dict so the dedicated error below is raised instead of an AttributeError.
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        """Derive category and subcategory from the URL path and return the listing as a playlist."""
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
1461
1462
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor (``bilisearch:<query>``)."""

    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'

    def _search_results(self, query):
        """Yield url_results page by page until a page comes back without results."""
        for page_num in itertools.count(1):
            search_params = {
                'Search_key': query,
                'keyword': query,
                'page': page_num,
                'context': '',
                'duration': 0,
                'tids_2': '',
                '__refresh__': 'true',
                'search_type': 'video',
                'tids': 0,
                'highlight': 1,
            }
            response = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query=search_params)
            videos = response['data'].get('result')
            if not videos:
                return
            yield from (
                self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
                for video in videos)
06167fbb 1488
1489
4bc15a68
RA
class BilibiliAudioBaseIE(InfoExtractor):
    """Base for the audio extractors; wraps the ``music-service-c`` web API."""

    def _call_api(self, path, sid, query=None):
        """Request ``path`` and return the ``data`` member of the JSON response.

        When no (or an empty) ``query`` is given, ``{'sid': sid}`` is sent.
        """
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        response = self._download_json(api_url, sid, query=query or {'sid': sid})
        return response['data']
1497
1498
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extractor for a single song (``/audio/au<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Fetch the play URL first, then the song info, and combine them into one info dict."""
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        # Only the first CDN URL is used; the page URL is sent as Referer when downloading it
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        lyric = song.get('lyric')
        subtitles = {'origin': [{'url': lyric}]} if lyric else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
1567
1568
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extractor for an audio album / menu (``/audio/am<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Return the album's songs as a playlist.

        Album title and description are only looked up when at least one song
        was found; each entry is then tagged with the album title.
        """
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Songs without a usable id are skipped
        song_ids = filter(None, (str_or_none(song.get('id')) for song in songs))
        entries = [
            self.url_result(
                'https://www.bilibili.com/audio/au' + song_id,
                BilibiliAudioIE.ie_key(), song_id)
            for song_id in song_ids]

        if not entries:
            return self.playlist_result(entries, am_id)

        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if not album_title:
            return self.playlist_result(entries, am_id)

        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
63dce309
S
1606
1607
class BiliBiliPlayerIE(InfoExtractor):
    """Extractor for embedded player URLs; delegates to the main video extractor."""

    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Turn the ``aid`` query parameter into an ``av<id>`` video URL."""
        video_id = self._match_id(url)
        video_url = f'http://www.bilibili.tv/video/av{video_id}/'
        return self.url_result(video_url, ie=BiliBiliIE.ie_key(), video_id=video_id)
16f7e6be
AG
1620
1621
1622class BiliIntlBaseIE(InfoExtractor):
c62ecf0d 1623 _API_URL = 'https://api.bilibili.tv/intl/gateway'
cfcf60ea 1624 _NETRC_MACHINE = 'biliintl'
1713c882 1625 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
16f7e6be 1626
c62ecf0d 1627 def _call_api(self, endpoint, *args, **kwargs):
cfcf60ea
M
1628 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
1629 if json.get('code'):
1630 if json['code'] in (10004004, 10004005, 10023006):
1631 self.raise_login_required()
1632 elif json['code'] == 10004001:
1633 self.raise_geo_restricted()
1634 else:
1635 if json.get('message') and str(json['code']) != json['message']:
1636 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1637 else:
1638 errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
1639 if kwargs.get('fatal'):
1640 raise ExtractorError(errmsg)
1641 else:
1642 self.report_warning(errmsg)
1643 return json.get('data')
16f7e6be 1644
efc947fb 1645 def json2srt(self, json):
1646 data = '\n\n'.join(
1647 f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
dfb855b4 1648 for i, line in enumerate(traverse_obj(json, (
1649 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
efc947fb 1650 return data
1651
f5f15c99
LR
1652 def _get_subtitles(self, *, ep_id=None, aid=None):
1653 sub_json = self._call_api(
fbb888a3 1654 '/web/v2/subtitle', ep_id or aid, fatal=False,
1655 note='Downloading subtitles list', errnote='Unable to download subtitles list',
1656 query=filter_dict({
f5f15c99 1657 'platform': 'web',
fbb888a3 1658 's_locale': 'en_US',
f5f15c99
LR
1659 'episode_id': ep_id,
1660 'aid': aid,
fbb888a3 1661 })) or {}
16f7e6be 1662 subtitles = {}
cf6413e8
H
1663 fetched_urls = set()
1664 for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
1665 for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
1666 if url in fetched_urls:
1667 continue
1668 fetched_urls.add(url)
1669 sub_ext = determine_ext(url)
1670 sub_lang = sub.get('lang_key') or 'en'
1671
1672 if sub_ext == 'ass':
1673 subtitles.setdefault(sub_lang, []).append({
1674 'ext': 'ass',
1675 'url': url,
1676 })
1677 elif sub_ext == 'json':
1678 sub_data = self._download_json(
1679 url, ep_id or aid, fatal=False,
1680 note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
1681 errnote='Unable to download subtitles')
1682
1683 if sub_data:
1684 subtitles.setdefault(sub_lang, []).append({
1685 'ext': 'srt',
1686 'data': self.json2srt(sub_data),
1687 })
1688 else:
1689 self.report_warning('Unexpected subtitle extension', ep_id or aid)
1690
16f7e6be
AG
1691 return subtitles
1692
f5f15c99
LR
def _get_formats(self, *, ep_id=None, aid=None):
    """Query the '/web/playurl' API and build the list of format dicts.

    The response's 'video' entries become video-only formats (acodec 'none')
    and its 'audio_resource' entries become audio-only formats (vcodec 'none').
    Entries that carry no URL are skipped.

    @param ep_id  episode id, if any; `ep_id or aid` is used as the display id
    @param aid    video id used when no episode id is given
    @returns      list of format dicts, video formats first, then audio
    """
    api_query = filter_dict({
        'platform': 'web',
        'ep_id': ep_id,
        'aid': aid,
    })
    playurl = self._call_api(
        '/web/playurl', ep_id or aid, note='Downloading video formats',
        errnote='Unable to download video formats', query=api_query)['playurl']

    video_formats = []
    for entry in playurl.get('video') or []:
        resource = entry.get('video_resource') or {}
        if not resource.get('url'):
            continue
        stream_info = entry.get('stream_info') or {}
        video_formats.append({
            'url': resource['url'],
            'ext': 'mp4',
            'format_note': stream_info.get('desc_words'),
            'width': resource.get('width'),
            'height': resource.get('height'),
            'vbr': resource.get('bandwidth'),
            'acodec': 'none',
            'vcodec': resource.get('codecs'),
            'filesize': resource.get('size'),
        })

    audio_formats = [{
        'url': track['url'],
        'ext': 'mp4',
        'abr': track.get('bandwidth'),
        'acodec': track.get('codecs'),
        'vcodec': 'none',
        'filesize': track.get('size'),
    } for track in playurl.get('audio_resource') or [] if track.get('url')]

    return video_formats + audio_formats
1732
def _parse_video_metadata(self, video_data):
    """Map a raw video/episode dict onto info-dict metadata fields.

    The title prefers 'title_display' over 'title'. The episode number is
    taken from a leading 'E<digits>' in 'title_display' (followed by either
    the end of the string or ' - ') and is None when that pattern is absent.
    """
    display_title = video_data.get('title_display')
    episode_number = self._search_regex(
        r'^E(\d+)(?:$| - )', display_title or '', 'episode number', default=None)

    return {
        'title': display_title or video_data.get('title'),
        'description': video_data.get('desc'),
        'thumbnail': video_data.get('cover'),
        'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
        'episode_number': int_or_none(episode_number),
    }
1742
def _perform_login(self, username, password):
    """Log in with username and password through the passport web API.

    A login key and hash are downloaded first; the hash is prepended to the
    password, the result is encrypted with the key (PKCS#1 v1.5) and sent
    base64-encoded in the login form.

    Raises ExtractorError when the Cryptodome RSA module is unavailable or
    when the login response carries a truthy 'code'.
    """
    if not Cryptodome.RSA:
        raise ExtractorError('pycryptodomex not found. Please install', expected=True)

    key_data = self._download_json(
        'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
        note='Downloading login key', errnote='Unable to download login key')['data']

    cipher = Cryptodome.PKCS1_v1_5.new(Cryptodome.RSA.importKey(key_data['key']))
    salted_password = key_data['hash'] + password
    encrypted_password = cipher.encrypt(salted_password.encode('utf-8'))

    login_form = {
        'username': username,
        'password': base64.b64encode(encrypted_password).decode('ascii'),
        'keep_me': 'true',
        's_locale': 'en_US',
        'isTrusted': 'true'
    }
    response = self._download_json(
        'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
        data=urlencode_postdata(login_form), note='Logging in', errnote='Unable to log in')

    # A falsy 'code' means the login went through
    if not response.get('code'):
        return

    message = response.get('message')
    if message:
        raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {message}', expected=True)
    raise ExtractorError('Unable to log in')
1766
16f7e6be
AG
1767
class BiliIntlIE(BiliIntlBaseIE):
    """Extractor for single bilibili.tv / biliintl.com pages.

    Handles both series episodes ('/play/<season_id>/<ep_id>') and
    standalone videos ('/video/<aid>').
    """
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro'
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro'
            }],
        }
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro'
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro'
            }],
        }
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro'
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro'
            }],
        },
        'params': {
            'getcomments': True
        }
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True
        }
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Build the canonical page URL for an episode (series_id given) or a standalone video."""
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Collect title, description and related metadata for one video.

        Metadata smuggled into the URL is returned as-is when it has a title.
        Otherwise the webpage's embedded state is parsed, with the season's
        episode list as fallback, and the result is merged with values scraped
        from the HTML and from JSON-LD.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            # Fall back to an empty dict so a missing episode does not crash _parse_video_metadata
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False) or {}

        # XXX: webpage metadata may not be accurate; it is only used so that extraction
        # does not crash when video_data is not found
        return merge_dicts(
            self._parse_video_metadata(video_data), {
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Yield every reply below the root comment `root_id`.

        Pages are requested starting at cursor `next_id` until the API reports
        `is_end`, or until it provides no new cursor value to continue from.
        """
        while True:
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/detail', display_id,
                note=f'Downloading reply comment of {root_id} - {next_id}',
                query={
                    'platform': 'web',
                    'ps': 20,  # comment's reply per page (default: 3)
                    'root': root_id,
                    'next': next_id,
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'parent': replies.get('parent'),
                    'timestamp': unified_timestamp(replies.get('ctime_text'))
                }

            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                return
            following_id = traverse_obj(comment_api_raw_data, ('data', 'cursor', 'next'))
            # Without a new cursor value the same page would be requested forever
            if following_id is None or following_id == next_id:
                return
            next_id = following_id

    def _get_comments(self, video_id, ep_id):
        """Yield root comments page by page, each followed by its replies when it has any."""
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            page_comments = traverse_obj(comment_api_raw_data, ('data', 'replies', ...))
            for replies in page_comments:
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
                }
                if replies.get('count'):
                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

            is_end = traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end'))
            # Also stop on a page with neither comments nor an end marker (e.g. an
            # error payload); otherwise this loop would never terminate
            if is_end or (is_end is None and not page_comments):
                break

    def _real_extract(self, url):
        """Extract formats, subtitles, metadata, chapters and comments for one video URL."""
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start time and end time seem to be off by a few seconds even though
                # they are correct according to ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro'
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro'
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
16f7e6be
AG
2036
2037
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Playlist extractor for a whole series ('/play/<id>' or '/media/<id>')."""
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield one url_result per episode, with the episode's parsed metadata smuggled into its URL."""
        episode_list = self._call_api(
            f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        all_episodes = traverse_obj(
            episode_list, ('sections', ..., 'episodes', ...), expected_type=dict)

        for item in all_episodes:
            ep_id = str(item['episode_id'])
            page_url = BiliIntlIE._make_url(ep_id, series_id)
            smuggled_url = smuggle_url(page_url, self._parse_video_metadata(item))
            yield self.url_result(smuggled_url, BiliIntlIE, ep_id)

    def _real_extract(self, url):
        """Return the playlist for the series, described by the season_info API response."""
        series_id = self._match_id(url)
        season = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}

        playlist_fields = {
            'categories': traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none),
            'thumbnail': url_or_none(season.get('horizontal_cover')),
            'view_count': parse_count(season.get('view')),
        }
        return self.playlist_result(
            self._entries(series_id), series_id, season.get('title'), season.get('description'),
            **playlist_fields)
b4f53662
H
2090
2091
class BiliLiveIE(InfoExtractor):
    """Extractor for live.bilibili.com room pages."""
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
            'ext': 'flv',
            'title': "太空狼人杀联动,不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live'
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True
    }]

    # Maps the API's quality number ('qn') to the format id and note reported for it
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Download JSON from the live API and return its 'data' payload (or an empty dict).

        Raises ExtractorError when the response 'code' is not 0.
        """
        response = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if response.get('code') == 0:
            return response.get('data') or {}
        raise ExtractorError(response.get('message') or 'Unable to download JSON metadata')

    def _parse_formats(self, qn, fmt):
        """Yield one format dict per URL of every codec whose 'current_qn' equals `qn`."""
        matching_codecs = (
            candidate for candidate in fmt.get('codec') or []
            if candidate.get('current_qn') == qn)
        for codec in matching_codecs:
            for source in codec['url_info']:
                yield {
                    'url': f'{source["host"]}{codec["base_url"]}{source["extra"]}',
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        """Extract the live stream of a room, requesting play info once per known quality."""
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        for qn in self._FORMATS:
            play_query = {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            }
            stream_data = self._call_api(
                'xlive/web-room/v2/index/getRoomPlayInfo', room_id, play_query)
            stream_formats = traverse_obj(
                stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []
            for fmt in stream_formats:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            # stream_data here is the response for the last quality requested above
            'timestamp': stream_data.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }