]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bilibili.py
[ie/BilibiliSpaceVideo] Better error message (#9839)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
CommitLineData
cfcf60ea 1import base64
c34f505b 2import functools
6f10cdcf 3import hashlib
ad974876 4import itertools
9f09bdcf 5import json
c34f505b 6import math
5336bf57 7import re
6f10cdcf 8import time
ad974876 9import urllib.parse
ffa017cf 10import uuid
28746fbd 11
06167fbb 12from .common import InfoExtractor, SearchInfoExtractor
f6a765ce 13from ..dependencies import Cryptodome
3d2623a8 14from ..networking.exceptions import HTTPError
28746fbd 15from ..utils import (
bd8f48c7 16 ExtractorError,
ad974876 17 GeoRestrictedError,
2b9d0216
L
18 InAdvancePagedList,
19 OnDemandPagedList,
9e68747f 20 bool_or_none,
9f09bdcf 21 clean_html,
cf6413e8 22 determine_ext,
f5f15c99 23 filter_dict,
6461f2b7 24 float_or_none,
ad974876 25 format_field,
9f09bdcf 26 get_element_by_class,
2b9d0216 27 int_or_none,
bdd0b75e 28 join_nonempty,
ad974876 29 make_archive_id,
d37422f1 30 merge_dicts,
f8580bf0 31 mimetype2ext,
2b9d0216 32 parse_count,
ad974876 33 parse_qs,
b4f53662 34 qualities,
26fdfc37 35 smuggle_url,
efc947fb 36 srt_subtitles_timecode,
4bc15a68 37 str_or_none,
2b9d0216 38 traverse_obj,
6f10cdcf 39 try_call,
b093c38c 40 unified_timestamp,
26fdfc37 41 unsmuggle_url,
c62ecf0d 42 url_or_none,
ad974876 43 urlencode_postdata,
9e68747f 44 variadic,
28746fbd
PH
45)
46
47
class BilibiliBaseIE(InfoExtractor):
    """Helpers shared by the Bilibili extractors in this module."""

    # Captures the numeric part of segment URLs such as '...-30080.m4s?...'
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')

    def extract_formats(self, play_info):
        """Build the list of format dicts from a play-info mapping.

        Audio entries are read from ``dash.audio``, ``dash.dolby.audio`` and
        ``dash.flac.audio``; video entries from ``dash.video``. A notice is
        printed for every quality listed in ``support_formats`` that did not
        end up in the returned list.
        """
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            'acodec': 'none' if audios else None,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
        if missing_formats:
            # A quality may have no description at all; fall back to its numeric
            # id so that str.join() never receives None
            missing_names = ', '.join(format_names[i] or str(i) for i in missing_formats)
            self.to_screen(f'Format(s) {missing_names} are missing; '
                           f'you have to login or become premium member to download them. {self._login_hint()}')

        return formats

    def _download_playinfo(self, video_id, cid):
        """Download the playurl API response for ``cid`` and return its ``data`` member."""
        return self._download_json(
            'https://api.bilibili.com/x/player/playurl', video_id,
            query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
            note=f'Downloading video formats for cid {cid}')['data']

    def json2srt(self, json_data):
        """Convert a subtitle JSON object (entries under ``body``) to SRT text.

        Each entry must provide ``from``, ``to`` and ``content``. Returns an
        empty string when ``body`` is missing or empty.
        """
        return ''.join(
            f'{idx}\n'
            f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
            f'{line["content"]}\n\n'
            for idx, line in enumerate(json_data.get('body') or [], 1))

    def _get_subtitles(self, video_id, cid, aid=None):
        """Return the subtitles dict for ``cid``.

        The result always contains a ``danmaku`` XML entry; every subtitle
        track that has both a URL and a language code is downloaded and
        converted to SRT. The API is queried by ``aid`` when given, otherwise
        by ``video_id`` as ``bvid``.
        """
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }]
        }

        subtitle_info = traverse_obj(self._download_json(
            'https://api.bilibili.com/x/player/v2', video_id,
            query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
            note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
        subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
        if (not subs_list and traverse_obj(subtitle_info, 'allow_submit')
                # no login session cookie
                and not self._get_cookies('https://api.bilibili.com').get('SESSDATA')):
            self.report_warning(
                f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
        for s in subs_list:
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
            })
        return subtitles

    def _get_chapters(self, aid, cid):
        """Return chapters built from ``data.view_points``, or None.

        Nothing is downloaded when either ``aid`` or ``cid`` is falsy; a
        failed download is non-fatal.
        """
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

    def _get_comments(self, aid):
        """Yield comment dicts page by page until a page has no replies."""
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
            for children in map(self._get_all_children, replies):
                yield from children

    def _get_all_children(self, reply):
        """Yield ``reply`` as a comment dict, followed recursively by its nested replies."""
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
            yield from children

    def _get_episodes_from_season(self, ss_id, url):
        """Yield a url_result for each main-section episode of season ``ss_id``.

        Only episodes with a valid ``share_url`` and a truthy ``id`` are yielded.
        """
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))

    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
        """Recursively collect the edges of an interactive video, grouped by cid.

        ``edges`` maps edge id -> edge info and is updated in place. The
        return value maps cid -> {edge id -> edge info}; the same mapping is
        passed down to the recursive calls.
        """
        if cid_edges is None:
            cid_edges = {}
        division_data = self._download_json(
            'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
            query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
            note=f'Extracting divisions from edge {edge_id}')
        # traverse_obj yields None when nothing matches, which dict.update()
        # rejects with a TypeError; hence the `or {}` fallbacks below
        edges.setdefault(edge_id, {}).update(
            traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
                'title': ('title', {str}),
                'cid': ('cid', {int_or_none}),
            }), get_all=False) or {})

        edges[edge_id].update(traverse_obj(division_data, ('data', {
            'title': ('title', {str}),
            'choices': ('edges', 'questions', ..., 'choices', ..., {
                'edge_id': ('id', {int_or_none}),
                'cid': ('cid', {int_or_none}),
                'text': ('option', {str}),
            }),
        })) or {})
        # use dict to combine edges that use the same video section (same cid)
        cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
        for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
            if choice['edge_id'] not in edges:
                edges[choice['edge_id']] = {'cid': choice['cid']}
                self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
        return cid_edges

    def _get_interactive_entries(self, video_id, cid, metainfo):
        """Yield one entry per distinct cid reachable in an interactive video.

        Every entry starts from ``metainfo`` and overrides id, title, formats,
        description, duration and subtitles for the section.
        """
        graph_version = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/wbi/v2', video_id,
                'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
            ('data', 'interaction', 'graph_version', {int_or_none}))
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
            play_info = self._download_playinfo(video_id, cid)
            # The section title is taken from the first edge registered for this cid
            first_edge = next(iter(edges.values()))
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
                'title': f'{metainfo.get("title")} - {first_edge.get("title")}',
                'formats': self.extract_formats(play_info),
                'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'subtitles': self.extract_subtitles(video_id, cid),
            }
bdd0b75e 224
ad974876
L
225
class BiliBiliIE(BilibiliBaseIE):
    """Extractor for regular bilibili.com video pages and festival pages."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
        },
    }, {
        'note': 'old av URL version',
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            'ext': 'mp4',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'id': 'BV11x411K7CN',
            'title': '【金坷垃】金泡沫',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 18,
        'playlist': [{
            'info_dict': {
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:10',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
            }
        }]
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:10',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
        }
    }, {
        'note': 'video has subtitles',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫太渴',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2'
        },
        'params': {'listsubtitles': True},
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'video redirects to festival page',
        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
        'info_dict': {
            'id': 'BV1wP4y1P72h',
            'ext': 'mp4',
            'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
            'timestamp': 1643947497,
            'upload_date': '20220204',
            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
            'uploader': '叨叨冯聊音乐',
            'duration': 246.719,
            'uploader_id': '528182630',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
        'info_dict': {
            'id': 'BV1ay4y1d77f',
            'ext': 'mp4',
            'title': '【崩坏3新春剧场】为特别的你送上祝福!',
            'timestamp': 1674273600,
            'upload_date': '20230121',
            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
            'uploader': '果蝇轰',
            'duration': 1111.722,
            'uploader_id': '8469526',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'interactive/split-path video',
        'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
        'info_dict': {
            'id': 'BV1af4y1H7ga',
            'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
            'timestamp': 1630500414,
            'upload_date': '20210901',
            'description': 'md5:01113e39ab06e28042d74ac356a08786',
            'tags': list,
            'uploader': '钉宫妮妮Ninico',
            'duration': 1503,
            'uploader_id': '8881297',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'playlist_count': 33,
        'playlist': [{
            'info_dict': {
                'id': 'BV1af4y1H7ga_400950101',
                'ext': 'mp4',
                'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
                'timestamp': 1630500414,
                'upload_date': '20210901',
                'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
                'tags': list,
                'uploader': '钉宫妮妮Ninico',
                'duration': 11.605,
                'uploader_id': '8881297',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }, {
        'note': '301 redirect to bangumi link',
        'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
        'info_dict': {
            'id': '288525',
            'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
            'ext': 'mp4',
            'series': '我和我的祖国',
            'series_id': '4780',
            'season': '幕后纪实',
            'season_id': '28609',
            'season_number': 1,
            'episode': '钱学森弹道和乘波体飞行器是什么?',
            'episode_id': '288525',
            'episode_number': 105,
            'duration': 1183.957,
            'timestamp': 1571648124,
            'upload_date': '20191021',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
        'info_dict': {
            'id': 'BV1jL41167ZG',
            'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
            'ext': 'mp4',
        },
        'skip': 'supporter-only video',
    }, {
        'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
        'info_dict': {
            'id': 'BV1Ks411f7aQ',
            'title': '【BD1080P】狼与香辛料I【华盟】',
            'ext': 'mp4',
        },
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
        'info_dict': {
            'id': 'BV1GJ411x7h7',
            'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
            'ext': 'mp4',
        },
        'skip': 'geo-restricted',
    }]

    def _real_extract(self, url):
        """Extract a single video, an anthology playlist or an interactive playlist.

        When the final URL after redirects is not matched by this extractor,
        it is handed over via url_result.
        """
        video_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, video_id)
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        # Pages without 'videoData' in the initial state are handled as festival pages
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info_obj = self._search_json(
                r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
            if not play_info_obj:
                error_code = traverse_obj(initial_state, ('error', 'trueCode'))
                if error_code == -403:
                    self.raise_login_required()
                if error_code == -404:
                    raise ExtractorError(
                        'This video may be deleted or geo-restricted. '
                        'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
            play_info = traverse_obj(play_info_obj, ('data', {dict}))
            if not play_info:
                if traverse_obj(play_info_obj, 'code') == 87007:
                    toast = get_element_by_class('tips-toast', webpage) or ''
                    msg = clean_html(
                        f'{get_element_by_class("belongs-to", toast) or ""},'
                        + (get_element_by_class('level', toast) or ''))
                    raise ExtractorError(
                        f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
                raise ExtractorError('Failed to extract play info')
            video_data = initial_state['videoData']

        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = []
        if not is_festival:
            page_list_json = traverse_obj(
                self._download_json(
                    'https://api.bilibili.com/x/player/pagelist', video_id,
                    fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                    note='Extracting videos in anthology'),
                'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        if part_id:
            cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid'))
        else:
            cid = video_data.get('cid')

        festival_info = {}
        if is_festival:
            # Festival pages carry no embedded play info, so it is requested from the API
            play_info = self._download_playinfo(video_id, cid)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        if traverse_obj(video_data, ('rights', 'is_stein_gate')):
            # Interactive video: one playlist entry per video section
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
                    'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                    '__post_extractor': self.extract_comments(aid),
                })

        return {
            **metainfo,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'chapters': self._get_chapters(aid, cid),
            'subtitles': self.extract_subtitles(video_id, cid),
            'formats': self.extract_formats(play_info),
            '__post_extractor': self.extract_comments(aid),
        }
601
06167fbb 602
class BiliBiliBangumiIE(BilibiliBaseIE):
    """Extractor for single bangumi episodes (``/bangumi/play/ep<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
        'info_dict': {
            'id': '21495',
            'ext': 'mp4',
            'series': '悠久之翼',
            'series_id': '774',
            'season': '第二季',
            'season_id': '1182',
            'season_number': 2,
            'episode': 'forever/ef',
            'episode_id': '21495',
            'episode_number': 12,
            'title': '12 forever/ef',
            'duration': 1420.791,
            'timestamp': 1320412200,
            'upload_date': '20111104',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
        'info_dict': {
            'id': '267851',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '残酷',
            'episode_id': '267851',
            'episode_number': 1,
            'title': '1 残酷',
            'duration': 1425.256,
            'timestamp': 1554566400,
            'upload_date': '20190406',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
        'skip': 'Geo-restricted',
    }, {
        'note': 'a making-of which falls outside main section',
        'url': 'https://www.bilibili.com/bangumi/play/ep345120',
        'info_dict': {
            'id': '345120',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '炭治郎篇',
            'episode_id': '345120',
            'episode_number': 27,
            'title': '#1 炭治郎篇',
            'duration': 1922.129,
            'timestamp': 1602853860,
            'upload_date': '20201016',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
    }]

    def _real_extract(self, url):
        """Extract formats and metadata for one bangumi episode.

        Raises GeoRestrictedError when the page shows the region notice, and
        a login-required error when the page or the API response marks the
        episode as premium-only.
        """
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers = {'Referer': url, **self.geo_verification_headers()}
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
            headers=headers)
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        # Position (1-based) of this episode among the main episodes followed by
        # the episodes of every section; falls back to (1, {}) when not found
        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        # Without a season id there is nothing to look up. The previous
        # `season_id and next(...)` form tried to unpack the falsy id itself
        # and raised a TypeError.
        season_number, season_title = None, None
        if season_id:
            season_number, season_title = next((
                (idx, e.get('season_title'))
                for idx, e in enumerate(traverse_obj(bangumi_info, ('seasons', ...)), 1)
                if e.get('season_id') == season_id
            ), (None, None))

        aid = episode_info.get('aid')

        return {
            'id': episode_id,
            'formats': formats,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            **traverse_obj(episode_info, {
                'episode': ('long_title', {str}),
                # Prefer the numeric episode title; otherwise use the computed position
                'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
                'timestamp': ('pub_time', {int_or_none}),
                'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
            }),
            'episode_id': episode_id,
            'season': str_or_none(season_title),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': headers,
        }
bd8f48c7 729
bd8f48c7 730
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    """Playlist extractor for bangumi media pages (``/bangumi/media/md<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        """Return a playlist of the season episodes referenced by the media page."""
        media_id = self._match_id(url)
        page = self._download_webpage(url, media_id)

        state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', page, 'initial_state', media_id)
        season_id = state['mediaInfo']['season_id']

        entries = self._get_episodes_from_season(season_id, url)
        playlist_meta = traverse_obj(state, ('mediaInfo', {
            'title': ('title', {str}),
            'description': ('evaluate', {str}),
        }))
        return self.playlist_result(entries, media_id, **playlist_meta)
bdd0b75e 784
bd8f48c7 785
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    """Playlist extractor for bangumi season pages (``/bangumi/play/ss<id>``)."""

    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        """Return a playlist of the season's episodes.

        Title and description come from the first ``itemListElement`` of the
        page's JSON-LD block; the playlist is returned without them when that
        list yields nothing.
        """
        ss_id = self._match_id(url)
        webpage = self._download_webpage(url, ss_id)
        # With get_all=False traverse_obj returns None when nothing matches;
        # fall back to an empty mapping so the ** expansion below cannot fail
        metainfo = traverse_obj(
            self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
            ('itemListElement', ..., {
                'title': ('name', {str}),
                'description': ('description', {str}),
            }), get_all=False) or {}

        return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
836
837
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Shared helpers for the Bilibili "cheese" (course) extractors."""

    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for episode ``ep_id`` out of ``season_info``.

        Raises ExtractorError when the episode is not listed in
        ``season_info`` or is not yet available, and a login-required error
        when the episode is not playable.
        """
        # Convert once instead of on every filter invocation
        wanted_id = int(ep_id)
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == wanted_id), get_all=False)
        if not episode_info:
            # Previously this surfaced as an opaque TypeError from subscripting None
            raise ExtractorError(f'Unable to find episode {ep_id} in season info')
        aid, cid = episode_info['aid'], episode_info['cid']

        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Entries are always attributed to the single-episode extractor
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Download season info, querying by ``query_key``=``video_id``, and return its ``data`` member."""
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
bd8f48c7 886
9f09bdcf 887
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    """Extractor for a single ``cheese/play/ep<id>`` course episode."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Look up the season containing the episode, then extract that episode."""
        episode_id = self._match_id(url)
        season_info = self._download_season_info('ep_id', episode_id)
        return self._extract_episode(season_info, episode_id)
913
914
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    """Playlist extractor for a ``cheese/play/ss<id>`` course season."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            },
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        """Lazily yield an info dict for each episode flagged ``episode_can_view``."""
        viewable_ids = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        for episode_id in viewable_ids:
            yield self._extract_episode(season_info, episode_id)

    def _real_extract(self, url):
        """Return the season as a playlist titled from the season info."""
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)
        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })

        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id, **playlist_meta)
4bc15a68
RA
968
969
2b9d0216
L
class BilibiliSpaceBaseIE(InfoExtractor):
    """Base class providing paged-playlist plumbing for space extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Return ``(metadata, paged_list)`` built from three callbacks.

        ``fetch_page(idx)`` downloads the page with 0-based index ``idx``,
        ``get_metadata(page)`` must return a dict holding at least
        ``page_count`` and ``page_size``, and ``get_entries(page)`` produces
        the entries of one page.
        """
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def entries_for(idx):
            # Page 0 was already downloaded to obtain the metadata; reuse it.
            page = fetch_page(idx) if idx else initial_page
            return get_entries(page)

        paged_list = InAdvancePagedList(
            entries_for, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
980
981
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Playlist extractor for the videos listed on a ``space.bilibili.com`` page."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
    }]

    def _extract_signature(self, playlist_id):
        """Return the 32-character key appended to the query string before hashing.

        The key is assembled from the file names of the ``wbi_img`` image URLs
        reported by the ``nav`` endpoint; hard-coded fallback keys are used
        when that (non-fatal) request yields nothing usable.
        """
        session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

        # 'https://host/path/<key>.png' -> '<key>'
        key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
        img_key = traverse_obj(
            session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
        sub_key = traverse_obj(
            session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'

        session_key = img_key + sub_key

        signature_values = []
        # Fixed permutation applied to the concatenated keys; positions beyond
        # the end of the key are skipped rather than raising.
        for position in (
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
            57, 62, 11, 36, 20, 34, 44, 52,
        ):
            char_at_position = try_call(lambda: session_key[position])
            if char_at_position:
                signature_values.append(char_at_position)

        return ''.join(signature_values)[:32]

    def _real_extract(self, url):
        """Return a paged playlist of the user's uploaded videos.

        Raises ExtractorError when the server blocks the request (HTTP 412 or
        API codes -352/-401) or answers with any other non-zero API code.
        """
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        signature = self._extract_signature(playlist_id)

        def fetch_page(page_idx):
            query = {
                'keyword': '',
                'mid': playlist_id,
                'order': 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
                'wts': int(time.time()),
            }
            # w_rid signs the url-encoded query (in the order above) plus the key.
            query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()

            try:
                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
            status_code = response['code']
            if status_code in (-352, -401):
                raise ExtractorError(
                    f'Request is blocked by server ({-status_code}), '
                    'please add cookies, wait and try later.', expected=True)
            if status_code != 0:
                # Any other non-zero code carries no usable "data"; fail here
                # with the server's message instead of crashing in get_metadata.
                raise ExtractorError(
                    f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
6efb0711 1072
6efb0711 1073
2b9d0216
L
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Playlist extractor for the audio tracks of a ``space.bilibili.com`` user."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Return a paged playlist of the user's uploaded audio tracks."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # The API's "pn" is 1-based while page_idx is 0-based.
            api_query = {'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
            page_json = self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}', query=api_query)
            return page_json['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for track in page_data.get('data', []):
                track_id = track['id']
                yield self.url_result(
                    f'https://www.bilibili.com/audio/au{track_id}', BilibiliAudioIE, track_id)

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
1105
1106
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Helpers shared by the list-style (collection/series/favorites/...) extractors."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a ``BiliBiliIE`` url_result for every video id found in ``page_data``.

        ``bvid_keys`` is the key (or tuple of keys) leading to the list of
        items; ``ending_key`` names the field of each item holding the id.
        """
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Return the uploader name scraped from the user's space page, or None."""
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        if not webpage:
            # The download is non-fatal and may not return a string; never
            # hand a non-string to the regex search below.
            return None
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Like the parent method, but strip the paging keys from the metadata.

        This lets callers pass the returned metadata straight to
        ``playlist_result`` as keyword arguments.
        """
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
1121
1122
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for ``channel/collectiondetail?sid=...`` pages."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        """Return the collection as a paged playlist with its metadata."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            # "page_num" is 1-based while page_idx is 0-based.
            api_query = {'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
            page_json = self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}', query=api_query)
            return page_json['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['page_size']
            page_count = math.ceil(paging['total'] / page_size)
            return {
                'page_count': page_count,
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        playlist_meta, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **playlist_meta)
1171
1172
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for ``channel/seriesdetail?sid=...`` pages."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        """Return the series as a paged playlist with its metadata."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        # Series metadata is optional: the request is non-fatal and any field
        # missing from the response is simply left out.
        series_json = self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False)
        playlist_meta = traverse_obj(series_json, {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            # "pn" is 1-based while page_idx is 0-based.
            api_query = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
            page_json = self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}', query=api_query)
            return page_json['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['size']
            page_count = math.ceil(paging['total'] / page_size)
            return {
                'page_count': page_count,
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        result_meta, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **result_meta)
1225
1226
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for favorites lists (``favlist?fid=`` / ``medialist/detail/ml``)."""

    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the favorites list as a playlist.

        Raises a login-required error when the API answers with code -403.
        """
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The id listing is downloaded eagerly; only the entry construction
        # inside _get_entries is lazy.
        ids_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(ids_info, 'data')

        list_meta = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **list_meta)
1275
1276
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for the logged-in user's watch-later list."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Return the watch-later list; API code -101 is reported as login required."""
        # Use the DedeUserID cookie value as playlist id when present,
        # otherwise fall back to the literal 'watchlater'.
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = getattr(user_cookie, 'value', 'watchlater')

        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')

        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')), id=list_id, title='稍后再看')
1294
1295
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for ``bilibili.com/list/...`` and ``medialist/play/...`` URLs."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'description': '',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield the playlist entries page by page.

        ``query`` is mutated in place: after each page its ``oid`` is set to
        the id of that page's last item before the next request is made.
        Paging stops once a page does not report ``has_more``.
        """
        for page_num in itertools.count(1):
            response = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}')
            page_data = response['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break

    def _real_extract(self, url):
        """Return the playlist, or the single video named by ``bvid`` when
        ``_yes_playlist`` declines the playlist.

        Raises ExtractorError (or a login-required error) when the page's
        initial state does not report error code 200.
        """
        list_id = self._match_id(url)

        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
        if not self._yes_playlist(list_id, bvid):
            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)

        status = traverse_obj(initial_state, ('error', 'code', {int_or_none}))
        if status != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        # Request parameters for the medialist API, seeded from the page state.
        state_params = traverse_obj(initial_state, {
            'type': ('playlist', 'type', {int_or_none}),
            'biz_id': ('playlist', 'id', {int_or_none}),
            'tid': ('tid', {int_or_none}),
            'sort_field': ('sortFiled', {int_or_none}),
            'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
        })
        query = {
            'ps': 20,
            'with_current': False,
            **state_params,
        }

        list_meta = traverse_obj(initial_state, ('mediaListInfo', {
            'title': ('title', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
        }))
        metadata = {
            'id': f'{query["type"]}_{query["biz_id"]}',
            **list_meta,
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
06167fbb 1420
1421
class BilibiliCategoryIE(InfoExtractor):
    """Playlist extractor for ``bilibili.com/v/<category>/<subcategory>`` pages."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad',
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45,
        },
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url_results for one result page.

        ``page_num`` is the 0-based page index handed over by the paged list;
        the API's ``pn`` parameter is 1-based (``_entries`` requests the first
        page with ``pn=1``), so the index is shifted by one before use.

        Raises ExtractorError when the page contains no video list.
        """
        page = page_num + 1
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page},
            note=f'Extracting results from page {page} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return an on-demand paged list of the videos in a subcategory.

        Raises ExtractorError for unsupported (sub)categories or when the
        paging information cannot be read from the first response.
        """
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127,
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # Fall back to {} so a response without paging info reaches the
        # explicit error below instead of raising AttributeError on None.
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        """Return the category playlist; its id and title are '<category>: <subcategory>'."""
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
1488
1489
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor backing the ``bilisearch`` search key."""

    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    _TESTS = [{
        'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'tags': list,
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }]

    def _search_results(self, query):
        """Yield url_results for ``query``, page by page, until a page has no results."""
        # Make sure a "buvid3" cookie exists before querying the search API.
        api_cookies = self._get_cookies('https://api.bilibili.com')
        if not api_cookies.get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')

        for page_num in itertools.count(1):
            api_query = {
                'Search_key': query,
                'keyword': query,
                'page': page_num,
                'context': '',
                'duration': 0,
                'tids_2': '',
                '__refresh__': 'true',
                'search_type': 'video',
                'tids': 0,
                'highlight': 1,
            }
            response = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query=api_query)
            videos = response['data'].get('result')
            if not videos:
                return
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
06167fbb 1544
1545
4bc15a68
RA
class BilibiliAudioBaseIE(InfoExtractor):
    """Base class for extractors using the ``music-service-c`` web API."""

    def _call_api(self, path, sid, query=None):
        """Request ``path`` and return the ``data`` member of the JSON reply.

        When ``query`` is empty or omitted, ``{'sid': sid}`` is sent instead.
        """
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        response = self._download_json(api_url, sid, query=query or {'sid': sid})
        return response['data']
1553
1554
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extractor for a single ``bilibili.com/audio/au<id>`` track."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for the track: one audio-only format plus song metadata."""
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        # Only the first CDN URL is offered; the page URL is sent as Referer.
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        stats = song.get('statistic') or {}

        lyric_url = song.get('lyric')
        subtitles = {'origin': [{'url': lyric_url}]} if lyric_url else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(stats.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(stats.get('play')),
        }
1623
1624
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Playlist extractor for a ``bilibili.com/audio/am<id>`` album."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Return the album as a playlist of ``BilibiliAudioIE`` entries.

        Album title/description are only requested when the album has at
        least one entry, and only applied when a title is present.
        """
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Songs without an id are skipped.
        song_ids = (str_or_none(song.get('id')) for song in songs)
        entries = [
            self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids if sid
        ]
        if not entries:
            return self.playlist_result(entries, am_id)

        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if not album_title:
            return self.playlist_result(entries, am_id)

        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
63dce309
S
1662
1663
class BiliBiliPlayerIE(InfoExtractor):
    """Redirects embedded-player URLs (``player.bilibili.com``) to the video extractor."""

    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Hand the ``aid`` from the player URL to ``BiliBiliIE`` as an ``av`` video URL."""
        video_id = self._match_id(url)
        # Use the same https://www.bilibili.com/video/ URL form that every
        # other extractor in this file passes to BiliBiliIE, rather than the
        # legacy bilibili.tv host.
        return self.url_result(
            f'https://www.bilibili.com/video/av{video_id}/',
            ie=BiliBiliIE.ie_key(), video_id=video_id)
16f7e6be
AG
1676
1677
1678class BiliIntlBaseIE(InfoExtractor):
c62ecf0d 1679 _API_URL = 'https://api.bilibili.tv/intl/gateway'
cfcf60ea 1680 _NETRC_MACHINE = 'biliintl'
1713c882 1681 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
16f7e6be 1682
c62ecf0d 1683 def _call_api(self, endpoint, *args, **kwargs):
cfcf60ea
M
1684 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
1685 if json.get('code'):
1686 if json['code'] in (10004004, 10004005, 10023006):
1687 self.raise_login_required()
1688 elif json['code'] == 10004001:
1689 self.raise_geo_restricted()
1690 else:
1691 if json.get('message') and str(json['code']) != json['message']:
1692 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1693 else:
1694 errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
1695 if kwargs.get('fatal'):
1696 raise ExtractorError(errmsg)
1697 else:
1698 self.report_warning(errmsg)
1699 return json.get('data')
16f7e6be 1700
efc947fb 1701 def json2srt(self, json):
1702 data = '\n\n'.join(
1703 f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
dfb855b4 1704 for i, line in enumerate(traverse_obj(json, (
1705 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
efc947fb 1706 return data
1707
f5f15c99
LR
def _get_subtitles(self, *, ep_id=None, aid=None):
    """Collect the subtitle tracks of an episode (``ep_id``) or video (``aid``).

    The subtitle listing is requested non-fatally. Every distinct subtitle URL
    found in it is handled according to its extension:
      * ``ass``  -> referenced by URL
      * ``json`` -> downloaded and converted to SRT via ``self.json2srt``
      * other    -> a warning is reported and the URL is ignored

    Returns a dict mapping language keys (``'en'`` when the track has no
    ``lang_key``) to lists of subtitle entries.
    """
    video_id = ep_id or aid
    query = filter_dict({
        'platform': 'web',
        's_locale': 'en_US',
        'episode_id': ep_id,
        'aid': aid,
    })
    listing = self._call_api(
        '/web/v2/subtitle', video_id, fatal=False, query=query,
        note='Downloading subtitles list', errnote='Unable to download subtitles list') or {}

    subtitles = {}
    seen_urls = set()
    for track in traverse_obj(listing, (('subtitles', 'video_subtitle'), ..., {dict})):
        lang = track.get('lang_key') or 'en'
        for sub_url in traverse_obj(track, ((None, 'ass', 'srt'), 'url', {url_or_none})):
            # The same URL can be listed in several places; handle it once.
            if sub_url in seen_urls:
                continue
            seen_urls.add(sub_url)

            ext = determine_ext(sub_url)
            if ext == 'ass':
                subtitles.setdefault(lang, []).append({
                    'ext': 'ass',
                    'url': sub_url,
                })
                continue
            if ext != 'json':
                self.report_warning('Unexpected subtitle extension', video_id)
                continue

            cues = self._download_json(
                sub_url, video_id, fatal=False,
                note=f'Downloading subtitles{format_field(track, "lang", " for %s")} ({lang})',
                errnote='Unable to download subtitles')
            if cues:
                subtitles.setdefault(lang, []).append({
                    'ext': 'srt',
                    'data': self.json2srt(cues),
                })

    return subtitles
1748
f5f15c99
LR
def _get_formats(self, *, ep_id=None, aid=None):
    """Build the format list of an episode (``ep_id``) or video (``aid``).

    Queries ``/web/playurl`` and turns each entry of the ``video`` list into a
    video-only format and each entry of ``audio_resource`` into an audio-only
    format. Entries without a URL are skipped.

    Returns a (possibly empty) list of format dicts.
    """
    video_json = self._call_api(
        '/web/playurl', ep_id or aid, note='Downloading video formats',
        errnote='Unable to download video formats', query=filter_dict({
            'platform': 'web',
            'ep_id': ep_id,
            'aid': aid,
        }))
    # The API helper may hand back None or a payload without "playurl"; use an
    # empty mapping so that no formats are produced instead of failing with a
    # TypeError while subscripting.
    playurl = traverse_obj(video_json, ('playurl', {dict})) or {}

    formats = []
    for vid in playurl.get('video') or []:
        video_res = vid.get('video_resource') or {}
        video_info = vid.get('stream_info') or {}
        if not video_res.get('url'):
            continue
        formats.append({
            'url': video_res['url'],
            'ext': 'mp4',
            'format_note': video_info.get('desc_words'),
            'width': video_res.get('width'),
            'height': video_res.get('height'),
            'vbr': video_res.get('bandwidth'),
            'acodec': 'none',
            'vcodec': video_res.get('codecs'),
            'filesize': video_res.get('size'),
        })
    for aud in playurl.get('audio_resource') or []:
        if not aud.get('url'):
            continue
        formats.append({
            'url': aud['url'],
            'ext': 'mp4',
            'abr': aud.get('bandwidth'),
            'acodec': aud.get('codecs'),
            'vcodec': 'none',
            'filesize': aud.get('size'),
        })

    return formats
1788
def _parse_video_metadata(self, video_data):
    """Map a video/episode description dict onto info-dict fields.

    ``video_data`` may be ``None`` or empty, in which case every field of the
    result is ``None``.

    Returns a dict with ``title``, ``description``, ``thumbnail``,
    ``timestamp`` and ``episode_number``. The episode number is taken from a
    leading ``E<digits>`` in ``title_display`` (followed by `` - `` or the end
    of the string).
    """
    # Callers look the episode up in API data and may come back empty-handed.
    video_data = video_data or {}
    title_display = video_data.get('title_display')
    return {
        'title': title_display or video_data.get('title'),
        'description': video_data.get('desc'),
        'thumbnail': video_data.get('cover'),
        'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
        'episode_number': int_or_none(self._search_regex(
            r'^E(\d+)(?:$| - )', title_display or '', 'episode number', default=None)),
    }
1798
def _perform_login(self, username, password):
    """Log in with a username and password.

    Downloads the login key, encrypts ``<server hash> + password`` with it
    using RSA PKCS#1 v1.5, and posts the base64-encoded result to the password
    login endpoint.

    Raises ``ExtractorError`` when the RSA implementation is unavailable or
    when the login reply carries a non-zero ``code``.
    """
    if not Cryptodome.RSA:
        raise ExtractorError('pycryptodomex not found. Please install', expected=True)

    key_response = self._download_json(
        'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
        note='Downloading login key', errnote='Unable to download login key')
    key_data = key_response['data']

    cipher = Cryptodome.PKCS1_v1_5.new(Cryptodome.RSA.importKey(key_data['key']))
    encrypted_password = cipher.encrypt((key_data['hash'] + password).encode('utf-8'))

    form = urlencode_postdata({
        'username': username,
        'password': base64.b64encode(encrypted_password).decode('ascii'),
        'keep_me': 'true',
        's_locale': 'en_US',
        'isTrusted': 'true'
    })
    login_response = self._download_json(
        'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
        data=form, note='Logging in', errnote='Unable to log in')

    if not login_response.get('code'):
        return
    if not login_response.get('message'):
        raise ExtractorError('Unable to log in')
    raise ExtractorError(
        f'Unable to log in: {self.IE_NAME} said: {login_response["message"]}', expected=True)
1822
16f7e6be
AG
1823
class BiliIntlIE(BiliIntlBaseIE):
    """Extractor for single bilibili.tv / biliintl.com pages.

    Handles series episodes (``/play/<season_id>/<ep_id>``) and standalone
    videos (``/video/<aid>``), with an optional two-letter language prefix.
    """

    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro'
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro'
            }],
        }
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro'
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro'
            }],
        }
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro'
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro'
            }],
        },
        'params': {
            'getcomments': True
        }
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True
        }
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Return the canonical English page URL of an episode or video.

        With ``series_id`` the ``/play/<series_id>/<video_id>`` form is used,
        otherwise the ``/video/<video_id>`` form.
        """
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Return title/description/thumbnail/etc. for the given page.

        Metadata smuggled into ``url`` is returned as is when it carries a
        title. Otherwise the webpage's embedded state is parsed, falling back
        to the season's episode list when ``season_id`` is known, and finally
        to HTML/JSON-LD metadata of the webpage.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            # The lookup yields None when the episode is not listed; keep an
            # empty dict so the webpage fallbacks below can still be used.
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False) or {}

        # XXX: webpage metadata may not be accurate; it is only a fallback so
        # that extraction does not fail when video_data is not found
        return merge_dicts(
            self._parse_video_metadata(video_data), {
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Yield every reply to the root comment ``root_id``.

        Pages are requested starting at cursor ``next_id`` until the API
        reports ``is_end`` or stops providing a follow-up cursor.
        """
        while True:
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/detail', display_id,
                note=f'Downloading reply comment of {root_id} - {next_id}',
                query={
                    'platform': 'web',
                    'ps': 20,  # comment's reply per page (default: 3)
                    'root': root_id,
                    'next': next_id,
                })

            for reply in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(reply, ('member', 'name')),
                    'author_id': traverse_obj(reply, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(reply, ('member', 'face')),
                    'text': traverse_obj(reply, ('content', 'message')),
                    'id': reply.get('rpid'),
                    'like_count': int_or_none(reply.get('like_count')),
                    'parent': reply.get('parent'),
                    'timestamp': unified_timestamp(reply.get('ctime_text'))
                }

            cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor', {dict})) or {}
            # Without a follow-up cursor there is no way to ask for more.
            if cursor.get('is_end') or cursor.get('next') is None:
                break
            next_id = cursor['next']

    def _get_comments(self, video_id, ep_id):
        """Yield all root comments of a video, each followed by its replies.

        Paging stops when the API reports ``is_end`` or returns a page without
        any comments.
        """
        for page in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {page + 1}',
                query={
                    'platform': 'web',
                    'pn': page,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            page_comments = traverse_obj(comment_api_raw_data, ('data', 'replies', ...))
            for comment in page_comments:
                yield {
                    'author': traverse_obj(comment, ('member', 'name')),
                    'author_id': traverse_obj(comment, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(comment, ('member', 'face')),
                    'text': traverse_obj(comment, ('content', 'message')),
                    'id': comment.get('rpid'),
                    'like_count': int_or_none(comment.get('like_count')),
                    'timestamp': unified_timestamp(comment.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(comment, ('member', 'type'))),
                }
                if comment.get('count'):
                    yield from self._get_comments_reply(comment.get('rpid'), display_id=video_id)

            # An empty page also ends paging: a reply lacking cursor data would
            # otherwise keep this loop requesting pages forever.
            if not page_comments or traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break

    def _real_extract(self, url):
        """Extract a single episode or video; see the class description."""
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}

            def skip_time(key):
                # Values are divided by 1000 before being used as chapter times.
                return float_or_none(traverse_obj(intro_ending_json, ('skip', key)), 1000)

            if intro_ending_json.get('skip'):
                # FIXME: start and end times seem to be off by a few seconds even though they follow ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    'start_time': skip_time('opening_start_time'),
                    'end_time': skip_time('opening_end_time'),
                    'title': 'Intro'
                }, {
                    'start_time': skip_time('ending_start_time'),
                    'end_time': skip_time('ending_end_time'),
                    'title': 'Outro'
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
16f7e6be
AG
2093
2094
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Extractor for bilibili.tv / biliintl.com series pages.

    Matches ``/play/<id>`` and ``/media/<id>`` URLs (optionally prefixed with a
    two-letter language) and returns the series as a playlist of episodes.
    """

    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield one URL result per episode listed for ``series_id``.

        The metadata already present in the episode list is smuggled into each
        episode URL. Episodes without an ``episode_id`` are skipped.
        """
        series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            episode_id = str_or_none(episode.get('episode_id'))
            if not episode_id:
                # No URL can be built without an id; skip instead of raising KeyError.
                continue
            yield self.url_result(smuggle_url(
                BiliIntlIE._make_url(episode_id, series_id),
                self._parse_video_metadata(episode)
            ), BiliIntlIE, episode_id)

    def _real_extract(self, url):
        """Return the playlist for the series page at ``url``."""
        series_id = self._match_id(url)
        # The API helper can return None; traverse safely instead of calling
        # .get() on the raw result.
        series_info = traverse_obj(self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id),
            ('season', {dict})) or {}
        return self.playlist_result(
            self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
            categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
b4f53662
H
2147
2148
class BiliLiveIE(InfoExtractor):
    """Extractor for live rooms at ``live.bilibili.com/<room id>``."""

    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
            'ext': 'flv',
            'title': "太空狼人杀联动,不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live'
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True
    }]

    # Keys are the "qn" values sent to the play-info API; each maps to the
    # format_id/format_note merged into the formats obtained for that value.
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Ranks a qn value by its position in _FORMATS.
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Query the live API and return its ``data`` member (``{}`` if empty).

        Raises ``ExtractorError`` when the reply's ``code`` is not 0.
        """
        response = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if response.get('code') == 0:
            return response.get('data') or {}
        raise ExtractorError(response.get('message') or 'Unable to download JSON metadata')

    def _parse_formats(self, qn, fmt):
        """Yield a format dict for every URL of the codecs in ``fmt`` served at ``qn``.

        Codecs whose ``current_qn`` differs from ``qn`` are ignored.
        """
        matching_codecs = (
            candidate for candidate in fmt.get('codec') or []
            if candidate.get('current_qn') == qn)
        for codec in matching_codecs:
            for url_info in codec['url_info']:
                stream_url = f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}'
                yield {
                    'url': stream_url,
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        """Extract the running live stream of the room at ``url``.

        Raises an expected ``ExtractorError`` when the room's ``live_status``
        is 0.
        """
        room_id = self._match_id(url)
        room_info = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_info.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        # One play-info request per known quality value.
        for qn in self._FORMATS:
            play_info = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            })
            stream_formats = traverse_obj(
                play_info, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []
            for fmt in stream_formats:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_info.get('title'),
            'description': room_info.get('description'),
            'thumbnail': room_info.get('user_cover'),
            # Taken from the play-info reply of the last quality requested.
            'timestamp': play_info.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }