]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bilibili.py
[ie/BilibiliSpaceVideo] Fix extraction (#9905)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
CommitLineData
cfcf60ea 1import base64
c34f505b 2import functools
6f10cdcf 3import hashlib
ad974876 4import itertools
9f09bdcf 5import json
c34f505b 6import math
5336bf57 7import re
6f10cdcf 8import time
ad974876 9import urllib.parse
ffa017cf 10import uuid
28746fbd 11
06167fbb 12from .common import InfoExtractor, SearchInfoExtractor
f6a765ce 13from ..dependencies import Cryptodome
3d2623a8 14from ..networking.exceptions import HTTPError
28746fbd 15from ..utils import (
bd8f48c7 16 ExtractorError,
ad974876 17 GeoRestrictedError,
2b9d0216
L
18 InAdvancePagedList,
19 OnDemandPagedList,
9e68747f 20 bool_or_none,
9f09bdcf 21 clean_html,
cf6413e8 22 determine_ext,
f5f15c99 23 filter_dict,
6461f2b7 24 float_or_none,
ad974876 25 format_field,
9f09bdcf 26 get_element_by_class,
2b9d0216 27 int_or_none,
bdd0b75e 28 join_nonempty,
ad974876 29 make_archive_id,
d37422f1 30 merge_dicts,
f8580bf0 31 mimetype2ext,
2b9d0216 32 parse_count,
ad974876 33 parse_qs,
b4f53662 34 qualities,
26fdfc37 35 smuggle_url,
efc947fb 36 srt_subtitles_timecode,
4bc15a68 37 str_or_none,
2b9d0216 38 traverse_obj,
6f10cdcf 39 try_call,
b093c38c 40 unified_timestamp,
26fdfc37 41 unsmuggle_url,
c62ecf0d 42 url_or_none,
ad974876 43 urlencode_postdata,
9e68747f 44 variadic,
28746fbd
PH
45)
46
47
class BilibiliBaseIE(InfoExtractor):
    """Helpers shared by the Bilibili extractors defined in this module."""

    # Captures the numeric id that precedes ".m4s?" in a DASH stream URL
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')

    def extract_formats(self, play_info):
        """Build format dicts from the DASH audio/video streams in ``play_info``.

        Also prints a notice naming every quality listed under
        ``support_formats`` for which no video stream was returned.
        """
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            # Only mark the video streams as audio-less when separate audio streams exist
            'acodec': 'none' if audios else None,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
        if missing_formats:
            # A quality may have no description at all; fall back to its id so join() cannot fail
            missing_names = ", ".join(str(format_names[i] or i) for i in missing_formats)
            self.to_screen(f'Format(s) {missing_names} are missing; '
                           f'you have to login or become premium member to download them. {self._login_hint()}')

        return formats

    def _download_playinfo(self, video_id, cid, headers=None):
        """Download the playurl JSON for ``video_id``/``cid`` and return its ``data`` member."""
        return self._download_json(
            'https://api.bilibili.com/x/player/playurl', video_id,
            query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
            note=f'Downloading video formats for cid {cid}', headers=headers)['data']

    def json2srt(self, json_data):
        """Render the entries of ``json_data['body']`` as SRT text.

        Each entry must provide ``from``, ``to`` and ``content``; cues are
        numbered from 1. Returns an empty string when there is no body.
        """
        return ''.join(
            f'{idx}\n'
            f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
            f'{line["content"]}\n\n'
            for idx, line in enumerate(json_data.get('body') or [], 1))

    def _get_subtitles(self, video_id, cid, aid=None):
        """Return a subtitles dict: the danmaku XML plus every listed subtitle converted to SRT.

        The player API is queried by ``aid`` when given, otherwise by ``video_id`` as bvid.
        """
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }]
        }

        subtitle_info = traverse_obj(self._download_json(
            'https://api.bilibili.com/x/player/v2', video_id,
            query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
            note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
        subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
        if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
            if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'):  # no login session cookie
                self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
        for s in subs_list:
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
            })
        return subtitles

    def _get_chapters(self, aid, cid):
        """Return chapters built from the player API's ``view_points``, or None if there are none.

        No request is made unless both ``aid`` and ``cid`` are truthy.
        """
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

    def _get_comments(self, aid):
        """Yield comment dicts page by page until a page comes back without replies."""
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
            for reply in replies:
                yield from self._get_all_children(reply)

    def _get_all_children(self, reply):
        """Yield ``reply`` as a comment dict, then recursively every nested reply."""
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for child in traverse_obj(reply, ('replies', ...)):
            yield from self._get_all_children(child)

    def _get_episodes_from_season(self, ss_id, url):
        """Yield a url_result for each main-section episode of season ``ss_id``.

        Episodes without a valid ``share_url`` or without an ``id`` are skipped.
        """
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))

    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
        """Recursively collect the edges of an interactive video, starting from ``edge_id``.

        ``edges`` is filled in place with the info of every visited edge. The
        return value maps each cid to a dict of the edges (keyed by edge id)
        that play that cid.
        """
        cid_edges = cid_edges or {}
        division_data = self._download_json(
            'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
            query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
            note=f'Extracting divisions from edge {edge_id}')
        # traverse_obj returns None when nothing matches; dict.update(None) would raise TypeError
        edges.setdefault(edge_id, {}).update(
            traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
                'title': ('title', {str}),
                'cid': ('cid', {int_or_none}),
            }), get_all=False) or {})

        edges[edge_id].update(traverse_obj(division_data, ('data', {
            'title': ('title', {str}),
            'choices': ('edges', 'questions', ..., 'choices', ..., {
                'edge_id': ('id', {int_or_none}),
                'cid': ('cid', {int_or_none}),
                'text': ('option', {str}),
            }),
        })) or {})
        # use dict to combine edges that use the same video section (same cid)
        cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
        for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
            # Only descend into edges that have not been seen yet
            if choice['edge_id'] not in edges:
                edges[choice['edge_id']] = {'cid': choice['cid']}
                self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
        return cid_edges

    def _get_interactive_entries(self, video_id, cid, metainfo):
        """Yield one entry per distinct cid reachable in the interactive video's edge graph.

        Each entry starts from ``metainfo`` and overrides id, title, formats,
        description, duration and subtitles for that cid.
        """
        graph_version = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/wbi/v2', video_id,
                'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
            ('data', 'interaction', 'graph_version', {int_or_none}))
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for edge_cid, edges in cid_edges.items():
            play_info = self._download_playinfo(video_id, edge_cid)
            first_edge = next(iter(edges.values()))
            yield {
                **metainfo,
                'id': f'{video_id}_{edge_cid}',
                'title': f'{metainfo.get("title")} - {first_edge.get("title")}',
                'formats': self.extract_formats(play_info),
                'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'subtitles': self.extract_subtitles(video_id, edge_cid),
            }
bdd0b75e 224
ad974876
L
225
class BiliBiliIE(BilibiliBaseIE):
    """Extractor for regular bilibili.com video pages and festival pages addressed by bvid."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
        },
    }, {
        'note': 'old av URL version',
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            'ext': 'mp4',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'id': 'BV11x411K7CN',
            'title': '【金坷垃】金泡沫',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 18,
        'playlist': [{
            'info_dict': {
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:10',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
            }
        }]
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:10',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
        }
    }, {
        'note': 'video has subtitles',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫太渴',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2'
        },
        'params': {'listsubtitles': True},
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'video redirects to festival page',
        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
        'info_dict': {
            'id': 'BV1wP4y1P72h',
            'ext': 'mp4',
            'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
            'timestamp': 1643947497,
            'upload_date': '20220204',
            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
            'uploader': '叨叨冯聊音乐',
            'duration': 246.719,
            'uploader_id': '528182630',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
        'info_dict': {
            'id': 'BV1ay4y1d77f',
            'ext': 'mp4',
            'title': '【崩坏3新春剧场】为特别的你送上祝福!',
            'timestamp': 1674273600,
            'upload_date': '20230121',
            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
            'uploader': '果蝇轰',
            'duration': 1111.722,
            'uploader_id': '8469526',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'interactive/split-path video',
        'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
        'info_dict': {
            'id': 'BV1af4y1H7ga',
            'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
            'timestamp': 1630500414,
            'upload_date': '20210901',
            'description': 'md5:01113e39ab06e28042d74ac356a08786',
            'tags': list,
            'uploader': '钉宫妮妮Ninico',
            'duration': 1503,
            'uploader_id': '8881297',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'playlist_count': 33,
        'playlist': [{
            'info_dict': {
                'id': 'BV1af4y1H7ga_400950101',
                'ext': 'mp4',
                'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
                'timestamp': 1630500414,
                'upload_date': '20210901',
                'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
                'tags': list,
                'uploader': '钉宫妮妮Ninico',
                'duration': 11.605,
                'uploader_id': '8881297',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }, {
        'note': '301 redirect to bangumi link',
        'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
        'info_dict': {
            'id': '288525',
            'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
            'ext': 'mp4',
            'series': '我和我的祖国',
            'series_id': '4780',
            'season': '幕后纪实',
            'season_id': '28609',
            'season_number': 1,
            'episode': '钱学森弹道和乘波体飞行器是什么?',
            'episode_id': '288525',
            'episode_number': 105,
            'duration': 1183.957,
            'timestamp': 1571648124,
            'upload_date': '20191021',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
        'info_dict': {
            'id': 'BV1jL41167ZG',
            'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
            'ext': 'mp4',
        },
        'skip': 'supporter-only video',
    }, {
        'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
        'info_dict': {
            'id': 'BV1Ks411f7aQ',
            'title': '【BD1080P】狼与香辛料I【华盟】',
            'ext': 'mp4',
        },
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
        'info_dict': {
            'id': 'BV1GJ411x7h7',
            'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
            'ext': 'mp4',
        },
        'skip': 'geo-restricted',
    }]

    def _real_extract(self, url):
        """Extract a single video, an anthology playlist, or an interactive-video playlist."""
        video_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
        # After redirects the final URL may no longer be one this extractor handles
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        # Pages without "videoData" are handled as festival pages
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info_obj = self._search_json(
                r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
            if not play_info_obj:
                error_code = traverse_obj(initial_state, ('error', 'trueCode'))
                if error_code == -403:
                    self.raise_login_required()
                if error_code == -404:
                    raise ExtractorError(
                        'This video may be deleted or geo-restricted. '
                        'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
            play_info = traverse_obj(play_info_obj, ('data', {dict}))
            if not play_info:
                if traverse_obj(play_info_obj, 'code') == 87007:
                    toast = get_element_by_class('tips-toast', webpage) or ''
                    belongs_to = get_element_by_class('belongs-to', toast) or ''
                    level = get_element_by_class('level', toast) or ''
                    msg = clean_html(f'{belongs_to},{level}')
                    raise ExtractorError(
                        f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
                raise ExtractorError('Failed to extract play info')
            video_data = initial_state['videoData']

        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = []
        if not is_festival:
            page_list_json = traverse_obj(
                self._download_json(
                    'https://api.bilibili.com/x/player/pagelist', video_id,
                    fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                    note='Extracting videos in anthology', headers=headers),
                'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            part_id = part_id or 1
            part_name = traverse_obj(page_list_json, (part_id - 1, 'part')) or ''
            title += f' p{part_id:02d} {part_name}'

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        if part_id:
            cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid'))
        else:
            cid = video_data.get('cid')

        festival_info = {}
        if is_festival:
            # Festival pages need a separate playurl request for the play info
            play_info = self._download_playinfo(video_id, cid, headers=headers)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        if traverse_obj(video_data, ('rights', 'is_stein_gate')):
            # Interactive video: one playlist entry per distinct video section
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
                    'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                    '__post_extractor': self.extract_comments(aid),
                })

        return {
            **metainfo,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'chapters': self._get_chapters(aid, cid),
            'subtitles': self.extract_subtitles(video_id, cid),
            'formats': self.extract_formats(play_info),
            '__post_extractor': self.extract_comments(aid),
        }
602
06167fbb 603
class BiliBiliBangumiIE(BilibiliBaseIE):
    """Extractor for a single bangumi episode (``/bangumi/play/ep<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
        'info_dict': {
            'id': '21495',
            'ext': 'mp4',
            'series': '悠久之翼',
            'series_id': '774',
            'season': '第二季',
            'season_id': '1182',
            'season_number': 2,
            'episode': 'forever/ef',
            'episode_id': '21495',
            'episode_number': 12,
            'title': '12 forever/ef',
            'duration': 1420.791,
            'timestamp': 1320412200,
            'upload_date': '20111104',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
        'info_dict': {
            'id': '267851',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '残酷',
            'episode_id': '267851',
            'episode_number': 1,
            'title': '1 残酷',
            'duration': 1425.256,
            'timestamp': 1554566400,
            'upload_date': '20190406',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
        'skip': 'Geo-restricted',
    }, {
        'note': 'a making-of which falls outside main section',
        'url': 'https://www.bilibili.com/bangumi/play/ep345120',
        'info_dict': {
            'id': '345120',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '炭治郎篇',
            'episode_id': '345120',
            'episode_number': 27,
            'title': '#1 炭治郎篇',
            'duration': 1922.129,
            'timestamp': 1602853860,
            'upload_date': '20201016',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
    }]

    def _real_extract(self, url):
        """Extract formats and series/season/episode metadata for one bangumi episode.

        Raises GeoRestrictedError or a login-required error when the webpage
        or the playurl response indicates the episode is not accessible.
        """
        episode_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage = self._download_webpage(url, episode_id, headers=headers)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers['Referer'] = url
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
            headers=headers)
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        # Position (1-based) of this episode among the main and section episodes;
        # falls back to (1, {}) when the episode is not listed
        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        # Without a season id there is nothing to look up; the previous
        # `season_id and next(...)` form tried to unpack a non-tuple in that case
        season_number, season_title = None, None
        if season_id:
            season_number, season_title = next((
                (idx, e.get('season_title')) for idx, e in enumerate(
                    traverse_obj(bangumi_info, ('seasons', ...)), 1)
                if e.get('season_id') == season_id
            ), (None, None))

        aid = episode_info.get('aid')

        return {
            'id': episode_id,
            'formats': formats,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            **traverse_obj(episode_info, {
                'episode': ('long_title', {str}),
                'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
                'timestamp': ('pub_time', {int_or_none}),
                'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
            }),
            'episode_id': episode_id,
            'season': str_or_none(season_title),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': {'Referer': url},
        }
bd8f48c7 731
bd8f48c7 732
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    """Playlist extractor for bangumi media pages (``/bangumi/media/md<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        """Read the season id from the page's initial state and return its episodes as a playlist."""
        media_id = self._match_id(url)
        page = self._download_webpage(url, media_id)

        state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', page, 'initial_state', media_id)
        ss_id = state['mediaInfo']['season_id']

        # Playlist title/description come from the same mediaInfo object
        playlist_meta = traverse_obj(state, ('mediaInfo', {
            'title': ('title', {str}),
            'description': ('evaluate', {str}),
        }))
        episodes = self._get_episodes_from_season(ss_id, url)
        return self.playlist_result(episodes, media_id, **playlist_meta)
bdd0b75e 786
bd8f48c7 787
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    """Playlist extractor for bangumi season pages (``/bangumi/play/ss<id>``)."""

    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        """Return the season's episodes as a playlist, titled from the page's JSON-LD block."""
        ss_id = self._match_id(url)
        webpage = self._download_webpage(url, ss_id)
        # With get_all=False the lookup yields None when no list element matches;
        # fall back to an empty dict so the ** expansion below cannot fail
        metainfo = traverse_obj(
            self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
            ('itemListElement', ..., {
                'title': ('name', {str}),
                'description': ('description', {str}),
            }), get_all=False) or {}

        return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
838
839
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Shared helpers for the Bilibili "cheese" (course) extractors."""

    # Sent with every API request made here and passed on as the download headers
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for episode ``ep_id`` from an already downloaded ``season_info``.

        Raises ExtractorError when the episode is not listed in ``season_info``
        or is not yet available, and a login-required error when the episode
        is not marked playable.
        """
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        if not episode_info:
            # Previously this surfaced as a TypeError from subscripting None
            raise ExtractorError(f'Unable to find episode {ep_id} in the season info')
        aid, cid = episode_info['aid'], episode_info['cid']

        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Attribute the entry to BilibiliCheeseIE regardless of which subclass produced it
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Download the season JSON, selecting it with ``<query_key>=<video_id>``, and return its ``data``."""
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
bd8f48c7 888
9f09bdcf 889
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    """Extractor for a single course episode (/cheese/play/ep<id> URLs)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        }
    }]

    def _real_extract(self, url):
        """Look up the season that contains the episode, then extract the episode."""
        episode_id = self._match_id(url)
        season_info = self._download_season_info('ep_id', episode_id)
        return self._extract_episode(season_info, episode_id)
915
916
917class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
918 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
919 _TESTS = [{
920 'url': 'https://www.bilibili.com/cheese/play/ss5918',
921 'info_dict': {
922 'id': '5918',
923 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
924 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
925 },
926 'playlist': [{
927 'info_dict': {
928 'id': '229832',
929 'ext': 'mp4',
930 'title': '1 - 课程先导片',
931 'alt_title': '视频课 · 3分41秒',
932 'uploader': '马督工',
933 'uploader_id': '316568752',
934 'episode': '课程先导片',
935 'episode_id': '229832',
936 'episode_number': 1,
937 'duration': 221,
938 'timestamp': 1695549606,
939 'upload_date': '20230924',
940 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
941 'view_count': int,
942 }
943 }],
944 'params': {'playlist_items': '1'},
945 }, {
946 'url': 'https://www.bilibili.com/cheese/play/ss5918',
947 'info_dict': {
948 'id': '5918',
949 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
950 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
951 },
952 'playlist_mincount': 5,
953 'skip': 'paid video in list',
954 }]
955
956 def _get_cheese_entries(self, season_info):
957 for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
958 yield self._extract_episode(season_info, ep_id)
959
960 def _real_extract(self, url):
961 season_id = self._match_id(url)
962 season_info = self._download_season_info('season_id', season_id)
963
964 return self.playlist_result(
965 self._get_cheese_entries(season_info), season_id,
966 **traverse_obj(season_info, {
967 'title': ('title', {str}),
968 'description': ('subtitle', {str}),
969 }))
4bc15a68
RA
970
971
2b9d0216
L
class BilibiliSpaceBaseIE(InfoExtractor):
    """Base class for extractors that page through a listing API."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Return (metadata, paged_list) for a paginated listing.

        fetch_page(idx) downloads the page with the given index,
        get_metadata(page) must return a dict holding at least 'page_count'
        and 'page_size', and get_entries(page) produces the page's entries.
        """
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def entries_for(idx):
            # The first page was already downloaded for the metadata; reuse it
            page = fetch_page(idx) if idx else initial_page
            return get_entries(page)

        paged_list = InAdvancePagedList(
            entries_for, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
982
983
984class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
985 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
6efb0711 986 _TESTS = [{
987 'url': 'https://space.bilibili.com/3985676/video',
2b9d0216
L
988 'info_dict': {
989 'id': '3985676',
990 },
991 'playlist_mincount': 178,
6f10cdcf
E
992 }, {
993 'url': 'https://space.bilibili.com/313580179/video',
994 'info_dict': {
995 'id': '313580179',
996 },
997 'playlist_mincount': 92,
6efb0711 998 }]
999
6f10cdcf
E
1000 def _extract_signature(self, playlist_id):
1001 session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
1002
1003 key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
1004 img_key = traverse_obj(
1005 session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
1006 sub_key = traverse_obj(
1007 session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
1008
1009 session_key = img_key + sub_key
1010
1011 signature_values = []
1012 for position in (
1013 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
1014 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
1015 57, 62, 11, 36, 20, 34, 44, 52
1016 ):
1017 char_at_position = try_call(lambda: session_key[position])
1018 if char_at_position:
1019 signature_values.append(char_at_position)
1020
1021 return ''.join(signature_values)[:32]
1022
2b9d0216
L
1023 def _real_extract(self, url):
1024 playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
1025 if not is_video_url:
1026 self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1027 'To download audios, add a "/audio" to the URL')
1028
6f10cdcf
E
1029 signature = self._extract_signature(playlist_id)
1030
2b9d0216 1031 def fetch_page(page_idx):
6f10cdcf
E
1032 query = {
1033 'keyword': '',
1034 'mid': playlist_id,
1035 'order': 'pubdate',
1036 'order_avoided': 'true',
1037 'platform': 'web',
1038 'pn': page_idx + 1,
1039 'ps': 30,
1040 'tid': 0,
1041 'web_location': 1550101,
1042 'wts': int(time.time()),
1043 }
1044 query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
1045
12f153a8 1046 try:
6f10cdcf 1047 response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
4cc99d7b 1048 playlist_id, note=f'Downloading page {page_idx}', query=query,
1049 headers={'referer': url})
12f153a8 1050 except ExtractorError as e:
3d2623a8 1051 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
12f153a8
L
1052 raise ExtractorError(
1053 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
1054 raise
06d52c87 1055 if response['code'] in (-352, -401):
12f153a8 1056 raise ExtractorError(
06d52c87 1057 f'Request is blocked by server ({-response["code"]}), '
1058 'please add cookies, wait and try later.', expected=True)
12f153a8 1059 return response['data']
2b9d0216
L
1060
1061 def get_metadata(page_data):
1062 page_size = page_data['page']['ps']
1063 entry_count = page_data['page']['count']
1064 return {
1065 'page_count': math.ceil(entry_count / page_size),
1066 'page_size': page_size,
1067 }
6efb0711 1068
2b9d0216
L
1069 def get_entries(page_data):
1070 for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
1071 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
6efb0711 1072
2b9d0216
L
1073 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1074 return self.playlist_result(paged_list, playlist_id)
6efb0711 1075
6efb0711 1076
2b9d0216
L
1077class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
1078 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
1079 _TESTS = [{
6f10cdcf 1080 'url': 'https://space.bilibili.com/313580179/audio',
2b9d0216 1081 'info_dict': {
6f10cdcf 1082 'id': '313580179',
2b9d0216
L
1083 },
1084 'playlist_mincount': 1,
1085 }]
1086
1087 def _real_extract(self, url):
1088 playlist_id = self._match_id(url)
1089
1090 def fetch_page(page_idx):
1091 return self._download_json(
1092 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
1093 note=f'Downloading page {page_idx}',
12f153a8 1094 query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
2b9d0216
L
1095
1096 def get_metadata(page_data):
1097 return {
1098 'page_count': page_data['pageCount'],
1099 'page_size': page_data['pageSize'],
1100 }
1101
1102 def get_entries(page_data):
1103 for entry in page_data.get('data', []):
1104 yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
1105
1106 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1107 return self.playlist_result(paged_list, playlist_id)
1108
1109
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Helpers shared by the list-style extractors (collections, series, favorites, ...)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a url_result for every video id found in page_data.

        bvid_keys is a key or a path of keys leading to the list of items;
        ending_key is the field of each item that holds the BV id.
        """
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Return the uploader name parsed from the space page title, or None."""
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        if not webpage:
            # A failed non-fatal download does not return a string; searching it would raise
            return None
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Same as the parent method, but with the paging keys removed from metadata.

        This lets callers pass the returned metadata directly to playlist_result().
        """
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
1124
1125
1126class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
1127 _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
2b9d0216
L
1128 _TESTS = [{
1129 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
1130 'info_dict': {
1131 'id': '2142762_57445',
9e68747f 1132 'title': '【完结】《底特律 变人》全结局流程解说',
1133 'description': '',
1134 'uploader': '老戴在此',
1135 'uploader_id': '2142762',
1136 'timestamp': int,
1137 'upload_date': str,
1138 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
2b9d0216
L
1139 },
1140 'playlist_mincount': 31,
1141 }]
06167fbb 1142
1143 def _real_extract(self, url):
2b9d0216
L
1144 mid, sid = self._match_valid_url(url).group('mid', 'sid')
1145 playlist_id = f'{mid}_{sid}'
1146
1147 def fetch_page(page_idx):
1148 return self._download_json(
1149 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
1150 playlist_id, note=f'Downloading page {page_idx}',
12f153a8 1151 query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
2b9d0216
L
1152
1153 def get_metadata(page_data):
1154 page_size = page_data['page']['page_size']
1155 entry_count = page_data['page']['total']
1156 return {
1157 'page_count': math.ceil(entry_count / page_size),
1158 'page_size': page_size,
9e68747f 1159 'uploader': self._get_uploader(mid, playlist_id),
1160 **traverse_obj(page_data, {
1161 'title': ('meta', 'name', {str}),
1162 'description': ('meta', 'description', {str}),
1163 'uploader_id': ('meta', 'mid', {str_or_none}),
1164 'timestamp': ('meta', 'ptime', {int_or_none}),
1165 'thumbnail': ('meta', 'cover', {url_or_none}),
1166 })
2b9d0216
L
1167 }
1168
1169 def get_entries(page_data):
9e68747f 1170 return self._get_entries(page_data, 'archives')
2b9d0216
L
1171
1172 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
9e68747f 1173 return self.playlist_result(paged_list, playlist_id, **metadata)
1174
1175
1176class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
1177 _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
1178 _TESTS = [{
1179 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
1180 'info_dict': {
1181 'id': '1958703906_547718',
1182 'title': '直播回放',
1183 'description': '直播回放',
1184 'uploader': '靡烟miya',
1185 'uploader_id': '1958703906',
1186 'timestamp': 1637985853,
1187 'upload_date': '20211127',
1188 'modified_timestamp': int,
1189 'modified_date': str,
1190 },
1191 'playlist_mincount': 513,
1192 }]
1193
1194 def _real_extract(self, url):
1195 mid, sid = self._match_valid_url(url).group('mid', 'sid')
1196 playlist_id = f'{mid}_{sid}'
1197 playlist_meta = traverse_obj(self._download_json(
1198 f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
1199 ), {
1200 'title': ('data', 'meta', 'name', {str}),
1201 'description': ('data', 'meta', 'description', {str}),
1202 'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
1203 'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
1204 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
1205 })
1206
1207 def fetch_page(page_idx):
1208 return self._download_json(
1209 'https://api.bilibili.com/x/series/archives',
1210 playlist_id, note=f'Downloading page {page_idx}',
1211 query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
1212
1213 def get_metadata(page_data):
1214 page_size = page_data['page']['size']
1215 entry_count = page_data['page']['total']
1216 return {
1217 'page_count': math.ceil(entry_count / page_size),
1218 'page_size': page_size,
1219 'uploader': self._get_uploader(mid, playlist_id),
1220 **playlist_meta
1221 }
1222
1223 def get_entries(page_data):
1224 return self._get_entries(page_data, 'archives')
1225
1226 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1227 return self.playlist_result(paged_list, playlist_id, **metadata)
1228
1229
1230class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
1231 _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
1232 _TESTS = [{
1233 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
1234 'info_dict': {
1235 'id': '1103407912',
1236 'title': '【V2】(旧)',
1237 'description': '',
1238 'uploader': '晓月春日',
1239 'uploader_id': '84912',
1240 'timestamp': 1604905176,
1241 'upload_date': '20201109',
1242 'modified_timestamp': int,
1243 'modified_date': str,
1244 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
1245 'view_count': int,
1246 'like_count': int,
1247 },
1248 'playlist_mincount': 22,
1249 }, {
1250 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
1251 'only_matching': True,
1252 }]
1253
1254 def _real_extract(self, url):
1255 fid = self._match_id(url)
1256
1257 list_info = self._download_json(
1258 f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
1259 fid, note='Downloading favlist metadata')
1260 if list_info['code'] == -403:
1261 self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
1262
1263 entries = self._get_entries(self._download_json(
1264 f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
1265 fid, note='Download favlist entries'), 'data')
1266
1267 return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
1268 'title': ('title', {str}),
1269 'description': ('intro', {str}),
1270 'uploader': ('upper', 'name', {str}),
1271 'uploader_id': ('upper', 'mid', {str_or_none}),
1272 'timestamp': ('ctime', {int_or_none}),
1273 'modified_timestamp': ('mtime', {int_or_none}),
1274 'thumbnail': ('cover', {url_or_none}),
1275 'view_count': ('cnt_info', 'play', {int_or_none}),
1276 'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
1277 })))
1278
1279
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Extractor for the logged-in user's watch-later list."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Return the watch-later playlist; login is requested on API code -101."""
        # The playlist id is the DedeUserID cookie value, or 'watchlater' without that cookie
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = getattr(user_cookie, 'value', 'watchlater')

        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')

        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')),
            id=list_id, title='稍后再看')
1297
1298
1299class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
1300 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
1301 _TESTS = [{
1302 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
1303 'info_dict': {
1304 'id': '5_547718',
1305 'title': '直播回放',
1306 'uploader': '靡烟miya',
1307 'uploader_id': '1958703906',
1308 'timestamp': 1637985853,
1309 'upload_date': '20211127',
1310 },
1311 'playlist_mincount': 513,
e439693f 1312 }, {
1313 'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
1314 'info_dict': {
1315 'id': 'BV1DU4y1r7tz',
1316 'ext': 'mp4',
1317 'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
1318 'upload_date': '20220820',
1319 'description': '',
1320 'timestamp': 1661016330,
1321 'uploader_id': '1958703906',
1322 'uploader': '靡烟miya',
1323 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1324 'duration': 9552.903,
1325 'tags': list,
1326 'comment_count': int,
1327 'view_count': int,
1328 'like_count': int,
1329 '_old_archive_ids': ['bilibili 687146339_part1'],
1330 },
1331 'params': {'noplaylist': True},
9e68747f 1332 }, {
1333 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
1334 'info_dict': {
1335 'id': '5_547718',
1336 },
1337 'playlist_mincount': 513,
1338 'skip': 'redirect url',
1339 }, {
1340 'url': 'https://www.bilibili.com/list/ml1103407912',
1341 'info_dict': {
1342 'id': '3_1103407912',
1343 'title': '【V2】(旧)',
1344 'uploader': '晓月春日',
1345 'uploader_id': '84912',
1346 'timestamp': 1604905176,
1347 'upload_date': '20201109',
1348 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
1349 },
1350 'playlist_mincount': 22,
1351 }, {
1352 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
1353 'info_dict': {
1354 'id': '3_1103407912',
1355 },
1356 'playlist_mincount': 22,
1357 'skip': 'redirect url',
1358 }, {
1359 'url': 'https://www.bilibili.com/list/watchlater',
1360 'info_dict': {'id': 'watchlater'},
1361 'playlist_mincount': 0,
1362 'skip': 'login required',
1363 }, {
1364 'url': 'https://www.bilibili.com/medialist/play/watchlater',
1365 'info_dict': {'id': 'watchlater'},
1366 'playlist_mincount': 0,
1367 'skip': 'login required',
1368 }]
1369
1370 def _extract_medialist(self, query, list_id):
1371 for page_num in itertools.count(1):
1372 page_data = self._download_json(
1373 'https://api.bilibili.com/x/v2/medialist/resource/list',
1374 list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
1375 )['data']
1376 yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
1377 query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
1378 if not page_data.get('has_more', False):
1379 break
1380
1381 def _real_extract(self, url):
1382 list_id = self._match_id(url)
e439693f 1383
1384 bvid = traverse_obj(parse_qs(url), ('bvid', 0))
1385 if not self._yes_playlist(list_id, bvid):
1386 return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
1387
9e68747f 1388 webpage = self._download_webpage(url, list_id)
1389 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
1390 if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
1391 error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
1392 error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
1393 if error_code == -400 and list_id == 'watchlater':
1394 self.raise_login_required('You need to login to access your watchlater playlist')
1395 elif error_code == -403:
1396 self.raise_login_required('This is a private playlist. You need to login as its owner')
1397 elif error_code == 11010:
1398 raise ExtractorError('Playlist is no longer available', expected=True)
1399 raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
1400
1401 query = {
1402 'ps': 20,
1403 'with_current': False,
1404 **traverse_obj(initial_state, {
1405 'type': ('playlist', 'type', {int_or_none}),
1406 'biz_id': ('playlist', 'id', {int_or_none}),
1407 'tid': ('tid', {int_or_none}),
1408 'sort_field': ('sortFiled', {int_or_none}),
1409 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
1410 })
1411 }
1412 metadata = {
1413 'id': f'{query["type"]}_{query["biz_id"]}',
1414 **traverse_obj(initial_state, ('mediaListInfo', {
1415 'title': ('title', {str}),
1416 'uploader': ('upper', 'name', {str}),
1417 'uploader_id': ('upper', 'mid', {str_or_none}),
1418 'timestamp': ('ctime', {int_or_none}),
1419 'thumbnail': ('cover', {url_or_none}),
1420 })),
1421 }
1422 return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
06167fbb 1423
1424
c34f505b 1425class BilibiliCategoryIE(InfoExtractor):
1426 IE_NAME = 'Bilibili category extractor'
1427 _MAX_RESULTS = 1000000
9e68747f 1428 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
c34f505b 1429 _TESTS = [{
1430 'url': 'https://www.bilibili.com/v/kichiku/mad',
1431 'info_dict': {
1432 'id': 'kichiku: mad',
1433 'title': 'kichiku: mad'
1434 },
1435 'playlist_mincount': 45,
1436 'params': {
1437 'playlistend': 45
1438 }
1439 }]
1440
1441 def _fetch_page(self, api_url, num_pages, query, page_num):
1442 parsed_json = self._download_json(
1443 api_url, query, query={'Search_key': query, 'pn': page_num},
1444 note='Extracting results from page %s of %s' % (page_num, num_pages))
1445
f8580bf0 1446 video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
c34f505b 1447 if not video_list:
1448 raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
1449
1450 for video in video_list:
1451 yield self.url_result(
1452 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
1453
1454 def _entries(self, category, subcategory, query):
1455 # map of categories : subcategories : RIDs
1456 rid_map = {
1457 'kichiku': {
1458 'mad': 26,
1459 'manual_vocaloid': 126,
1460 'guide': 22,
1461 'theatre': 216,
1462 'course': 127
1463 },
1464 }
1465
1466 if category not in rid_map:
e88d44c6 1467 raise ExtractorError(
1468 f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
c34f505b 1469 if subcategory not in rid_map[category]:
e88d44c6 1470 raise ExtractorError(
1471 f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
c34f505b 1472 rid_value = rid_map[category][subcategory]
1473
1474 api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1475 page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
f8580bf0 1476 page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
c34f505b 1477 count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
1478 if count is None or not size:
1479 raise ExtractorError('Failed to calculate either page count or size')
1480
1481 num_pages = math.ceil(count / size)
1482
1483 return OnDemandPagedList(functools.partial(
1484 self._fetch_page, api_url, num_pages, query), size)
1485
1486 def _real_extract(self, url):
ad974876 1487 category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
c34f505b 1488 query = '%s: %s' % (category, subcategory)
1489
1490 return self.playlist_result(self._entries(category, subcategory, query), query, query)
1491
1492
06167fbb 1493class BiliBiliSearchIE(SearchInfoExtractor):
96565c7e 1494 IE_DESC = 'Bilibili video search'
06167fbb 1495 _MAX_RESULTS = 100000
1496 _SEARCH_KEY = 'bilisearch'
ffa017cf 1497 _TESTS = [{
1498 'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1499 'playlist_count': 3,
1500 'info_dict': {
1501 'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1502 'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1503 },
1504 'playlist': [{
1505 'info_dict': {
1506 'id': 'BV1n44y1Q7sc',
1507 'ext': 'mp4',
1508 'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
1509 'timestamp': 1669889987,
1510 'upload_date': '20221201',
1511 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
1512 'tags': list,
1513 'uploader': '靡烟miya',
1514 'duration': 123.156,
1515 'uploader_id': '1958703906',
1516 'comment_count': int,
1517 'view_count': int,
1518 'like_count': int,
1519 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1520 '_old_archive_ids': ['bilibili 988222410_part1'],
1521 },
1522 }],
1523 }]
06167fbb 1524
e88d44c6 1525 def _search_results(self, query):
ffa017cf 1526 if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
1527 self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
e88d44c6 1528 for page_num in itertools.count(1):
1529 videos = self._download_json(
1530 'https://api.bilibili.com/x/web-interface/search/type', query,
1531 note=f'Extracting results from page {page_num}', query={
1532 'Search_key': query,
1533 'keyword': query,
1534 'page': page_num,
1535 'context': '',
e88d44c6 1536 'duration': 0,
1537 'tids_2': '',
1538 '__refresh__': 'true',
1539 'search_type': 'video',
1540 'tids': 0,
1541 'highlight': 1,
2d101954 1542 })['data'].get('result')
1543 if not videos:
1544 break
06167fbb 1545 for video in videos:
e88d44c6 1546 yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
06167fbb 1547
1548
4bc15a68
RA
class BilibiliAudioBaseIE(InfoExtractor):
    """Base class for the bilibili audio extractors."""

    def _call_api(self, path, sid, query=None):
        """Return the 'data' member of an audio web API response.

        When query is empty or omitted, {'sid': sid} is sent instead.
        """
        api_url = f'https://www.bilibili.com/audio/music-service-c/web/{path}'
        response = self._download_json(api_url, sid, query=query or {'sid': sid})
        return response['data']
1556
1557
1558class BilibiliAudioIE(BilibiliAudioBaseIE):
1559 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1560 _TEST = {
1561 'url': 'https://www.bilibili.com/audio/au1003142',
1562 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1563 'info_dict': {
1564 'id': '1003142',
1565 'ext': 'm4a',
1566 'title': '【tsukimi】YELLOW / 神山羊',
1567 'artist': 'tsukimi',
1568 'comment_count': int,
1569 'description': 'YELLOW的mp3版!',
1570 'duration': 183,
1571 'subtitles': {
1572 'origin': [{
1573 'ext': 'lrc',
1574 }],
1575 },
1576 'thumbnail': r're:^https?://.+\.jpg',
1577 'timestamp': 1564836614,
1578 'upload_date': '20190803',
1579 'uploader': 'tsukimi-つきみぐー',
1580 'view_count': int,
1581 },
1582 }
1583
1584 def _real_extract(self, url):
1585 au_id = self._match_id(url)
1586
1587 play_data = self._call_api('url', au_id)
1588 formats = [{
1589 'url': play_data['cdns'][0],
1590 'filesize': int_or_none(play_data.get('size')),
f0884c8b 1591 'vcodec': 'none'
4bc15a68
RA
1592 }]
1593
6d1b3489 1594 for a_format in formats:
1595 a_format.setdefault('http_headers', {}).update({
1596 'Referer': url,
1597 })
1598
4bc15a68
RA
1599 song = self._call_api('song/info', au_id)
1600 title = song['title']
1601 statistic = song.get('statistic') or {}
1602
1603 subtitles = None
1604 lyric = song.get('lyric')
1605 if lyric:
1606 subtitles = {
1607 'origin': [{
1608 'url': lyric,
1609 }]
1610 }
1611
1612 return {
1613 'id': au_id,
1614 'title': title,
1615 'formats': formats,
1616 'artist': song.get('author'),
1617 'comment_count': int_or_none(statistic.get('comment')),
1618 'description': song.get('intro'),
1619 'duration': int_or_none(song.get('duration')),
1620 'subtitles': subtitles,
1621 'thumbnail': song.get('cover'),
1622 'timestamp': int_or_none(song.get('passtime')),
1623 'uploader': song.get('uname'),
1624 'view_count': int_or_none(statistic.get('play')),
1625 }
1626
1627
1628class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
1629 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1630 _TEST = {
1631 'url': 'https://www.bilibili.com/audio/am10624',
1632 'info_dict': {
1633 'id': '10624',
1634 'title': '每日新曲推荐(每日11:00更新)',
1635 'description': '每天11:00更新,为你推送最新音乐',
1636 },
1637 'playlist_count': 19,
1638 }
1639
1640 def _real_extract(self, url):
1641 am_id = self._match_id(url)
1642
1643 songs = self._call_api(
1644 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
1645
1646 entries = []
1647 for song in songs:
1648 sid = str_or_none(song.get('id'))
1649 if not sid:
1650 continue
1651 entries.append(self.url_result(
1652 'https://www.bilibili.com/audio/au' + sid,
1653 BilibiliAudioIE.ie_key(), sid))
1654
1655 if entries:
1656 album_data = self._call_api('menu/info', am_id) or {}
1657 album_title = album_data.get('title')
1658 if album_title:
1659 for entry in entries:
1660 entry['album'] = album_title
1661 return self.playlist_result(
1662 entries, am_id, album_title, album_data.get('intro'))
1663
1664 return self.playlist_result(entries, am_id)
63dce309
S
1665
1666
class BiliBiliPlayerIE(InfoExtractor):
    """Redirects embedded player URLs to the matching video page."""

    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Delegate to BiliBiliIE using the 'aid' taken from the player URL."""
        video_id = self._match_id(url)
        video_url = f'http://www.bilibili.tv/video/av{video_id}/'
        return self.url_result(video_url, ie=BiliBiliIE.ie_key(), video_id=video_id)
16f7e6be
AG
1679
1680
class BiliIntlBaseIE(InfoExtractor):
    """Shared helpers for the bilibili.tv / biliintl.com extractors.

    Provides the gateway API wrapper, subtitle and format extraction,
    metadata parsing and the password login flow used by the subclasses.
    """
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _call_api(self, endpoint, *args, **kwargs):
        """Call a gateway endpoint and return the ``data`` member of its reply.

        Positional and keyword arguments are forwarded to ``_download_json``.
        A non-zero ``code`` in the reply is mapped to a login-required error,
        a geo-restriction error, or a generic error that is raised only when
        ``fatal`` was passed as a truthy keyword and reported as a warning
        otherwise.

        Returns None when the download yielded no JSON object.
        """
        response = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if not isinstance(response, dict):
            # A non-fatal download is expected to hand back a falsy non-dict
            # value on failure; there is no envelope to inspect in that case
            return None

        code = response.get('code')
        if code:
            if code in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif code == 10004001:
                self.raise_geo_restricted()
            else:
                errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                message = response.get('message')
                # Some replies merely repeat the numeric code as the message
                if message and str(code) != message:
                    errmsg = f'{errmsg}: {self.IE_NAME} said: {message}'
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                self.report_warning(errmsg)
        return response.get('data')

    def json2srt(self, json):
        """Convert a JSON subtitle document into SRT text.

        Cues are read from ``json['body']``; a cue is kept when it has
        non-empty ``content`` and both ``from`` and ``to`` are present.
        """
        return '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(traverse_obj(json, (
                # Compare against None so that a cue starting at 0 is not discarded
                'body', lambda _, l: l['content'] and l['from'] is not None and l['to'] is not None))))

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Return a subtitles dict (language -> list of tracks) for an episode or video.

        ``.ass`` tracks are referenced by URL; ``.json`` tracks are downloaded
        and converted to SRT. Each subtitle URL is processed at most once.
        """
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        fetched_urls = set()
        for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
            # The URL may sit on the entry itself or under its 'ass' / 'srt' members
            for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
                if url in fetched_urls:
                    continue
                fetched_urls.add(url)
                sub_ext = determine_ext(url)
                sub_lang = sub.get('lang_key') or 'en'

                if sub_ext == 'ass':
                    subtitles.setdefault(sub_lang, []).append({
                        'ext': 'ass',
                        'url': url,
                    })
                elif sub_ext == 'json':
                    sub_data = self._download_json(
                        url, ep_id or aid, fatal=False,
                        note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
                        errnote='Unable to download subtitles')

                    if sub_data:
                        subtitles.setdefault(sub_lang, []).append({
                            'ext': 'srt',
                            'data': self.json2srt(sub_data),
                        })
                else:
                    self.report_warning('Unexpected subtitle extension', ep_id or aid)

        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Return the list of video-only and audio-only formats for an episode or video.

        Returns an empty list when the API reply carries no ``playurl`` object.
        """
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        # _call_api may return None (non-fatal API error), so do not index blindly
        playurl = traverse_obj(video_json, ('playurl', {dict})) or {}
        formats = []
        for vid in playurl.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in playurl.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Build the basic info-dict fields from an episode/video data dict.

        The episode number is taken from a ``title_display`` of the form
        ``E<number>`` optionally followed by `` - <title>``.
        """
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'description': video_data.get('desc'),
            'thumbnail': video_data.get('cover'),
            'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in with username and password.

        The password is prefixed with the server-provided hash, encrypted with
        the server-provided RSA public key and sent base64-encoded.

        Raises ExtractorError when the RSA implementation is unavailable or
        when the login reply carries a non-zero ``code``.
        """
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.RSA.importKey(key_data['key'])
        encrypted_password = Cryptodome.PKCS1_v1_5.new(public_key).encrypt(
            (key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(encrypted_password).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true',
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            raise ExtractorError('Unable to log in')
1825
16f7e6be
AG
1826
class BiliIntlIE(BiliIntlBaseIE):
    """Extractor for single bilibili.tv / biliintl.com episodes and videos.

    Handles both series episodes (``/play/<season_id>/<ep_id>``) and
    user-generated videos (``/video/<aid>``).
    """
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro'
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro'
            }],
        }
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro'
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro'
            }],
        }
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro'
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro'
            }],
        },
        'params': {
            'getcomments': True
        }
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True
        }
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Build the canonical English page URL for an episode or a standalone video."""
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Return title/description/etc. for a video.

        Metadata smuggled into the URL is returned as-is when it contains a
        title. Otherwise the webpage's embedded state is used, falling back to
        the season's episode list, the page markup and JSON-LD.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False)

        # XXX: webpage metadata may not be accurate; it is only used so that
        # extraction does not crash when video_data is not found
        return merge_dicts(
            # The episode lookup above yields None when nothing matches
            self._parse_video_metadata(video_data or {}), {
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    @staticmethod
    def _parse_comment(reply):
        """Map one API reply object to the comment fields shared by roots and replies."""
        return {
            'author': traverse_obj(reply, ('member', 'name')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'author_thumbnail': traverse_obj(reply, ('member', 'face')),
            'text': traverse_obj(reply, ('content', 'message')),
            'id': reply.get('rpid'),
            'like_count': int_or_none(reply.get('like_count')),
            'timestamp': unified_timestamp(reply.get('ctime_text')),
        }

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Yield every reply below the root comment ``root_id``.

        Pages are requested starting at cursor ``next_id`` until the API
        reports the end, stops providing a cursor, or the cursor stops
        advancing.
        """
        while True:
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/detail', display_id,
                note=f'Downloading reply comment of {root_id} - {next_id}',
                query={
                    'platform': 'web',
                    'ps': 20,  # comment's reply per page (default: 3)
                    'root': root_id,
                    'next': next_id,
                })

            for reply in traverse_obj(comment_api_raw_data, ('data', 'replies', ..., {dict})):
                yield {
                    **self._parse_comment(reply),
                    'parent': reply.get('parent'),
                }

            cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor', {dict})) or {}
            upcoming_id = cursor.get('next')
            # Stop instead of crashing or spinning when the cursor is absent or stuck
            if cursor.get('is_end') or upcoming_id is None or upcoming_id == next_id:
                break
            next_id = upcoming_id

    def _get_comments(self, video_id, ep_id):
        """Yield root comments of a video, each followed by its replies.

        ``ep_id`` only selects the comment type: series content when truthy,
        user-generated content otherwise.
        """
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            root_comments = traverse_obj(comment_api_raw_data, ('data', 'replies', ..., {dict}))
            for root in root_comments:
                yield {
                    **self._parse_comment(root),
                    'author_is_uploader': bool(traverse_obj(root, ('member', 'type'))),
                }
                if root.get('count'):
                    yield from self._get_comments_reply(root.get('rpid'), display_id=video_id)

            # An empty page also ends pagination; otherwise a reply without
            # 'data' (and thus without 'is_end') would be requested forever
            if not root_comments or traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break

    def _real_extract(self, url):
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start and end times seem to be off by a few seconds even though they are correct according to ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    # The API values are divided by 1000 to obtain seconds
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro'
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro'
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
16f7e6be
AG
2096
2097
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Playlist extractor for a whole bilibili.tv / biliintl.com series (season)."""
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield one url_result per episode of the series.

        The episode metadata from the list is smuggled into each URL so that
        BiliIntlIE can reuse it. Episodes without an ``episode_id`` are skipped.
        """
        series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            episode_id = str_or_none(episode.get('episode_id'))
            if not episode_id:
                # Cannot build an episode URL without its id
                continue
            yield self.url_result(smuggle_url(
                BiliIntlIE._make_url(episode_id, series_id),
                self._parse_video_metadata(episode)
            ), BiliIntlIE, episode_id)

    def _real_extract(self, url):
        series_id = self._match_id(url)
        # _call_api may return None (non-fatal API error); fall back to an empty
        # dict so the playlist is still returned, just without series metadata
        series_info = traverse_obj(self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id), ('season', {dict})) or {}
        return self.playlist_result(
            self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
            categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
b4f53662
H
2150
2151
class BiliLiveIE(InfoExtractor):
    """Extractor for live.bilibili.com live rooms."""
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
            'ext': 'flv',
            'title': "太空狼人杀联动,不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live'
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True
    }]

    # Maps the API quality number (qn) to the format fields reported for it
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Quality ranking built from the qn keys in their declaration order
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Call a live API endpoint and return its ``data`` object (or ``{}``).

        Raises ExtractorError when the reply's ``code`` is not 0.
        """
        api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if api_result.get('code') != 0:
            raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
        return api_result.get('data') or {}

    def _parse_formats(self, qn, fmt):
        """Yield a format dict for every stream URL of ``fmt`` served at quality ``qn``.

        Codec entries whose ``current_qn`` differs from ``qn`` are ignored, as
        are URL entries lacking the pieces needed to assemble a URL.
        """
        for codec in fmt.get('codec') or []:
            if codec.get('current_qn') != qn:
                continue
            base_url = codec.get('base_url')
            for url_info in traverse_obj(codec, ('url_info', ..., {dict})):
                host = url_info.get('host')
                if not host or not base_url:
                    # Skip a malformed entry rather than aborting the whole extraction
                    continue
                yield {
                    'url': f'{host}{base_url}{url_info.get("extra") or ""}',
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        # Keeps the reply of the last quality request; it supplies the timestamp below
        stream_data = {}
        for qn in self._FORMATS:
            stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            })
            for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ..., {dict})):
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            'timestamp': stream_data.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }