]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bilibili.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
CommitLineData
cfcf60ea 1import base64
c34f505b 2import functools
6f10cdcf 3import hashlib
ad974876 4import itertools
9f09bdcf 5import json
c34f505b 6import math
5336bf57 7import re
6f10cdcf 8import time
ad974876 9import urllib.parse
ffa017cf 10import uuid
28746fbd 11
06167fbb 12from .common import InfoExtractor, SearchInfoExtractor
f6a765ce 13from ..dependencies import Cryptodome
3d2623a8 14from ..networking.exceptions import HTTPError
28746fbd 15from ..utils import (
bd8f48c7 16 ExtractorError,
ad974876 17 GeoRestrictedError,
2b9d0216
L
18 InAdvancePagedList,
19 OnDemandPagedList,
9e68747f 20 bool_or_none,
9f09bdcf 21 clean_html,
cf6413e8 22 determine_ext,
f5f15c99 23 filter_dict,
6461f2b7 24 float_or_none,
ad974876 25 format_field,
9f09bdcf 26 get_element_by_class,
2b9d0216 27 int_or_none,
bdd0b75e 28 join_nonempty,
ad974876 29 make_archive_id,
d37422f1 30 merge_dicts,
f8580bf0 31 mimetype2ext,
2b9d0216 32 parse_count,
ad974876 33 parse_qs,
b4f53662 34 qualities,
26fdfc37 35 smuggle_url,
efc947fb 36 srt_subtitles_timecode,
4bc15a68 37 str_or_none,
2b9d0216 38 traverse_obj,
6f10cdcf 39 try_call,
b093c38c 40 unified_timestamp,
26fdfc37 41 unsmuggle_url,
c62ecf0d 42 url_or_none,
ad974876 43 urlencode_postdata,
9e68747f 44 variadic,
28746fbd
PH
45)
46
47
ad974876 48class BilibiliBaseIE(InfoExtractor):
5336bf57 49 _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
50
ad974876
L
def extract_formats(self, play_info):
    """Build the list of format dicts from a play-info response.

    Audio streams are read from ``dash.audio``, ``dash.dolby.audio`` and
    ``dash.flac.audio``; video streams from ``dash.video``. Qualities that
    are advertised in ``support_formats`` but have no matching video stream
    are reported to the user together with the login hint.

    @param play_info    dict holding ``support_formats`` and ``dash``
    @returns            list of format dicts (audio-only entries first)
    """
    # quality id -> human readable name (may be None when the API gives none)
    format_names = {
        r['quality']: traverse_obj(r, 'new_description', 'display_desc')
        for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
    }

    audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
    # Only accept a dict here: every audio entry is accessed with .get() below
    flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio', {dict}))
    if flac_audio:
        audios.append(flac_audio)
    formats = [{
        'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
        'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
        'acodec': traverse_obj(audio, ('codecs', {str.lower})),
        'vcodec': 'none',
        'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
        'filesize': int_or_none(audio.get('size')),
        'format_id': str_or_none(audio.get('id')),
    } for audio in audios]

    formats.extend({
        'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
        'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
        'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
        'width': int_or_none(video.get('width')),
        'height': int_or_none(video.get('height')),
        'vcodec': video.get('codecs'),
        # Videos are only marked audio-less when separate audio streams exist
        'acodec': 'none' if audios else None,
        'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
        'tbr': float_or_none(video.get('bandwidth'), scale=1000),
        'filesize': int_or_none(video.get('size')),
        'quality': int_or_none(video.get('id')),
        # Prefer the id embedded in the stream URL, fall back to the quality id
        'format_id': traverse_obj(
            video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
            ('id', {str_or_none}), get_all=False),
        'format': format_names.get(video.get('id')),
    } for video in traverse_obj(play_info, ('dash', 'video', ..., {dict})))

    missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
    if missing_formats:
        # A quality without a display name is shown by its numeric id;
        # str.join() would otherwise raise TypeError on None
        missing_names = ', '.join(str(format_names[i] or i) for i in missing_formats)
        self.to_screen(f'Format(s) {missing_names} are missing; '
                       f'you have to login or become premium member to download them. {self._login_hint()}')

    return formats
95
def _download_playinfo(self, video_id, cid, headers=None):
    """Download the playurl JSON for ``cid`` and return its ``data`` member.

    @param video_id   BV id, sent as ``bvid`` and used as the download id
    @param cid        id of the video section to query
    @param headers    optional extra HTTP headers for the request
    """
    playurl_response = self._download_json(
        'https://api.bilibili.com/x/player/playurl', video_id,
        query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
        note=f'Downloading video formats for cid {cid}', headers=headers)
    return playurl_response['data']
9f09bdcf 101
ad974876
L
def json2srt(self, json_data):
    """Convert a subtitle JSON document into SRT text.

    Each entry of ``json_data['body']`` must provide ``from``, ``to`` and
    ``content``. Cues are numbered from 1. A missing or empty ``body``
    yields an empty string.
    """
    cues = json_data.get('body') or []
    return ''.join(
        f'{number}\n'
        f'{srt_subtitles_timecode(cue["from"])} --> {srt_subtitles_timecode(cue["to"])}\n'
        f'{cue["content"]}\n\n'
        for number, cue in enumerate(cues, 1))
109
def _get_subtitles(self, video_id, cid, aid=None):
    """Collect the danmaku XML and any CC subtitle tracks for ``cid``.

    The player API is queried by ``aid`` when one is given, otherwise by
    ``bvid``. Every CC track is downloaded and converted to SRT.

    @returns dict mapping a language code (or 'danmaku') to subtitle entries
    """
    if aid:
        query = {'aid': aid, 'cid': cid}
    else:
        query = {'bvid': video_id, 'cid': cid}
    player_info = self._download_json(
        'https://api.bilibili.com/x/player/v2', video_id,
        query=query, note=f'Extracting subtitle info {cid}')
    subtitle_info = traverse_obj(player_info, ('data', 'subtitle'))
    tracks = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))

    # No tracks although submissions are allowed: warn when no login session cookie is set
    may_be_hidden = not tracks and traverse_obj(subtitle_info, 'allow_submit')
    if may_be_hidden and not self._get_cookies('https://api.bilibili.com').get('SESSDATA'):
        self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)

    subtitles = {
        'danmaku': [{
            'ext': 'xml',
            'url': f'https://comment.bilibili.com/{cid}.xml',
        }],
    }
    for track in tracks:
        srt_text = self.json2srt(self._download_json(track['subtitle_url'], video_id))
        subtitles.setdefault(track['lan'], []).append({
            'ext': 'srt',
            'data': srt_text,
        })
    return subtitles
132
def _get_chapters(self, aid, cid):
    """Return the chapter list built from ``data.view_points``, or None.

    Nothing is downloaded unless both ``aid`` and ``cid`` are truthy; the
    download itself is non-fatal.
    """
    player_info = None
    if aid and cid:
        player_info = self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)

    chapter_template = {
        'title': 'content',
        'start_time': 'from',
        'end_time': 'to',
    }
    return traverse_obj(player_info, ('data', 'view_points', ..., chapter_template)) or None
142
ad974876
L
def _get_comments(self, aid):
    """Yield every comment (and nested reply) of the video ``aid``.

    Pages are requested one after another, starting at 1, until a page
    comes back without replies or fails to download.
    """
    for page in itertools.count(1):
        page_data = self._download_json(
            f'https://api.bilibili.com/x/v2/reply?pn={page}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
            aid, note=f'Extracting comments from page {page}', fatal=False)
        top_level_replies = traverse_obj(page_data, ('data', 'replies'))
        if not top_level_replies:
            return
        for top_level_reply in top_level_replies:
            yield from self._get_all_children(top_level_reply)
154
def _get_all_children(self, reply):
    """Yield ``reply`` as a comment dict, then all of its nested replies.

    Nested replies are walked depth-first. A reply without a parent is
    attached to 'root'.
    """
    comment = {
        'author': traverse_obj(reply, ('member', 'uname')),
        'author_id': traverse_obj(reply, ('member', 'mid')),
        'id': reply.get('rpid'),
        'text': traverse_obj(reply, ('content', 'message')),
        'timestamp': reply.get('ctime'),
        'parent': reply.get('parent') or 'root',
    }
    yield comment
    for nested_reply in traverse_obj(reply, ('replies', ...)):
        yield from self._get_all_children(nested_reply)
166
bdd0b75e
GS
def _get_episodes_from_season(self, ss_id, url):
    """Yield a url_result for each main-section episode of season ``ss_id``.

    Only episodes that carry a valid ``share_url`` and a truthy ``id`` are
    yielded. ``url`` is sent as the Referer of the request.
    """
    request_headers = {'Referer': url, **self.geo_verification_headers()}
    season_info = self._download_json(
        'https://api.bilibili.com/pgc/web/season/section', ss_id,
        note='Downloading season info', query={'season_id': ss_id},
        headers=request_headers)

    episodes = traverse_obj(season_info, (
        'result', 'main_section', 'episodes',
        lambda _, v: url_or_none(v['share_url']) and v['id']))
    for episode in episodes:
        yield self.url_result(episode['share_url'], BiliBiliBangumiIE, str_or_none(episode.get('id')))
177
def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
    """Recursively collect the edges of an interactive video.

    Downloads the edge info of ``edge_id``, stores its title, cid and
    choices in ``edges[edge_id]`` (``edges`` is mutated in place) and then
    recurses into every choice whose edge has not been seen yet.

    @param edges        dict of edge_id -> edge dict, pre-seeded by the caller
    @param cid_edges    accumulator shared across the recursion
    @returns            dict of cid -> {edge_id: edge dict}
    """
    if cid_edges is None:
        cid_edges = {}
    division_data = self._download_json(
        'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
        query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
        note=f'Extracting divisions from edge {edge_id}')

    edge = edges.setdefault(edge_id, {})
    # traverse_obj gives None when nothing matches; dict.update(None) would raise
    edge.update(
        traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
            'title': ('title', {str}),
            'cid': ('cid', {int_or_none}),
        }), get_all=False) or {})

    edge.update(traverse_obj(division_data, ('data', {
        'title': ('title', {str}),
        'choices': ('edges', 'questions', ..., 'choices', ..., {
            'edge_id': ('id', {int_or_none}),
            'cid': ('cid', {int_or_none}),
            'text': ('option', {str}),
        }),
    })) or {})

    cid = edge.get('cid')
    if cid is None:
        # Without a cid the edge cannot be mapped to a video section
        self.report_warning(f'Unable to determine the video section of edge {edge_id}; skipping it')
        return cid_edges
    # use dict to combine edges that use the same video section (same cid)
    cid_edges.setdefault(cid, {})[edge_id] = edge

    for choice in traverse_obj(edge, ('choices', ...)):
        # Dict templates drop None values, so these keys may be absent
        choice_edge_id = choice.get('edge_id')
        if choice_edge_id is None or choice_edge_id in edges:
            continue
        edges[choice_edge_id] = {'cid': choice.get('cid')}
        self._get_divisions(video_id, graph_version, edges, choice_edge_id, cid_edges=cid_edges)
    return cid_edges
205
def _get_interactive_entries(self, video_id, cid, metainfo):
    """Yield one entry per video section of an interactive video.

    The edge graph is walked starting at edge 1 (which uses ``cid``).
    Each yielded entry extends ``metainfo`` with the section's own id,
    title, formats, duration and subtitles; its description starts with
    the JSON dump of the edges that share the section.
    """
    player_info = self._download_json(
        'https://api.bilibili.com/x/player/wbi/v2', video_id,
        'Extracting graph version', query={'bvid': video_id, 'cid': cid})
    graph_version = traverse_obj(
        player_info, ('data', 'interaction', 'graph_version', {int_or_none}))

    sections = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
    for section_cid, section_edges in sections.items():
        play_info = self._download_playinfo(video_id, section_cid)
        first_edge = next(iter(section_edges.values()))
        yield {
            **metainfo,
            'id': f'{video_id}_{section_cid}',
            'title': f'{metainfo.get("title")} - {first_edge.get("title")}',
            'formats': self.extract_formats(play_info),
            'description': f'{json.dumps(section_edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(video_id, section_cid),
        }
bdd0b75e 224
ad974876
L
225
226class BiliBiliIE(BilibiliBaseIE):
9e68747f 227 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
28746fbd 228
bd8f48c7 229 _TESTS = [{
ad974876
L
230 'url': 'https://www.bilibili.com/video/BV13x41117TL',
231 'info_dict': {
232 'id': 'BV13x41117TL',
233 'title': '阿滴英文|英文歌分享#6 "Closer',
234 'ext': 'mp4',
235 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
236 'uploader_id': '65880958',
237 'uploader': '阿滴英文',
238 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
239 'duration': 554.117,
240 'tags': list,
241 'comment_count': int,
242 'upload_date': '20170301',
243 'timestamp': 1488353834,
244 'like_count': int,
245 'view_count': int,
246 },
247 }, {
9f09bdcf 248 'note': 'old av URL version',
06167fbb 249 'url': 'http://www.bilibili.com/video/av1074402/',
28746fbd 250 'info_dict': {
ad974876 251 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
f8580bf0 252 'ext': 'mp4',
f8580bf0 253 'uploader': '菊子桑',
ad974876
L
254 'uploader_id': '156160',
255 'id': 'BV11x411K7CN',
256 'title': '【金坷垃】金泡沫',
257 'duration': 308.36,
f8580bf0 258 'upload_date': '20140420',
ad974876 259 'timestamp': 1397983878,
6461f2b7 260 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
ad974876
L
261 'like_count': int,
262 'comment_count': int,
263 'view_count': int,
264 'tags': list,
265 },
c90c5b9b 266 'params': {'skip_download': True},
bd8f48c7 267 }, {
ad974876
L
268 'note': 'Anthology',
269 'url': 'https://www.bilibili.com/video/BV1bK411W797',
270 'info_dict': {
271 'id': 'BV1bK411W797',
add96eb9 272 'title': '物语中的人物是如何吐槽自己的OP的',
ad974876
L
273 },
274 'playlist_count': 18,
275 'playlist': [{
276 'info_dict': {
277 'id': 'BV1bK411W797_p1',
278 'ext': 'mp4',
279 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
9f09bdcf 280 'tags': 'count:10',
ad974876
L
281 'timestamp': 1589601697,
282 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
283 'uploader': '打牌还是打桩',
284 'uploader_id': '150259984',
285 'like_count': int,
286 'comment_count': int,
287 'upload_date': '20200516',
288 'view_count': int,
289 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
290 'duration': 90.314,
add96eb9 291 },
292 }],
06167fbb 293 }, {
ad974876
L
294 'note': 'Specific page of Anthology',
295 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
296 'info_dict': {
297 'id': 'BV1bK411W797_p1',
298 'ext': 'mp4',
299 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
9f09bdcf 300 'tags': 'count:10',
ad974876
L
301 'timestamp': 1589601697,
302 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
303 'uploader': '打牌还是打桩',
304 'uploader_id': '150259984',
305 'like_count': int,
306 'comment_count': int,
307 'upload_date': '20200516',
308 'view_count': int,
309 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
310 'duration': 90.314,
add96eb9 311 },
bd8f48c7 312 }, {
ad974876
L
313 'note': 'video has subtitles',
314 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
bd8f48c7 315 'info_dict': {
ad974876 316 'id': 'BV12N4y1M7rh',
bd8f48c7 317 'ext': 'mp4',
c90c5b9b 318 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
ad974876
L
319 'tags': list,
320 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
321 'duration': 313.557,
322 'upload_date': '20220709',
9e68747f 323 'uploader': '小夫太渴',
ad974876
L
324 'timestamp': 1657347907,
325 'uploader_id': '1326814124',
326 'comment_count': int,
327 'view_count': int,
328 'like_count': int,
329 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
add96eb9 330 'subtitles': 'count:2',
bd8f48c7 331 },
ad974876 332 'params': {'listsubtitles': True},
ca270371 333 }, {
ad974876 334 'url': 'https://www.bilibili.com/video/av8903802/',
ca270371 335 'info_dict': {
ad974876 336 'id': 'BV13x41117TL',
f8580bf0 337 'ext': 'mp4',
ca270371 338 'title': '阿滴英文|英文歌分享#6 "Closer',
f8580bf0 339 'upload_date': '20170301',
c90c5b9b 340 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
ad974876 341 'timestamp': 1488353834,
f8580bf0 342 'uploader_id': '65880958',
343 'uploader': '阿滴英文',
ad974876 344 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
89fabf11 345 'duration': 554.117,
ad974876
L
346 'tags': list,
347 'comment_count': int,
348 'view_count': int,
349 'like_count': int,
89fabf11
JN
350 },
351 'params': {
352 'skip_download': True,
353 },
c90c5b9b 354 }, {
355 'note': 'video has chapter',
356 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
357 'info_dict': {
358 'id': 'BV1vL411G7N7',
359 'ext': 'mp4',
360 'title': '如何为你的B站视频添加进度条分段',
361 'timestamp': 1634554558,
362 'upload_date': '20211018',
363 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
364 'tags': list,
365 'uploader': '爱喝咖啡的当麻',
366 'duration': 669.482,
367 'uploader_id': '1680903',
368 'chapters': 'count:6',
369 'comment_count': int,
370 'view_count': int,
371 'like_count': int,
372 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
373 },
374 'params': {'skip_download': True},
ab29e470 375 }, {
376 'note': 'video redirects to festival page',
377 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
378 'info_dict': {
379 'id': 'BV1wP4y1P72h',
380 'ext': 'mp4',
381 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
382 'timestamp': 1643947497,
383 'upload_date': '20220204',
384 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
385 'uploader': '叨叨冯聊音乐',
386 'duration': 246.719,
387 'uploader_id': '528182630',
388 'view_count': int,
389 'like_count': int,
390 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
391 },
392 'params': {'skip_download': True},
393 }, {
394 'note': 'newer festival video',
395 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
396 'info_dict': {
397 'id': 'BV1ay4y1d77f',
398 'ext': 'mp4',
399 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
400 'timestamp': 1674273600,
401 'upload_date': '20230121',
402 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
403 'uploader': '果蝇轰',
404 'duration': 1111.722,
405 'uploader_id': '8469526',
406 'view_count': int,
407 'like_count': int,
408 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
409 },
410 'params': {'skip_download': True},
9f09bdcf 411 }, {
412 'note': 'interactive/split-path video',
413 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
414 'info_dict': {
415 'id': 'BV1af4y1H7ga',
416 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
417 'timestamp': 1630500414,
418 'upload_date': '20210901',
419 'description': 'md5:01113e39ab06e28042d74ac356a08786',
420 'tags': list,
421 'uploader': '钉宫妮妮Ninico',
422 'duration': 1503,
423 'uploader_id': '8881297',
424 'comment_count': int,
425 'view_count': int,
426 'like_count': int,
427 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
428 },
429 'playlist_count': 33,
430 'playlist': [{
431 'info_dict': {
432 'id': 'BV1af4y1H7ga_400950101',
433 'ext': 'mp4',
434 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
435 'timestamp': 1630500414,
436 'upload_date': '20210901',
437 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
438 'tags': list,
439 'uploader': '钉宫妮妮Ninico',
440 'duration': 11.605,
441 'uploader_id': '8881297',
442 'comment_count': int,
443 'view_count': int,
444 'like_count': int,
445 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
446 },
447 }],
448 }, {
449 'note': '301 redirect to bangumi link',
450 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
451 'info_dict': {
452 'id': '288525',
453 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
454 'ext': 'mp4',
455 'series': '我和我的祖国',
456 'series_id': '4780',
457 'season': '幕后纪实',
458 'season_id': '28609',
459 'season_number': 1,
460 'episode': '钱学森弹道和乘波体飞行器是什么?',
461 'episode_id': '288525',
462 'episode_number': 105,
463 'duration': 1183.957,
464 'timestamp': 1571648124,
465 'upload_date': '20191021',
466 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
467 },
468 }, {
469 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
470 'info_dict': {
471 'id': 'BV1jL41167ZG',
472 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
473 'ext': 'mp4',
474 },
475 'skip': 'supporter-only video',
476 }, {
477 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
478 'info_dict': {
479 'id': 'BV1Ks411f7aQ',
480 'title': '【BD1080P】狼与香辛料I【华盟】',
481 'ext': 'mp4',
482 },
483 'skip': 'login required',
484 }, {
485 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
486 'info_dict': {
487 'id': 'BV1GJ411x7h7',
488 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
489 'ext': 'mp4',
490 },
491 'skip': 'geo-restricted',
bd8f48c7 492 }]
28746fbd 493
def _real_extract(self, url):
    """Extract a regular, anthology, festival or interactive video.

    Returns a url_result when the page redirects to a URL this extractor
    does not match, a playlist for a whole anthology or an interactive
    video, and a single info dict otherwise.
    """
    video_id = self._match_id(url)
    headers = self.geo_verification_headers()
    webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
    if not self._match_valid_url(urlh.url):
        # Redirected away from a URL this extractor handles
        return self.url_result(urlh.url)

    initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

    # Pages without 'videoData' are treated as festival pages
    is_festival = 'videoData' not in initial_state
    if is_festival:
        video_data = initial_state['videoInfo']
    else:
        play_info_obj = self._search_json(
            r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
        if not play_info_obj:
            error_code = traverse_obj(initial_state, ('error', 'trueCode'))
            if error_code == -403:
                self.raise_login_required()
            if error_code == -404:
                raise ExtractorError(
                    'This video may be deleted or geo-restricted. '
                    'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
        play_info = traverse_obj(play_info_obj, ('data', {dict}))
        if not play_info:
            if traverse_obj(play_info_obj, 'code') == 87007:
                toast = get_element_by_class('tips-toast', webpage) or ''
                belongs_to = get_element_by_class('belongs-to', toast) or ''
                level = get_element_by_class('level', toast) or ''
                msg = clean_html(f'{belongs_to},{level}')
                raise ExtractorError(
                    f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
            raise ExtractorError('Failed to extract play info')
        video_data = initial_state['videoData']

    video_id, title = video_data['bvid'], video_data.get('title')

    # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
    page_list_json = []
    if not is_festival:
        page_list_response = self._download_json(
            'https://api.bilibili.com/x/player/pagelist', video_id,
            fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
            note='Extracting videos in anthology', headers=headers)
        page_list_json = traverse_obj(page_list_response, 'data', expected_type=list) or []
    is_anthology = len(page_list_json) > 1

    part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
    if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
        return self.playlist_from_matches(
            page_list_json, video_id, title, ie=BiliBiliIE,
            getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

    if is_anthology:
        part_id = part_id or 1
        part_name = traverse_obj(page_list_json, (part_id - 1, 'part')) or ''
        title += f' p{part_id:02d} {part_name}'

    aid = video_data.get('aid')
    old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

    if part_id:
        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid'))
    else:
        cid = video_data.get('cid')

    festival_info = {}
    if is_festival:
        # Festival pages carry no embedded play info, so it is fetched from the API
        play_info = self._download_playinfo(video_id, cid, headers=headers)

        festival_info = traverse_obj(initial_state, {
            'uploader': ('videoInfo', 'upName'),
            'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
            'like_count': ('videoStatus', 'like', {int_or_none}),
            'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
        }, get_all=False)

    common_info = traverse_obj(initial_state, {
        'uploader': ('upData', 'name'),
        'uploader_id': ('upData', 'mid', {str_or_none}),
        'like_count': ('videoData', 'stat', 'like', {int_or_none}),
        'tags': ('tags', ..., 'tag_name'),
        'thumbnail': ('videoData', 'pic', {url_or_none}),
    })
    video_info = traverse_obj(video_data, {
        'description': 'desc',
        'timestamp': ('pubdate', {int_or_none}),
        'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
        'comment_count': ('stat', 'reply', {int_or_none}),
    }, get_all=False)
    # Later sources override earlier ones: festival data wins over the generic fields
    metainfo = {
        **common_info,
        **festival_info,
        **video_info,
        'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
        '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
        'title': title,
        'http_headers': {'Referer': url},
    }

    if traverse_obj(video_data, ('rights', 'is_stein_gate')):
        # Interactive video: every reachable video section becomes a playlist entry
        return self.playlist_result(
            self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
            duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
            __post_extractor=self.extract_comments(aid))

    return {
        **metainfo,
        'duration': float_or_none(play_info.get('timelength'), scale=1000),
        'chapters': self._get_chapters(aid, cid),
        'subtitles': self.extract_subtitles(video_id, cid),
        'formats': self.extract_formats(play_info),
        '__post_extractor': self.extract_comments(aid),
    }
601
06167fbb 602
ad974876 603class BiliBiliBangumiIE(BilibiliBaseIE):
9f09bdcf 604 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
e88d44c6 605
ad974876 606 _TESTS = [{
9f09bdcf 607 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
608 'info_dict': {
609 'id': '21495',
610 'ext': 'mp4',
611 'series': '悠久之翼',
612 'series_id': '774',
613 'season': '第二季',
614 'season_id': '1182',
615 'season_number': 2,
616 'episode': 'forever/ef',
617 'episode_id': '21495',
618 'episode_number': 12,
619 'title': '12 forever/ef',
620 'duration': 1420.791,
621 'timestamp': 1320412200,
622 'upload_date': '20111104',
623 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
624 },
625 }, {
bdd0b75e 626 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
ad974876 627 'info_dict': {
bdd0b75e 628 'id': '267851',
ad974876 629 'ext': 'mp4',
bdd0b75e
GS
630 'series': '鬼灭之刃',
631 'series_id': '4358',
9f09bdcf 632 'season': '立志篇',
bdd0b75e 633 'season_id': '26801',
ad974876 634 'season_number': 1,
bdd0b75e
GS
635 'episode': '残酷',
636 'episode_id': '267851',
637 'episode_number': 1,
638 'title': '1 残酷',
639 'duration': 1425.256,
640 'timestamp': 1554566400,
641 'upload_date': '20190406',
add96eb9 642 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
ad974876 643 },
9f09bdcf 644 'skip': 'Geo-restricted',
645 }, {
646 'note': 'a making-of which falls outside main section',
647 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
648 'info_dict': {
649 'id': '345120',
650 'ext': 'mp4',
651 'series': '鬼灭之刃',
652 'series_id': '4358',
653 'season': '立志篇',
654 'season_id': '26801',
655 'season_number': 1,
656 'episode': '炭治郎篇',
657 'episode_id': '345120',
658 'episode_number': 27,
659 'title': '#1 炭治郎篇',
660 'duration': 1922.129,
661 'timestamp': 1602853860,
662 'upload_date': '20201016',
add96eb9 663 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
9f09bdcf 664 },
ad974876 665 }]
06167fbb 666
def _real_extract(self, url):
    """Extract a single bangumi episode.

    Raises GeoRestrictedError when the page shows the region-block notice
    and requests login when the episode is limited to premium members.
    """
    episode_id = self._match_id(url)
    headers = self.geo_verification_headers()
    webpage = self._download_webpage(url, episode_id, headers=headers)

    if '您所在的地区无法观看本片' in webpage:
        raise GeoRestrictedError('This video is restricted')
    elif '正在观看预览,大会员免费看全片' in webpage:
        self.raise_login_required('This video is for premium members only')

    headers['Referer'] = url
    play_info = self._download_json(
        'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
        'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
        headers=headers)
    premium_only = play_info.get('code') == -10403
    play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

    formats = self.extract_formats(play_info)
    if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
        self.raise_login_required('This video is for premium members only')

    bangumi_info = self._download_json(
        'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
        query={'ep_id': episode_id}, headers=headers)['result']

    # 1-based position of the episode among the main episodes followed by all
    # section episodes; falls back to (1, {}) when the episode is not listed
    episode_number, episode_info = next((
        (idx, ep) for idx, ep in enumerate(traverse_obj(
            bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
        if str_or_none(ep.get('id')) == episode_id), (1, {}))

    season_id = bangumi_info.get('season_id')
    # Start from (None, None) so a missing season_id cannot break the unpacking
    season_number, season_title = None, None
    if season_id:
        season_number, season_title = next((
            (idx + 1, e.get('season_title')) for idx, e in enumerate(
                traverse_obj(bangumi_info, ('seasons', ...)))
            if e.get('season_id') == season_id
        ), (None, None))

    aid = episode_info.get('aid')

    return {
        'id': episode_id,
        'formats': formats,
        **traverse_obj(bangumi_info, {
            'series': ('series', 'series_title', {str}),
            'series_id': ('series', 'series_id', {str_or_none}),
            'thumbnail': ('square_cover', {url_or_none}),
        }),
        **traverse_obj(episode_info, {
            'episode': ('long_title', {str}),
            # A non-numeric title falls back to the position computed above
            'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
            'timestamp': ('pub_time', {int_or_none}),
            'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
        }),
        'episode_id': episode_id,
        'season': str_or_none(season_title),
        'season_id': str_or_none(season_id),
        'season_number': season_number,
        'duration': float_or_none(play_info.get('timelength'), scale=1000),
        'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
        '__post_extractor': self.extract_comments(aid),
        'http_headers': {'Referer': url},
    }
bd8f48c7 730
bd8f48c7 731
bdd0b75e 732class BiliBiliBangumiMediaIE(BilibiliBaseIE):
9e68747f 733 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
bd8f48c7 734 _TESTS = [{
ad974876 735 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
bd8f48c7 736 'info_dict': {
ad974876 737 'id': '24097891',
9f09bdcf 738 'title': 'CAROLE & TUESDAY',
739 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
bd8f48c7 740 },
ad974876 741 'playlist_mincount': 25,
9f09bdcf 742 }, {
743 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
744 'info_dict': {
745 'id': '1565',
746 'title': '攻壳机动队 S.A.C. 2nd GIG',
747 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
748 },
749 'playlist_count': 26,
750 'playlist': [{
751 'info_dict': {
752 'id': '68540',
753 'ext': 'mp4',
754 'series': '攻壳机动队',
755 'series_id': '1077',
756 'season': '第二季',
757 'season_id': '1565',
758 'season_number': 2,
759 'episode': '再启动 REEMBODY',
760 'episode_id': '68540',
761 'episode_number': 1,
762 'title': '1 再启动 REEMBODY',
763 'duration': 1525.777,
764 'timestamp': 1425074413,
765 'upload_date': '20150227',
add96eb9 766 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
9f09bdcf 767 },
768 }],
bd8f48c7
YCH
769 }]
770
def _real_extract(self, url):
    """Return a playlist of the episodes of the season behind a media page.

    The season id is read from ``mediaInfo.season_id`` in the page's
    initial state. Title and description are added when present.
    """
    media_id = self._match_id(url)
    webpage = self._download_webpage(url, media_id)

    initial_state = self._search_json(
        r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
    ss_id = initial_state['mediaInfo']['season_id']

    # traverse_obj gives None when neither field is present; ** needs a mapping
    metainfo = traverse_obj(initial_state, ('mediaInfo', {
        'title': ('title', {str}),
        'description': ('evaluate', {str}),
    })) or {}

    return self.playlist_result(
        self._get_episodes_from_season(ss_id, url), media_id, **metainfo)
bdd0b75e 785
bd8f48c7 786
bdd0b75e 787class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
9e68747f 788 _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
bdd0b75e
GS
789 _TESTS = [{
790 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
791 'info_dict': {
9f09bdcf 792 'id': '26801',
793 'title': '鬼灭之刃',
794 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
bdd0b75e 795 },
add96eb9 796 'playlist_mincount': 26,
9f09bdcf 797 }, {
798 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
799 'info_dict': {
800 'id': '2251',
801 'title': '玲音',
802 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
803 },
804 'playlist_count': 13,
805 'playlist': [{
806 'info_dict': {
807 'id': '50188',
808 'ext': 'mp4',
809 'series': '玲音',
810 'series_id': '1526',
811 'season': 'TV',
812 'season_id': '2251',
813 'season_number': 1,
814 'episode': 'WEIRD',
815 'episode_id': '50188',
816 'episode_number': 1,
817 'title': '1 WEIRD',
818 'duration': 1436.992,
819 'timestamp': 1343185080,
820 'upload_date': '20120725',
add96eb9 821 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
9f09bdcf 822 },
823 }],
bdd0b75e
GS
824 }]
825
def _real_extract(self, url):
    """Return a playlist of the main-section episodes of a season page.

    Title and description come from the first ``itemListElement`` entry of
    the page's ld+json block that provides either of them.
    """
    ss_id = self._match_id(url)
    webpage = self._download_webpage(url, ss_id)
    # get_all=False gives None when no entry matches; ** needs a mapping
    metainfo = traverse_obj(
        self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
        ('itemListElement', ..., {
            'title': ('name', {str}),
            'description': ('description', {str}),
        }), get_all=False) or {}

    return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
837
838
839class BilibiliCheeseBaseIE(BilibiliBaseIE):
840 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
841
def _extract_episode(self, season_info, ep_id):
    """Build the info dict of the course episode ``ep_id``.

    @param season_info  season JSON holding ``episodes`` and ``up_info``
    @param ep_id        episode id (anything accepted by ``int()``)
    @raises ExtractorError  when the episode is not part of the season or
                            is not yet available; login is requested when
                            the episode is not playable
    """
    wanted_id = int(ep_id)  # converted once instead of once per episode
    episode_info = traverse_obj(season_info, (
        'episodes', lambda _, v: v['id'] == wanted_id), get_all=False)
    if not episode_info:
        # Fail with a clear message instead of a TypeError on the subscripts below
        raise ExtractorError(f'Unable to find episode {ep_id} in the season info')
    aid, cid = episode_info['aid'], episode_info['cid']

    if traverse_obj(episode_info, 'ep_status') == -1:
        raise ExtractorError('This course episode is not yet available.', expected=True)
    if not traverse_obj(episode_info, 'playable'):
        self.raise_login_required('You need to purchase the course to download this episode')

    play_info = self._download_json(
        'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
        query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
        headers=self._HEADERS, note='Downloading playinfo')['data']

    return {
        'id': str_or_none(ep_id),
        'episode_id': str_or_none(ep_id),
        'formats': self.extract_formats(play_info),
        # Results are always attributed to the single-episode extractor
        'extractor_key': BilibiliCheeseIE.ie_key(),
        'extractor': BilibiliCheeseIE.IE_NAME,
        'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
        **traverse_obj(episode_info, {
            'episode': ('title', {str}),
            'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
            'alt_title': ('subtitle', {str}),
            'duration': ('duration', {int_or_none}),
            'episode_number': ('index', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'timestamp': ('release_date', {int_or_none}),
            'view_count': ('play', {int_or_none}),
        }),
        **traverse_obj(season_info, {
            'uploader': ('up_info', 'uname', {str}),
            'uploader_id': ('up_info', 'mid', {str_or_none}),
        }),
        'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
        '__post_extractor': self.extract_comments(aid),
        'http_headers': self._HEADERS,
    }
882
883 def _download_season_info(self, query_key, video_id):
884 return self._download_json(
885 f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
886 headers=self._HEADERS, note='Downloading season info')['data']
bd8f48c7 887
9f09bdcf 888
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        """Extract the single course episode named in the URL."""
        ep_id = self._match_id(url)
        # The season data is looked up through the episode id itself
        season_info = self._download_season_info('ep_id', ep_id)
        return self._extract_episode(season_info, ep_id)
914
915
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            },
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        """Lazily yield an info dict for each episode flagged ``episode_can_view``."""
        viewable_ids = traverse_obj(
            season_info, ('episodes', lambda _, ep: ep['episode_can_view'], 'id'))
        yield from (self._extract_episode(season_info, ep_id) for ep_id in viewable_ids)

    def _real_extract(self, url):
        """Extract a whole course season as a playlist."""
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)

        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })
        entries = self._get_cheese_entries(season_info)
        return self.playlist_result(entries, season_id, **playlist_meta)
4bc15a68
RA
969
970
2b9d0216
L
class BilibiliSpaceBaseIE(InfoExtractor):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Return ``(metadata, paged_list)`` for a paginated listing.

        ``fetch_page(idx)`` downloads the page with zero-based index ``idx``,
        ``get_metadata(page)`` must return a dict with at least ``page_count``
        and ``page_size``, and ``get_entries(page)`` yields that page's entries.
        """
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def entries_of_page(idx):
            # Page 0 was already downloaded to obtain the metadata; reuse it
            page_data = fetch_page(idx) if idx else initial_page
            return get_entries(page_data)

        paged_list = InAdvancePagedList(
            entries_of_page, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
981
982
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
    }]

    def _extract_signature(self, playlist_id):
        """Derive the 32-character key that is appended to a query before hashing.

        The key is assembled from the ``img_url`` and ``sub_url`` file names
        reported by the nav endpoint; built-in defaults are used for any part
        that cannot be obtained.
        """
        nav_data = self._download_json(
            'https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

        def key_from_url(x):
            # Last path component of the URL, without its extension
            return x[x.rfind('/') + 1:].split('.')[0]

        img_key = traverse_obj(
            nav_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
        sub_key = traverse_obj(
            nav_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
        combined_key = img_key + sub_key

        # Order in which characters are picked out of the combined key
        pick_order = (
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
            57, 62, 11, 36, 20, 34, 44, 52,
        )
        # Positions lying beyond the end of the combined key are skipped
        picked = [combined_key[pos] for pos in pick_order if pos < len(combined_key)]
        return ''.join(picked)[:32]

    def _real_extract(self, url):
        """Extract all videos uploaded by a user as a lazily paged playlist."""
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        signature = self._extract_signature(playlist_id)

        def fetch_page(page_idx):
            # The w_rid hash covers the urlencoded query exactly as built here,
            # so the key order is part of the request
            query = {
                'keyword': '',
                'mid': playlist_id,
                'order': 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
                'wts': int(time.time()),
            }
            unsigned_query = urllib.parse.urlencode(query)
            query['w_rid'] = hashlib.md5(f'{unsigned_query}{signature}'.encode()).hexdigest()

            try:
                response = self._download_json(
                    'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
                    note=f'Downloading page {page_idx}', query=query, headers={'referer': url})
            except ExtractorError as e:
                blocked = isinstance(e.cause, HTTPError) and e.cause.status == 412
                if not blocked:
                    raise
                raise ExtractorError(
                    'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)

            api_code = response['code']
            if api_code in (-352, -401):
                raise ExtractorError(
                    f'Request is blocked by server ({-api_code}), '
                    'please add cookies, wait and try later.', expected=True)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            page_count = math.ceil(entry_count / page_size)
            return {'page_count': page_count, 'page_size': page_size}

        def get_entries(page_data):
            videos = traverse_obj(page_data, ('list', 'vlist')) or []
            for video in videos:
                bvid = video['bvid']
                yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
6efb0711 1074
6efb0711 1075
2b9d0216
L
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Extract all audio tracks uploaded by a user as a lazily paged playlist."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # The API's `pn` is one-based while page_idx is zero-based
            response = self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})
            return response['data']

        def get_metadata(page_data):
            return {key: page_data[source] for key, source in (
                ('page_count', 'pageCount'),
                ('page_size', 'pageSize'),
            )}

        def get_entries(page_data):
            for song in page_data.get('data', []):
                song_id = song['id']
                yield self.url_result(
                    f'https://www.bilibili.com/audio/au{song_id}', BilibiliAudioIE, song_id)

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
1107
1108
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a url_result for each video id found in ``page_data``.

        ``bvid_keys`` is a single key or a path of keys leading to the list of
        items; ``ending_key`` names the field of each item that holds the id.
        """
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Return the uploader name scraped from the user's space page, or None.

        Both the download and the regex search are non-fatal, so any failure
        results in None rather than an exception.
        """
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        # A failed non-fatal download gives a falsy value instead of page text;
        # running the regex search on that would raise TypeError
        if not webpage:
            return None
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Like the parent implementation, but strips the paging keys from the metadata.

        This leaves only fields that can be passed on as playlist metadata.
        """
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
1123
1124
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        """Extract a user's collection ("season") as a lazily paged playlist."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            # `page_num` is one-based on the API side
            response = self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            metadata = {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
            }
            metadata.update(traverse_obj(page_data, {
                'title': ('meta', 'name', {str}),
                'description': ('meta', 'description', {str}),
                'uploader_id': ('meta', 'mid', {str_or_none}),
                'timestamp': ('meta', 'ptime', {int_or_none}),
                'thumbnail': ('meta', 'cover', {url_or_none}),
            }))
            return metadata

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1173
1174
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        """Extract a user's series as a lazily paged playlist."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        # Series details are optional: the request is non-fatal
        series_json = self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False)
        playlist_meta = traverse_obj(series_json, {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            # `pn` is one-based on the API side
            response = self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['size']
            entry_count = page_data['page']['total']
            metadata = {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
            }
            metadata.update(playlist_meta)
            return metadata

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1227
1228
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract a favorites list as a playlist."""
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # A second endpoint returns the ids of the list's items
        ids_response = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(ids_response, 'data')

        playlist_meta = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **playlist_meta)
1277
1278
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Extract the logged-in user's watch-later list as a playlist."""
        # The playlist id is the DedeUserID cookie value when that cookie exists
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = getattr(user_cookie, 'value', 'watchlater')

        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')

        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')), id=list_id, title='稍后再看')
1296
1297
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'description': '',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield the playlist entries page by page.

        ``query`` is updated in place: after each page its ``oid`` is set to
        the id of that page's last item, which is what the next request uses.
        Iteration ends once a page no longer reports ``has_more``.
        """
        for page_num in itertools.count(1):
            response = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )
            page_data = response['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                return

    def _real_extract(self, url):
        """Extract a media list as a playlist, or only the selected video when
        the URL names one and the playlist is not wanted."""
        list_id = self._match_id(url)

        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
        if not self._yes_playlist(list_id, bvid):
            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)

        status_code = traverse_obj(initial_state, ('error', 'code', {int_or_none}))
        if status_code != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            if error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            if error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        # Request parameters taken over from the page state
        query = {'ps': 20, 'with_current': False}
        query.update(traverse_obj(initial_state, {
            'type': ('playlist', 'type', {int_or_none}),
            'biz_id': ('playlist', 'id', {int_or_none}),
            'tid': ('tid', {int_or_none}),
            'sort_field': ('sortFiled', {int_or_none}),
            'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
        }))

        metadata = {'id': f'{query["type"]}_{query["biz_id"]}'}
        metadata.update(traverse_obj(initial_state, ('mediaListInfo', {
            'title': ('title', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
        })))
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
06167fbb 1422
1423
class BilibiliCategoryIE(InfoExtractor):
    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad',
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45,
        },
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield a url_result for every video on one page of the category listing.

        ``page_num`` is forwarded unchanged as the API's ``pn`` parameter.
        Raises ExtractorError when the page carries no video list.
        """
        # NOTE(review): page_num comes straight from the paged list; confirm it
        # matches the page numbering the API expects for `pn`
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note=f'Extracting results from page {page_num} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return an on-demand paged list of the videos in a category/subcategory.

        Raises ExtractorError for unsupported categories or subcategories, and
        when the paging information cannot be read from the first page.
        """
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127,
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map)}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category])}')
        rid_value = rid_map[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # Fall back to {} so that missing paging data reaches the ExtractorError
        # below instead of failing with AttributeError on None
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        """Extract a category listing as a playlist named ``'<category>: <subcategory>'``."""
        # The path has the form /v/<category>/<subcategory>
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
1490
1491
class BiliBiliSearchIE(SearchInfoExtractor):
    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    _TESTS = [{
        'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'tags': list,
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }]

    def _search_results(self, query):
        """Yield url_results for the search hits, page after page, until a page is empty."""
        # Set a `buvid3` cookie when none is present yet
        api_cookies = self._get_cookies('https://api.bilibili.com')
        if not api_cookies.get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')

        page_num = 0
        while True:
            page_num += 1
            response = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query={
                    'Search_key': query,
                    'keyword': query,
                    'page': page_num,
                    'context': '',
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                })
            videos = response['data'].get('result')
            if not videos:
                return
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
06167fbb 1546
1547
4bc15a68
RA
class BilibiliAudioBaseIE(InfoExtractor):
    def _call_api(self, path, sid, query=None):
        """Call the audio web API at ``path`` and return the response's ``data`` member.

        When no (or an empty) ``query`` is given, ``{'sid': sid}`` is sent.
        """
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        response = self._download_json(api_url, sid, query=query or {'sid': sid})
        return response['data']
1555
1556
class BilibiliAudioIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Extract a single audio track with its metadata and, if present, its lyrics."""
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        # Only the first CDN URL is used; the page URL is sent as Referer
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        # The lyric URL, when given, is exposed as a subtitle track named 'origin'
        lyric = song.get('lyric')
        subtitles = {'origin': [{'url': lyric}]} if lyric else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
1625
1626
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Extract an audio album (menu) as a playlist of its songs."""
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Songs without a usable id are left out
        song_ids = (str_or_none(song.get('id')) for song in songs)
        entries = [
            self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids if sid
        ]
        if not entries:
            return self.playlist_result(entries, am_id)

        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if not album_title:
            return self.playlist_result(entries, am_id)

        # Tag every entry with the album it belongs to
        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
63dce309
S
1664
1665
class BiliBiliPlayerIE(InfoExtractor):
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Hand an embedded-player URL over to BiliBiliIE via the video's ``av`` page."""
        video_id = self._match_id(url)
        # NOTE(review): this targets a bilibili.tv URL, whereas the other url_result
        # calls for BiliBiliIE in this file use www.bilibili.com - confirm that
        # BiliBiliIE accepts this form
        target_url = f'http://www.bilibili.tv/video/av{video_id}/'
        return self.url_result(target_url, ie=BiliBiliIE.ie_key(), video_id=video_id)
16f7e6be
AG
1678
1679
1680class BiliIntlBaseIE(InfoExtractor):
c62ecf0d 1681 _API_URL = 'https://api.bilibili.tv/intl/gateway'
cfcf60ea 1682 _NETRC_MACHINE = 'biliintl'
1713c882 1683 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
16f7e6be 1684
def _call_api(self, endpoint, *args, **kwargs):
    """Call an API endpoint and return the ``data`` member of its JSON response.

    Remaining arguments are passed on to ``_download_json``. A non-zero
    ``code`` in the response triggers ``raise_login_required`` or
    ``raise_geo_restricted`` for the known codes; any other code raises
    ExtractorError when ``fatal`` was passed truthy and is reported as a
    warning otherwise. Returns None when there is no ``data``.
    """
    # A failed non-fatal download gives a falsy value instead of a dict;
    # treat it as an empty response so the lookups below cannot fail
    response = self._download_json(self._API_URL + endpoint, *args, **kwargs) or {}
    error_code = response.get('code')
    if error_code:
        if error_code in (10004004, 10004005, 10023006):
            self.raise_login_required()
        elif error_code == 10004001:
            self.raise_geo_restricted()
        else:
            errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
            message = response.get('message')
            # Only quote the server message when it says more than the bare code
            if message and str(error_code) != message:
                errmsg = f'{errmsg}: {self.IE_NAME} said: {message}'
            # NOTE(review): an omitted `fatal` is treated as non-fatal here - confirm intended
            if kwargs.get('fatal'):
                raise ExtractorError(errmsg)
            self.report_warning(errmsg)
    return response.get('data')
16f7e6be 1702
def json2srt(self, json):
    """Convert a JSON subtitle document to SRT text.

    Every item of ``json['body']`` that has non-empty ``content`` and both a
    ``from`` and a ``to`` time becomes one numbered SRT cue; other items are
    skipped.
    """
    def is_complete(line):
        # Compare the times against None rather than testing truthiness,
        # so that a cue starting at 0 seconds is not dropped
        return line['content'] and line['from'] is not None and line['to'] is not None

    cues = traverse_obj(json, ('body', lambda _, line: is_complete(line)))
    return '\n\n'.join(
        f'{number}\n{srt_subtitles_timecode(cue["from"])} --> {srt_subtitles_timecode(cue["to"])}\n{cue["content"]}'
        for number, cue in enumerate(cues, 1))
efc947fb 1708
f5f15c99
LR
1709 def _get_subtitles(self, *, ep_id=None, aid=None):
1710 sub_json = self._call_api(
fbb888a3 1711 '/web/v2/subtitle', ep_id or aid, fatal=False,
1712 note='Downloading subtitles list', errnote='Unable to download subtitles list',
1713 query=filter_dict({
f5f15c99 1714 'platform': 'web',
fbb888a3 1715 's_locale': 'en_US',
f5f15c99
LR
1716 'episode_id': ep_id,
1717 'aid': aid,
fbb888a3 1718 })) or {}
16f7e6be 1719 subtitles = {}
cf6413e8
H
1720 fetched_urls = set()
1721 for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
1722 for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
1723 if url in fetched_urls:
1724 continue
1725 fetched_urls.add(url)
1726 sub_ext = determine_ext(url)
1727 sub_lang = sub.get('lang_key') or 'en'
1728
1729 if sub_ext == 'ass':
1730 subtitles.setdefault(sub_lang, []).append({
1731 'ext': 'ass',
1732 'url': url,
1733 })
1734 elif sub_ext == 'json':
1735 sub_data = self._download_json(
1736 url, ep_id or aid, fatal=False,
1737 note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
1738 errnote='Unable to download subtitles')
1739
1740 if sub_data:
1741 subtitles.setdefault(sub_lang, []).append({
1742 'ext': 'srt',
1743 'data': self.json2srt(sub_data),
1744 })
1745 else:
1746 self.report_warning('Unexpected subtitle extension', ep_id or aid)
1747
16f7e6be
AG
1748 return subtitles
1749
f5f15c99
LR
1750 def _get_formats(self, *, ep_id=None, aid=None):
1751 video_json = self._call_api(
1752 '/web/playurl', ep_id or aid, note='Downloading video formats',
1753 errnote='Unable to download video formats', query=filter_dict({
1754 'platform': 'web',
1755 'ep_id': ep_id,
1756 'aid': aid,
1757 }))
16f7e6be
AG
1758 video_json = video_json['playurl']
1759 formats = []
c62ecf0d 1760 for vid in video_json.get('video') or []:
16f7e6be
AG
1761 video_res = vid.get('video_resource') or {}
1762 video_info = vid.get('stream_info') or {}
1763 if not video_res.get('url'):
1764 continue
1765 formats.append({
1766 'url': video_res['url'],
1767 'ext': 'mp4',
1768 'format_note': video_info.get('desc_words'),
1769 'width': video_res.get('width'),
1770 'height': video_res.get('height'),
1771 'vbr': video_res.get('bandwidth'),
1772 'acodec': 'none',
1773 'vcodec': video_res.get('codecs'),
1774 'filesize': video_res.get('size'),
1775 })
c62ecf0d 1776 for aud in video_json.get('audio_resource') or []:
16f7e6be
AG
1777 if not aud.get('url'):
1778 continue
1779 formats.append({
1780 'url': aud['url'],
1781 'ext': 'mp4',
1782 'abr': aud.get('bandwidth'),
1783 'acodec': aud.get('codecs'),
1784 'vcodec': 'none',
1785 'filesize': aud.get('size'),
1786 })
1787
16f7e6be
AG
1788 return formats
1789
26fdfc37 1790 def _parse_video_metadata(self, video_data):
16f7e6be 1791 return {
f5f15c99 1792 'title': video_data.get('title_display') or video_data.get('title'),
1713c882 1793 'description': video_data.get('desc'),
f5f15c99 1794 'thumbnail': video_data.get('cover'),
1713c882 1795 'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
c62ecf0d 1796 'episode_number': int_or_none(self._search_regex(
f5f15c99 1797 r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
16f7e6be
AG
1798 }
1799
52efa4b3 1800 def _perform_login(self, username, password):
65f6e807 1801 if not Cryptodome.RSA:
f6a765ce 1802 raise ExtractorError('pycryptodomex not found. Please install', expected=True)
cfcf60ea
M
1803
1804 key_data = self._download_json(
1805 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1806 note='Downloading login key', errnote='Unable to download login key')['data']
1807
65f6e807 1808 public_key = Cryptodome.RSA.importKey(key_data['key'])
add96eb9 1809 password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
cfcf60ea
M
1810 login_post = self._download_json(
1811 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
1812 'username': username,
1813 'password': base64.b64encode(password_hash).decode('ascii'),
1814 'keep_me': 'true',
1815 's_locale': 'en_US',
add96eb9 1816 'isTrusted': 'true',
cfcf60ea
M
1817 }), note='Logging in', errnote='Unable to log in')
1818 if login_post.get('code'):
1819 if login_post.get('message'):
1820 raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
1821 else:
1822 raise ExtractorError('Unable to log in')
1823
16f7e6be
AG
1824
1825class BiliIntlIE(BiliIntlBaseIE):
0831d95c 1826 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
16f7e6be 1827 _TESTS = [{
cfcf60ea 1828 # Bstation page
16f7e6be
AG
1829 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1830 'info_dict': {
1831 'id': '341736',
1832 'ext': 'mp4',
c62ecf0d
M
1833 'title': 'E2 - The First Night',
1834 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
16f7e6be 1835 'episode_number': 2,
d37422f1
H
1836 'upload_date': '20201009',
1837 'episode': 'Episode 2',
1838 'timestamp': 1602259500,
1839 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
0ba87dd2
H
1840 'chapters': [{
1841 'start_time': 0,
1842 'end_time': 76.242,
add96eb9 1843 'title': '<Untitled Chapter 1>',
0ba87dd2
H
1844 }, {
1845 'start_time': 76.242,
1846 'end_time': 161.161,
add96eb9 1847 'title': 'Intro',
0ba87dd2
H
1848 }, {
1849 'start_time': 1325.742,
1850 'end_time': 1403.903,
add96eb9 1851 'title': 'Outro',
0ba87dd2 1852 }],
add96eb9 1853 },
16f7e6be 1854 }, {
cfcf60ea 1855 # Non-Bstation page
c62ecf0d 1856 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
16f7e6be 1857 'info_dict': {
c62ecf0d 1858 'id': '11005006',
16f7e6be 1859 'ext': 'mp4',
c62ecf0d
M
1860 'title': 'E3 - Who?',
1861 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1862 'episode_number': 3,
d37422f1
H
1863 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1864 'episode': 'Episode 3',
1865 'upload_date': '20211219',
1866 'timestamp': 1639928700,
0ba87dd2
H
1867 'chapters': [{
1868 'start_time': 0,
1869 'end_time': 88.0,
add96eb9 1870 'title': '<Untitled Chapter 1>',
0ba87dd2
H
1871 }, {
1872 'start_time': 88.0,
1873 'end_time': 156.0,
add96eb9 1874 'title': 'Intro',
0ba87dd2
H
1875 }, {
1876 'start_time': 1173.0,
1877 'end_time': 1259.535,
add96eb9 1878 'title': 'Outro',
0ba87dd2 1879 }],
add96eb9 1880 },
cfcf60ea
M
1881 }, {
1882 # Subtitle with empty content
1883 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1884 'info_dict': {
1885 'id': '10131790',
1886 'ext': 'mp4',
1887 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1888 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1889 'episode_number': 140,
1890 },
add96eb9 1891 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
b093c38c
H
1892 }, {
1893 # episode comment extraction
1894 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1895 'info_dict': {
1896 'id': '340317',
1897 'ext': 'mp4',
1898 'timestamp': 1604057820,
1899 'upload_date': '20201030',
1900 'episode_number': 5,
1901 'title': 'E5 - My Own Steel',
1902 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1903 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1904 'episode': 'Episode 5',
1905 'comment_count': int,
1906 'chapters': [{
1907 'start_time': 0,
1908 'end_time': 61.0,
add96eb9 1909 'title': '<Untitled Chapter 1>',
b093c38c
H
1910 }, {
1911 'start_time': 61.0,
1912 'end_time': 134.0,
add96eb9 1913 'title': 'Intro',
b093c38c
H
1914 }, {
1915 'start_time': 1290.0,
1916 'end_time': 1379.0,
add96eb9 1917 'title': 'Outro',
b093c38c
H
1918 }],
1919 },
1920 'params': {
add96eb9 1921 'getcomments': True,
1922 },
b093c38c
H
1923 }, {
1924 # user generated content comment extraction
1925 'url': 'https://www.bilibili.tv/en/video/2045730385',
1926 'info_dict': {
1927 'id': '2045730385',
1928 'ext': 'mp4',
1929 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1930 'timestamp': 1667891924,
1931 'upload_date': '20221108',
1713c882 1932 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
b093c38c 1933 'comment_count': int,
1713c882 1934 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
b093c38c
H
1935 },
1936 'params': {
add96eb9 1937 'getcomments': True,
1938 },
0ba87dd2
H
1939 }, {
1940 # episode id without intro and outro
1941 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1942 'info_dict': {
1943 'id': '11246489',
1944 'ext': 'mp4',
1945 'title': 'E1 - Operation \'Strix\' <Owl>',
1946 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1947 'timestamp': 1649516400,
1948 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1949 'episode': 'Episode 1',
1950 'episode_number': 1,
1951 'upload_date': '20220409',
1952 },
c62ecf0d
M
1953 }, {
1954 'url': 'https://www.biliintl.com/en/play/34613/341736',
1955 'only_matching': True,
f5f15c99
LR
1956 }, {
1957 # User-generated content (as opposed to a series licensed from a studio)
1958 'url': 'https://bilibili.tv/en/video/2019955076',
1959 'only_matching': True,
1960 }, {
1961 # No language in URL
1962 'url': 'https://www.bilibili.tv/video/2019955076',
1963 'only_matching': True,
0831d95c 1964 }, {
1965 # Uppercase language in URL
1966 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1967 'only_matching': True,
16f7e6be
AG
1968 }]
1969
93240fc1 1970 @staticmethod
26fdfc37 1971 def _make_url(video_id, series_id=None):
1972 if series_id:
1973 return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1974 return f'https://www.bilibili.tv/en/video/{video_id}'
1975
1976 def _extract_video_metadata(self, url, video_id, season_id):
1977 url, smuggled_data = unsmuggle_url(url, {})
1978 if smuggled_data.get('title'):
1979 return smuggled_data
1980
c62ecf0d
M
1981 webpage = self._download_webpage(url, video_id)
1982 # Bstation layout
8072ef2b 1983 initial_data = (
1984 self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
1985 or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
1986 video_data = traverse_obj(
d37422f1 1987 initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
c62ecf0d 1988
f5f15c99 1989 if season_id and not video_data:
c62ecf0d
M
1990 # Non-Bstation layout, read through episode list
1991 season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
26fdfc37 1992 video_data = traverse_obj(season_json, (
add96eb9 1993 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
26fdfc37 1994 ), expected_type=dict, get_all=False)
1995
d37422f1
H
1996 # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
1997 return merge_dicts(
1713c882
S
1998 self._parse_video_metadata(video_data), {
1999 'title': get_element_by_class(
2000 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
2001 'description': get_element_by_class(
f1570ab8 2002 'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
1713c882 2003 }, self._search_json_ld(webpage, video_id, default={}))
26fdfc37 2004
b093c38c
H
2005 def _get_comments_reply(self, root_id, next_id=0, display_id=None):
2006 comment_api_raw_data = self._download_json(
2007 'https://api.bilibili.tv/reply/web/detail', display_id,
2008 note=f'Downloading reply comment of {root_id} - {next_id}',
2009 query={
2010 'platform': 'web',
2011 'ps': 20, # comment's reply per page (default: 3)
2012 'root': root_id,
2013 'next': next_id,
2014 })
2015
2016 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2017 yield {
2018 'author': traverse_obj(replies, ('member', 'name')),
2019 'author_id': traverse_obj(replies, ('member', 'mid')),
2020 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2021 'text': traverse_obj(replies, ('content', 'message')),
2022 'id': replies.get('rpid'),
2023 'like_count': int_or_none(replies.get('like_count')),
2024 'parent': replies.get('parent'),
add96eb9 2025 'timestamp': unified_timestamp(replies.get('ctime_text')),
b093c38c
H
2026 }
2027
2028 if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2029 yield from self._get_comments_reply(
2030 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
2031
2032 def _get_comments(self, video_id, ep_id):
2033 for i in itertools.count(0):
2034 comment_api_raw_data = self._download_json(
2035 'https://api.bilibili.tv/reply/web/root', video_id,
2036 note=f'Downloading comment page {i + 1}',
2037 query={
2038 'platform': 'web',
2039 'pn': i, # page number
2040 'ps': 20, # comment per page (default: 20)
2041 'oid': video_id,
2042 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
2043 'sort_type': 1, # 1: best, 2: recent
2044 })
2045
2046 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2047 yield {
2048 'author': traverse_obj(replies, ('member', 'name')),
2049 'author_id': traverse_obj(replies, ('member', 'mid')),
2050 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2051 'text': traverse_obj(replies, ('content', 'message')),
2052 'id': replies.get('rpid'),
2053 'like_count': int_or_none(replies.get('like_count')),
2054 'timestamp': unified_timestamp(replies.get('ctime_text')),
2055 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
2056 }
2057 if replies.get('count'):
2058 yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
2059
2060 if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2061 break
2062
26fdfc37 2063 def _real_extract(self, url):
2064 season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
2065 video_id = ep_id or aid
0ba87dd2
H
2066 chapters = None
2067
2068 if ep_id:
2069 intro_ending_json = self._call_api(
2070 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2071 video_id, fatal=False) or {}
2072 if intro_ending_json.get('skip'):
2073 # FIXME: start time and end time seems a bit off a few second even it corrext based on ogv.*.js
2074 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2075 chapters = [{
2076 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
2077 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
add96eb9 2078 'title': 'Intro',
0ba87dd2
H
2079 }, {
2080 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
2081 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
add96eb9 2082 'title': 'Outro',
0ba87dd2 2083 }]
26fdfc37 2084
2085 return {
2086 'id': video_id,
2087 **self._extract_video_metadata(url, video_id, season_id),
2088 'formats': self._get_formats(ep_id=ep_id, aid=aid),
2089 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
b093c38c 2090 'chapters': chapters,
1713c882
S
2091 '__post_extractor': self.extract_comments(video_id, ep_id),
2092 'http_headers': self._HEADERS,
26fdfc37 2093 }
16f7e6be
AG
2094
2095
class BiliIntlSeriesIE(BiliIntlBaseIE):
    # Playlist extractor for a whole series (season) page; the trailing part of the
    # pattern keeps it from matching single-episode URLs
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield one URL result per episode of the series.

        The episode metadata from the list is smuggled into each URL so that
        BiliIntlIE can reuse it. Episodes without an ``episode_id`` are skipped.
        """
        series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        episodes = traverse_obj(series_json, (
            'sections', ..., 'episodes', lambda _, v: v['episode_id'] is not None), expected_type=dict)
        for episode in episodes:
            episode_id = str(episode['episode_id'])
            yield self.url_result(smuggle_url(
                BiliIntlIE._make_url(episode_id, series_id),
                self._parse_video_metadata(episode),
            ), BiliIntlIE, episode_id)

    def _real_extract(self, url):
        """Return the series as a playlist with its title, description, categories, thumbnail and view count."""
        series_id = self._match_id(url)
        # The API helper may return no data at all; never call .get() on its result directly
        series_info = traverse_obj(
            self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id),
            ('season', {dict})) or {}
        return self.playlist_result(
            self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
            categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
b4f53662
H
2148
2149
class BiliLiveIE(InfoExtractor):
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
            'ext': 'flv',
            'title': '太空狼人杀联动,不被爆杀就算赢',
            'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
            'timestamp': 1650802769,
        },
        'skip': 'not live',
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True,
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True,
    }]

    # Quality number ("qn") requested from the API -> format id and note
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Quality ranking derived from the key order of _FORMATS
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Query the live API and return its ``data`` member (an empty dict when absent).

        Raises ExtractorError when the response ``code`` is not 0.
        """
        response = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if response.get('code') == 0:
            return response.get('data') or {}
        raise ExtractorError(response.get('message') or 'Unable to download JSON metadata')

    def _parse_formats(self, qn, fmt):
        """Yield one format per URL of every codec in ``fmt`` whose current quality equals ``qn``."""
        for codec in fmt.get('codec') or []:
            # Only keep codecs actually served at the quality that was requested
            if codec.get('current_qn') == qn:
                for url_info in codec['url_info']:
                    yield {
                        'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
                        'ext': fmt.get('format_name'),
                        'vcodec': codec.get('codec_name'),
                        'quality': self._quality(qn),
                        **self._FORMATS[qn],
                    }

    def _real_extract(self, url):
        """Extract a live room; the play info is requested once per known quality number."""
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        # Query parameters that do not depend on the requested quality
        fixed_query = {
            'codec': '0,1',
            'format': '0,2',
            'mask': '0',
            'no_playurl': '0',
            'platform': 'web',
            'protocol': '0,1',
        }
        formats = []
        for qn in self._FORMATS:
            stream_data = self._call_api(
                'xlive/web-room/v2/index/getRoomPlayInfo', room_id,
                {'room_id': room_id, 'qn': qn, **fixed_query})
            stream_formats = traverse_obj(
                stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []
            for fmt in stream_formats:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            # Taken from the response to the last quality request
            'timestamp': stream_data.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }