]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bilibili.py
[ie/vidly] Add extractor (#8612)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
CommitLineData
cfcf60ea 1import base64
c34f505b 2import functools
6f10cdcf 3import hashlib
ad974876 4import itertools
c34f505b 5import math
5336bf57 6import re
6f10cdcf 7import time
ad974876 8import urllib.parse
28746fbd 9
06167fbb 10from .common import InfoExtractor, SearchInfoExtractor
f6a765ce 11from ..dependencies import Cryptodome
3d2623a8 12from ..networking.exceptions import HTTPError
28746fbd 13from ..utils import (
bd8f48c7 14 ExtractorError,
ad974876 15 GeoRestrictedError,
2b9d0216
L
16 InAdvancePagedList,
17 OnDemandPagedList,
9e68747f 18 bool_or_none,
f5f15c99 19 filter_dict,
6461f2b7 20 float_or_none,
ad974876 21 format_field,
2b9d0216 22 int_or_none,
bdd0b75e 23 join_nonempty,
ad974876 24 make_archive_id,
d37422f1 25 merge_dicts,
f8580bf0 26 mimetype2ext,
2b9d0216 27 parse_count,
ad974876 28 parse_qs,
b4f53662 29 qualities,
26fdfc37 30 smuggle_url,
efc947fb 31 srt_subtitles_timecode,
4bc15a68 32 str_or_none,
2b9d0216 33 traverse_obj,
6f10cdcf 34 try_call,
b093c38c 35 unified_timestamp,
26fdfc37 36 unsmuggle_url,
c62ecf0d 37 url_or_none,
ad974876 38 urlencode_postdata,
9e68747f 39 variadic,
28746fbd
PH
40)
41
42
class BilibiliBaseIE(InfoExtractor):
    """Helpers shared by the Bilibili extractors defined in this file."""

    # Captures the digits in the "-<digits>.m4s?" part of a stream URL; used as the format id
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')

    def extract_formats(self, play_info):
        """Convert a DASH ``play_info`` mapping into a list of format dicts.

        Audio streams (regular, Dolby and FLAC) come first, followed by the
        video streams. Every quality listed in ``support_formats`` that has no
        matching video stream is reported to the user through ``to_screen``.
        """
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        # Type-filter the FLAC entry like the other audio entries so `.get` below cannot fail
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio', {dict}))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            'acodec': 'none' if audios else None,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ..., {dict})))

        missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
        if missing_formats:
            # A quality may have no description at all; fall back to its numeric id
            # so that str.join never receives None
            missing_names = ', '.join(str(format_names[i] or i) for i in missing_formats)
            self.to_screen(f'Format(s) {missing_names} are missing; '
                           f'you have to login or become premium member to download them. {self._login_hint()}')

        return formats

    def json2srt(self, json_data):
        """Render the ``body`` list of a subtitle JSON document as SRT text.

        Each body item must provide ``from``, ``to`` and ``content``.
        A missing or empty ``body`` yields an empty string.
        """
        return ''.join(
            f'{idx}\n'
            f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
            f'{line["content"]}\n\n'
            for idx, line in enumerate(json_data.get('body') or [], 1))

    def _get_subtitles(self, video_id, aid, cid):
        """Return the subtitles dict: the danmaku XML plus every listed subtitle track as SRT."""
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }]
        }

        video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id)
        # Skip entries without a language code or URL instead of failing the whole extraction
        for s in traverse_obj(video_info_json, (
                'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
            })
        return subtitles

    def _get_chapters(self, aid, cid):
        """Return chapters built from the player API ``view_points``, or None if there are none.

        No request is made unless both ``aid`` and ``cid`` are truthy; a failed
        request is non-fatal.
        """
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

    def _get_comments(self, aid):
        """Yield comment dicts page by page until a page comes back without replies."""
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
            for children in map(self._get_all_children, replies):
                yield from children

    def _get_all_children(self, reply):
        """Yield ``reply`` as a comment dict, then recursively every nested reply."""
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
            yield from children

    def _get_episodes_from_season(self, ss_id, url):
        """Yield a ``url_result`` for each main-section episode of season ``ss_id``.

        Episodes without a valid ``share_url`` or without an ``id`` are skipped.
        """
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
159
ad974876
L
160
class BiliBiliIE(BilibiliBaseIE):
    """Extractor for regular bilibili.com videos, anthologies and festival pages."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
        },
    }, {
        # old av URL version
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            'ext': 'mp4',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'id': 'BV11x411K7CN',
            'title': '【金坷垃】金泡沫',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 18,
        'playlist': [{
            'info_dict': {
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:11',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
            }
        }]
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:11',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
        }
    }, {
        'note': 'video has subtitles',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫太渴',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2'
        },
        'params': {'listsubtitles': True},
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'video redirects to festival page',
        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
        'info_dict': {
            'id': 'BV1wP4y1P72h',
            'ext': 'mp4',
            'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
            'timestamp': 1643947497,
            'upload_date': '20220204',
            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
            'uploader': '叨叨冯聊音乐',
            'duration': 246.719,
            'uploader_id': '528182630',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
        'info_dict': {
            'id': 'BV1ay4y1d77f',
            'ext': 'mp4',
            'title': '【崩坏3新春剧场】为特别的你送上祝福!',
            'timestamp': 1674273600,
            'upload_date': '20230121',
            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
            'uploader': '果蝇轰',
            'duration': 1111.722,
            'uploader_id': '8469526',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Extract a single video, one part of an anthology, or the anthology as a playlist."""
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        # Festival pages carry their data under 'videoInfo' and embed no play info
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
            video_data = initial_state['videoData']

        video_id = video_data['bvid']
        title = video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        pages = []
        if not is_festival:
            pagelist_response = self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology')
            pages = traverse_obj(pagelist_response, 'data', expected_type=list) or []
        is_anthology = len(pages) > 1

        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology:
            if not part_id and self._yes_playlist(video_id, video_id):
                return self.playlist_from_matches(
                    pages, video_id, title, ie=BiliBiliIE,
                    getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

            # No explicit part requested and no playlist wanted: fall back to the first part
            part_id = part_id or 1
            part_name = traverse_obj(pages, (part_id - 1, 'part')) or ''
            title += f' p{part_id:02d} {part_name}'

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        if part_id:
            cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid'))
        else:
            cid = video_data.get('cid')

        festival_info = {}
        if is_festival:
            play_info = self._download_json(
                'https://api.bilibili.com/x/player/playurl', video_id,
                query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
                note='Extracting festival video formats')['data']

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        page_metadata = traverse_obj(initial_state, {
            'uploader': ('upData', 'name'),
            'uploader_id': ('upData', 'mid', {str_or_none}),
            'like_count': ('videoData', 'stat', 'like', {int_or_none}),
            'tags': ('tags', ..., 'tag_name'),
            'thumbnail': ('videoData', 'pic', {url_or_none}),
        })
        video_metadata = traverse_obj(video_data, {
            'description': 'desc',
            'timestamp': ('pubdate', {int_or_none}),
            'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
            'comment_count': ('stat', 'reply', {int_or_none}),
        }, get_all=False)

        # Later mappings override earlier ones, so festival values win over page values
        return {
            **page_metadata,
            **festival_info,
            **video_metadata,
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            'formats': self.extract_formats(play_info),
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'chapters': self._get_chapters(aid, cid),
            'subtitles': self.extract_subtitles(video_id, aid, cid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': {'Referer': url},
        }
277d6ff5 425
06167fbb 426
class BiliBiliBangumiIE(BilibiliBaseIE):
    """Extractor for a single bangumi episode (``/bangumi/play/ep<N>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
        'info_dict': {
            'id': '267851',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '鬼灭之刃',
            'season_id': '26801',
            'season_number': 1,
            'episode': '残酷',
            'episode_id': '267851',
            'episode_number': 1,
            'title': '1 残酷',
            'duration': 1425.256,
            'timestamp': 1554566400,
            'upload_date': '20190406',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
    }]

    def _real_extract(self, url):
        """Extract formats and episode/season metadata for one bangumi episode."""
        video_id = self._match_id(url)
        # The matched id has the form "ep<digits>"; the API wants only the digits
        episode_id = video_id[2:]
        webpage = self._download_webpage(url, video_id)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        if '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers = {'Referer': url, **self.geo_verification_headers()}
        playurl_response = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
            headers=headers)
        premium_only = playurl_response.get('code') == -10403
        play_info = traverse_obj(playurl_response, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats:
            if premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage:
                self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        # Locate this episode in the season listing; default to position 1 with no details
        episode_number, episode_info = 1, {}
        for position, candidate in enumerate(traverse_obj(bangumi_info, ('episodes', ..., {dict})), 1):
            if str_or_none(candidate.get('id')) == episode_id:
                episode_number, episode_info = position, candidate
                break

        season_id = bangumi_info.get('season_id')
        season_number = season_id and next((
            position for position, season in enumerate(traverse_obj(bangumi_info, ('seasons', ...)), 1)
            if season.get('season_id') == season_id
        ), None)

        aid = episode_info.get('aid')
        series_metadata = traverse_obj(bangumi_info, {
            'series': ('series', 'series_title', {str}),
            'series_id': ('series', 'series_id', {str_or_none}),
            'thumbnail': ('square_cover', {url_or_none}),
        })

        return {
            'id': video_id,
            'formats': formats,
            **series_metadata,
            'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
            'episode': episode_info.get('long_title'),
            'episode_id': episode_id,
            'episode_number': int_or_none(episode_info.get('title')) or episode_number,
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'timestamp': int_or_none(episode_info.get('pub_time')),
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': headers,
        }
bd8f48c7 512
bd8f48c7 513
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    """Playlist extractor for bangumi media pages (``/bangumi/media/md<N>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
        },
        'playlist_mincount': 25,
    }]

    def _real_extract(self, url):
        """Read the season id from the page's initial state and list that season's episodes."""
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)
        initial_state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
        season_id = initial_state['mediaInfo']['season_id']

        episodes = self._get_episodes_from_season(season_id, url)
        return self.playlist_result(episodes, media_id)
531
bd8f48c7 532
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    """Playlist extractor for bangumi season pages (``/bangumi/play/ss<N>``)."""

    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801'
        },
        'playlist_mincount': 26
    }]

    def _real_extract(self, url):
        """Return the season's episodes as a playlist whose id is the season id."""
        season_id = self._match_id(url)
        episodes = self._get_episodes_from_season(season_id, url)
        return self.playlist_result(episodes, season_id)
4bc15a68
RA
547
548
2b9d0216
L
class BilibiliSpaceBaseIE(InfoExtractor):
    """Base class for the paginated space.bilibili.com list extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Return ``(metadata, paged_list)`` for a paginated listing.

        ``fetch_page(idx)`` downloads the zero-based page ``idx``,
        ``get_metadata(page)`` must return a dict holding at least
        ``page_count`` and ``page_size``, and ``get_entries(page)`` yields the
        entries of one page. The first page is downloaded once and reused.
        """
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        def entries_for(page_idx):
            # Index 0 was already fetched above to obtain the metadata
            page = fetch_page(page_idx) if page_idx else first_page
            return get_entries(page)

        paged_list = InAdvancePagedList(entries_for, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
559
560
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Playlist extractor for the uploaded videos of a user space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
    }]

    def _extract_signature(self, playlist_id):
        """Derive the 32-character key that is appended to the query before hashing ``w_rid``.

        The key is a fixed permutation of the concatenated ``img_url`` and
        ``sub_url`` file stems from the nav API; hard-coded stems are used when
        the API does not provide them.
        """
        nav_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

        def key_from_url(x):
            # Last path component without its extension
            return x[x.rfind('/') + 1:].split('.')[0]

        img_key = traverse_obj(
            nav_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
        sub_key = traverse_obj(
            nav_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
        combined_key = img_key + sub_key

        permutation = (
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
            57, 62, 11, 36, 20, 34, 44, 52
        )
        # Positions beyond the end of the key are skipped
        key_length = len(combined_key)
        return ''.join(combined_key[pos] for pos in permutation if pos < key_length)[:32]

    def _real_extract(self, url):
        """Return all videos of the user as a lazily paged playlist."""
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        signature = self._extract_signature(playlist_id)

        def fetch_page(page_idx):
            query = {
                'keyword': '',
                'mid': playlist_id,
                'order': 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
                'wts': int(time.time()),
            }
            # w_rid is the MD5 of the urlencoded query followed by the signature key
            signed_payload = f'{urllib.parse.urlencode(query)}{signature}'
            query['w_rid'] = hashlib.md5(signed_payload.encode()).hexdigest()

            try:
                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
            if response['code'] == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            return response['data']

        def get_metadata(page_data):
            page_info = page_data['page']
            page_size = page_info['ps']
            entry_count = page_info['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
                bvid = entry['bvid']
                yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
6efb0711 650
6efb0711 651
2b9d0216
L
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Playlist extractor for the uploaded audio tracks of a user space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Return all audio tracks of the user as a lazily paged playlist."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            # `.get('data', [])` would still return None for an explicit JSON null,
            # so normalise every falsy value to an empty list
            for entry in page_data.get('data') or []:
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
683
684
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Shared helpers for list-style playlists (collections, series, favorites, watch later)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a ``url_result`` for every string ``ending_key`` value found under ``bvid_keys``.

        ``bvid_keys`` is either a single key or a tuple path into ``page_data``.
        """
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Return the uploader name parsed from the space page title, or None when unavailable."""
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        if not webpage:
            # A non-fatal download failure returns False; searching a non-string would raise TypeError
            return None
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Like the parent implementation, but with the paging keys removed from the metadata.

        Callers of this class pass the metadata on as playlist fields, so
        ``page_count`` and ``page_size`` are dropped.
        """
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
699
700
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for ``collectiondetail`` pages of a user space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        """Return the collection as a paged playlist with its metadata."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            response = self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})
            return response['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['page_size']
            entry_count = paging['total']
            collection_meta = traverse_obj(page_data, {
                'title': ('meta', 'name', {str}),
                'description': ('meta', 'description', {str}),
                'uploader_id': ('meta', 'mid', {str_or_none}),
                'timestamp': ('meta', 'ptime', {int_or_none}),
                'thumbnail': ('meta', 'cover', {url_or_none}),
            })
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **collection_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
749
750
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for ``seriesdetail`` pages of a user space."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        """Return the series as a paged playlist with its metadata."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        # Series metadata comes from its own endpoint; a failure here is non-fatal
        series_response = self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
        )
        playlist_meta = traverse_obj(series_response, {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            response = self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})
            return response['data']

        def get_metadata(page_data):
            paging = page_data['page']
            page_size = paging['size']
            entry_count = paging['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
803
804
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Playlist extractor for favorites lists (``favlist?fid=`` and ``medialist/detail/ml``)."""

    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Return the favorites list as a playlist; a -403 response code asks the user to log in."""
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # A second endpoint returns the ids of all entries in a single response
        ids_response = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(ids_response, 'data')

        list_metadata = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **list_metadata)
853
854
855class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
856 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
857 _TESTS = [{
858 'url': 'https://www.bilibili.com/watchlater/#/list',
859 'info_dict': {'id': 'watchlater'},
860 'playlist_mincount': 0,
861 'skip': 'login required',
862 }]
863
864 def _real_extract(self, url):
865 list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
866 watchlater_info = self._download_json(
867 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
868 if watchlater_info['code'] == -101:
869 self.raise_login_required(msg='You need to login to access your watchlater list')
870 entries = self._get_entries(watchlater_info, ('data', 'list'))
871 return self.playlist_result(entries, id=list_id, title='稍后再看')
872
873
874class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
875 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
876 _TESTS = [{
877 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
878 'info_dict': {
879 'id': '5_547718',
880 'title': '直播回放',
881 'uploader': '靡烟miya',
882 'uploader_id': '1958703906',
883 'timestamp': 1637985853,
884 'upload_date': '20211127',
885 },
886 'playlist_mincount': 513,
887 }, {
888 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
889 'info_dict': {
890 'id': '5_547718',
891 },
892 'playlist_mincount': 513,
893 'skip': 'redirect url',
894 }, {
895 'url': 'https://www.bilibili.com/list/ml1103407912',
896 'info_dict': {
897 'id': '3_1103407912',
898 'title': '【V2】(旧)',
899 'uploader': '晓月春日',
900 'uploader_id': '84912',
901 'timestamp': 1604905176,
902 'upload_date': '20201109',
903 'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
904 },
905 'playlist_mincount': 22,
906 }, {
907 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
908 'info_dict': {
909 'id': '3_1103407912',
910 },
911 'playlist_mincount': 22,
912 'skip': 'redirect url',
913 }, {
914 'url': 'https://www.bilibili.com/list/watchlater',
915 'info_dict': {'id': 'watchlater'},
916 'playlist_mincount': 0,
917 'skip': 'login required',
918 }, {
919 'url': 'https://www.bilibili.com/medialist/play/watchlater',
920 'info_dict': {'id': 'watchlater'},
921 'playlist_mincount': 0,
922 'skip': 'login required',
923 }]
924
def _extract_medialist(self, query, list_id):
    """Lazily yield the entries of a medialist, page by page.

    NOTE: ``query`` is updated in place; after each page its ``oid`` is
    set to the id of the last item received, which is what the next
    request is sent with. Iteration stops once a page does not report
    ``has_more``.
    """
    page_num = 0
    while True:
        page_num += 1
        response = self._download_json(
            'https://api.bilibili.com/x/v2/medialist/resource/list',
            list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}')
        page_data = response['data']
        yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
        query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
        if not page_data.get('has_more', False):
            return
935
def _real_extract(self, url):
    """Extract a ``/list/...`` or ``/medialist/play/...`` page as a playlist.

    The page's ``window.__INITIAL_STATE__`` JSON provides both the error
    status and the parameters for the medialist API query.
    """
    list_id = self._match_id(url)
    webpage = self._download_webpage(url, list_id)
    state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)

    status = traverse_obj(state, ('error', 'code', {int_or_none}))
    if status != 200:
        true_code = traverse_obj(state, ('error', 'trueCode', {int_or_none}))
        message = traverse_obj(state, ('error', 'message', {str_or_none}))
        if true_code == -400 and list_id == 'watchlater':
            self.raise_login_required('You need to login to access your watchlater playlist')
        elif true_code == -403:
            self.raise_login_required('This is a private playlist. You need to login as its owner')
        elif true_code == 11010:
            raise ExtractorError('Playlist is no longer available', expected=True)
        raise ExtractorError(f'Could not access playlist: {true_code} {message}')

    # Query parameters taken from the page state
    state_params = traverse_obj(state, {
        'type': ('playlist', 'type', {int_or_none}),
        'biz_id': ('playlist', 'id', {int_or_none}),
        'tid': ('tid', {int_or_none}),
        'sort_field': ('sortFiled', {int_or_none}),
        'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
    })
    query = {'ps': 20, 'with_current': False, **state_params}

    list_info = traverse_obj(state, ('mediaListInfo', {
        'title': ('title', {str}),
        'uploader': ('upper', 'name', {str}),
        'uploader_id': ('upper', 'mid', {str_or_none}),
        'timestamp': ('ctime', {int_or_none}),
        'thumbnail': ('cover', {url_or_none}),
    }))
    # The playlist id is "<type>_<biz_id>", e.g. "3_1103407912"
    metadata = {'id': f'{query["type"]}_{query["biz_id"]}', **list_info}

    entries = self._extract_medialist(query, list_id)
    return self.playlist_result(entries, **metadata)
06167fbb 973
974
c34f505b 975class BilibiliCategoryIE(InfoExtractor):
976 IE_NAME = 'Bilibili category extractor'
977 _MAX_RESULTS = 1000000
9e68747f 978 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
c34f505b 979 _TESTS = [{
980 'url': 'https://www.bilibili.com/v/kichiku/mad',
981 'info_dict': {
982 'id': 'kichiku: mad',
983 'title': 'kichiku: mad'
984 },
985 'playlist_mincount': 45,
986 'params': {
987 'playlistend': 45
988 }
989 }]
990
991 def _fetch_page(self, api_url, num_pages, query, page_num):
992 parsed_json = self._download_json(
993 api_url, query, query={'Search_key': query, 'pn': page_num},
994 note='Extracting results from page %s of %s' % (page_num, num_pages))
995
f8580bf0 996 video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
c34f505b 997 if not video_list:
998 raise ExtractorError('Failed to retrieve video list for page %d' % page_num)
999
1000 for video in video_list:
1001 yield self.url_result(
1002 'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])
1003
def _entries(self, category, subcategory, query):
    """Return a lazily paged list of videos for a category/subcategory.

    Raises ExtractorError for an unsupported category or subcategory, or
    when the first API response does not contain usable paging data.
    """
    # map of categories : subcategories : RIDs
    rid_map = {
        'kichiku': {
            'mad': 26,
            'manual_vocaloid': 126,
            'guide': 22,
            'theatre': 216,
            'course': 127
        },
    }

    if category not in rid_map:
        raise ExtractorError(
            f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
    if subcategory not in rid_map[category]:
        raise ExtractorError(
            f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
    rid_value = rid_map[category][subcategory]

    api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
    page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
    # traverse_obj gives None when data.page is absent; fall back to {} so the
    # check below raises the intended ExtractorError rather than AttributeError
    page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
    count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
    if count is None or not size:
        raise ExtractorError('Failed to calculate either page count or size')

    num_pages = math.ceil(count / size)

    return OnDemandPagedList(functools.partial(
        self._fetch_page, api_url, num_pages, query), size)
1035
1036 def _real_extract(self, url):
ad974876 1037 category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
c34f505b 1038 query = '%s: %s' % (category, subcategory)
1039
1040 return self.playlist_result(self._entries(category, subcategory, query), query, query)
1041
1042
06167fbb 1043class BiliBiliSearchIE(SearchInfoExtractor):
96565c7e 1044 IE_DESC = 'Bilibili video search'
06167fbb 1045 _MAX_RESULTS = 100000
1046 _SEARCH_KEY = 'bilisearch'
06167fbb 1047
e88d44c6 1048 def _search_results(self, query):
1049 for page_num in itertools.count(1):
1050 videos = self._download_json(
1051 'https://api.bilibili.com/x/web-interface/search/type', query,
1052 note=f'Extracting results from page {page_num}', query={
1053 'Search_key': query,
1054 'keyword': query,
1055 'page': page_num,
1056 'context': '',
e88d44c6 1057 'duration': 0,
1058 'tids_2': '',
1059 '__refresh__': 'true',
1060 'search_type': 'video',
1061 'tids': 0,
1062 'highlight': 1,
2d101954 1063 })['data'].get('result')
1064 if not videos:
1065 break
06167fbb 1066 for video in videos:
e88d44c6 1067 yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
06167fbb 1068
1069
4bc15a68
RA
1070class BilibiliAudioBaseIE(InfoExtractor):
1071 def _call_api(self, path, sid, query=None):
1072 if not query:
1073 query = {'sid': sid}
1074 return self._download_json(
1075 'https://www.bilibili.com/audio/music-service-c/web/' + path,
1076 sid, query=query)['data']
1077
1078
1079class BilibiliAudioIE(BilibiliAudioBaseIE):
1080 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1081 _TEST = {
1082 'url': 'https://www.bilibili.com/audio/au1003142',
1083 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1084 'info_dict': {
1085 'id': '1003142',
1086 'ext': 'm4a',
1087 'title': '【tsukimi】YELLOW / 神山羊',
1088 'artist': 'tsukimi',
1089 'comment_count': int,
1090 'description': 'YELLOW的mp3版!',
1091 'duration': 183,
1092 'subtitles': {
1093 'origin': [{
1094 'ext': 'lrc',
1095 }],
1096 },
1097 'thumbnail': r're:^https?://.+\.jpg',
1098 'timestamp': 1564836614,
1099 'upload_date': '20190803',
1100 'uploader': 'tsukimi-つきみぐー',
1101 'view_count': int,
1102 },
1103 }
1104
def _real_extract(self, url):
    """Extract a single audio track (``/audio/au<id>``).

    Two API calls are made: ``url`` for the stream location and
    ``song/info`` for the metadata. Only the first entry of ``cdns`` is
    used as the download URL.
    """
    au_id = self._match_id(url)

    play_data = self._call_api('url', au_id)
    formats = [{
        'url': play_data['cdns'][0],
        'filesize': int_or_none(play_data.get('size')),
        'vcodec': 'none',
        # the page URL is sent as Referer when downloading the stream
        'http_headers': {'Referer': url},
    }]

    song = self._call_api('song/info', au_id)
    title = song['title']
    stats = song.get('statistic') or {}

    lyric_url = song.get('lyric')
    subtitles = {'origin': [{'url': lyric_url}]} if lyric_url else None

    return {
        'id': au_id,
        'title': title,
        'formats': formats,
        'artist': song.get('author'),
        'comment_count': int_or_none(stats.get('comment')),
        'description': song.get('intro'),
        'duration': int_or_none(song.get('duration')),
        'subtitles': subtitles,
        'thumbnail': song.get('cover'),
        'timestamp': int_or_none(song.get('passtime')),
        'uploader': song.get('uname'),
        'view_count': int_or_none(stats.get('play')),
    }
1147
1148
1149class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
1150 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1151 _TEST = {
1152 'url': 'https://www.bilibili.com/audio/am10624',
1153 'info_dict': {
1154 'id': '10624',
1155 'title': '每日新曲推荐(每日11:00更新)',
1156 'description': '每天11:00更新,为你推送最新音乐',
1157 },
1158 'playlist_count': 19,
1159 }
1160
def _real_extract(self, url):
    """Extract an audio album (``/audio/am<id>``) as a playlist of tracks.

    Only one page of up to 100 songs is requested. Album title and
    description are looked up only when at least one track was found.
    """
    am_id = self._match_id(url)

    songs = self._call_api(
        'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

    song_ids = (str_or_none(song.get('id')) for song in songs)
    entries = [
        self.url_result(
            'https://www.bilibili.com/audio/au' + sid,
            BilibiliAudioIE.ie_key(), sid)
        for sid in song_ids if sid  # songs without an id are skipped
    ]

    if not entries:
        return self.playlist_result(entries, am_id)

    album_data = self._call_api('menu/info', am_id) or {}
    album_title = album_data.get('title')
    if not album_title:
        return self.playlist_result(entries, am_id)

    for entry in entries:
        entry['album'] = album_title
    return self.playlist_result(
        entries, am_id, album_title, album_data.get('intro'))
63dce309
S
1186
1187
1188class BiliBiliPlayerIE(InfoExtractor):
1189 _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1190 _TEST = {
1191 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1192 'only_matching': True,
1193 }
1194
def _real_extract(self, url):
    """Redirect an embedded-player URL to the video page for its ``aid``."""
    aid = self._match_id(url)
    video_page = f'http://www.bilibili.tv/video/av{aid}/'
    return self.url_result(video_page, ie=BiliBiliIE.ie_key(), video_id=aid)
16f7e6be
AG
1200
1201
1202class BiliIntlBaseIE(InfoExtractor):
c62ecf0d 1203 _API_URL = 'https://api.bilibili.tv/intl/gateway'
cfcf60ea 1204 _NETRC_MACHINE = 'biliintl'
16f7e6be 1205
c62ecf0d 1206 def _call_api(self, endpoint, *args, **kwargs):
cfcf60ea
M
1207 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
1208 if json.get('code'):
1209 if json['code'] in (10004004, 10004005, 10023006):
1210 self.raise_login_required()
1211 elif json['code'] == 10004001:
1212 self.raise_geo_restricted()
1213 else:
1214 if json.get('message') and str(json['code']) != json['message']:
1215 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1216 else:
1217 errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
1218 if kwargs.get('fatal'):
1219 raise ExtractorError(errmsg)
1220 else:
1221 self.report_warning(errmsg)
1222 return json.get('data')
16f7e6be 1223
def json2srt(self, json):
    """Convert a subtitle JSON document into SRT text.

    Lines of ``json['body']`` are kept only when they have non-empty
    ``content``, a ``from`` time and a truthy ``to`` time; the surviving
    lines are numbered from 1.
    """
    # `from` is compared against None rather than tested for truthiness so
    # that a cue starting at 0 is not discarded
    lines = traverse_obj(json, (
        'body', lambda _, l: l['content'] and l['from'] is not None and l['to']))
    return '\n\n'.join(
        f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
        for i, line in enumerate(lines))
1230
f5f15c99
LR
def _get_subtitles(self, *, ep_id=None, aid=None):
    """Download all subtitles for an episode or a video.

    Exactly one of ``ep_id`` / ``aid`` is expected to be set; whichever is
    given is also used as the id for progress messages. Failures are
    non-fatal: an unavailable list or track is skipped.

    Returns a dict mapping language key (default ``'en'``) to a list of
    subtitle entries with ``ext`` ``'srt'`` and inline ``data``.
    """
    sub_json = self._call_api(
        '/web/v2/subtitle', ep_id or aid, fatal=False,
        note='Downloading subtitles list', errnote='Unable to download subtitles list',
        query=filter_dict({
            'platform': 'web',
            's_locale': 'en_US',
            'episode_id': ep_id,
            'aid': aid,
        })) or {}
    subtitles = {}
    for sub in sub_json.get('subtitles') or []:
        sub_url = sub.get('url')
        if not sub_url:
            continue
        lang = sub.get('lang')
        # Written as one conditional over the complete message: the previous
        # "'...%s' % x if cond else ''" form evaluated to an empty note
        # whenever the language was missing
        note = f'Downloading subtitles for {lang}' if lang else 'Downloading subtitles'
        sub_data = self._download_json(
            sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
            note=note)
        if not sub_data:
            continue
        subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
            'ext': 'srt',
            'data': self.json2srt(sub_data)
        })
    return subtitles
1256
f5f15c99
LR
def _get_formats(self, *, ep_id=None, aid=None):
    """Return the list of formats for an episode or a video.

    Video and audio are listed as separate formats: video entries get
    ``acodec`` ``'none'`` and audio entries get ``vcodec`` ``'none'``.
    Streams without a URL are skipped.
    """
    response = self._call_api(
        '/web/playurl', ep_id or aid, note='Downloading video formats',
        errnote='Unable to download video formats', query=filter_dict({
            'platform': 'web',
            'ep_id': ep_id,
            'aid': aid,
        }))
    playurl = response['playurl']

    formats = []
    for stream in playurl.get('video') or []:
        resource = stream.get('video_resource') or {}
        stream_info = stream.get('stream_info') or {}
        if resource.get('url'):
            formats.append({
                'url': resource['url'],
                'ext': 'mp4',
                'format_note': stream_info.get('desc_words'),
                'width': resource.get('width'),
                'height': resource.get('height'),
                'vbr': resource.get('bandwidth'),
                'acodec': 'none',
                'vcodec': resource.get('codecs'),
                'filesize': resource.get('size'),
            })

    for audio in playurl.get('audio_resource') or []:
        if audio.get('url'):
            formats.append({
                'url': audio['url'],
                'ext': 'mp4',
                'abr': audio.get('bandwidth'),
                'acodec': audio.get('codecs'),
                'vcodec': 'none',
                'filesize': audio.get('size'),
            })

    return formats
1296
def _parse_video_metadata(self, video_data):
    """Map an episode/video dict to ``title``, ``thumbnail`` and ``episode_number``.

    The episode number is read from a display title of the form
    ``E<number>`` or ``E<number> - ...``; it is None otherwise.
    """
    display_title = video_data.get('title_display')
    episode_number = self._search_regex(
        r'^E(\d+)(?:$| - )', display_title or '', 'episode number', default=None)
    return {
        'title': display_title or video_data.get('title'),
        'thumbnail': video_data.get('cover'),
        'episode_number': int_or_none(episode_number),
    }
1304
def _perform_login(self, username, password):
    """Log in to bilibili.tv with a username and password.

    The password is never sent in clear text: the server supplies an RSA
    public key plus a hash prefix, and ``hash + password`` is encrypted
    with PKCS#1 v1.5 and base64-encoded before being posted.

    Raises ExtractorError if the Cryptodome RSA module is unavailable or
    if the login response carries a non-zero ``code``.
    """
    if not Cryptodome.RSA:
        raise ExtractorError('pycryptodomex not found. Please install', expected=True)

    # Fetch the RSA public key and the hash prefix for this login attempt
    key_data = self._download_json(
        'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
        note='Downloading login key', errnote='Unable to download login key')['data']

    public_key = Cryptodome.RSA.importKey(key_data['key'])
    # Despite the name, this is the RSA ciphertext of (hash + password), not a digest
    password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
    login_post = self._download_json(
        'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
            'username': username,
            'password': base64.b64encode(password_hash).decode('ascii'),
            'keep_me': 'true',
            's_locale': 'en_US',
            'isTrusted': 'true'
        }), note='Logging in', errnote='Unable to log in')
    # A non-zero code means failure; only a server-provided message is marked as expected
    if login_post.get('code'):
        if login_post.get('message'):
            raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
        else:
            raise ExtractorError('Unable to log in')
1328
16f7e6be
AG
1329
1330class BiliIntlIE(BiliIntlBaseIE):
0831d95c 1331 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
16f7e6be 1332 _TESTS = [{
cfcf60ea 1333 # Bstation page
16f7e6be
AG
1334 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1335 'info_dict': {
1336 'id': '341736',
1337 'ext': 'mp4',
c62ecf0d
M
1338 'title': 'E2 - The First Night',
1339 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
16f7e6be 1340 'episode_number': 2,
d37422f1
H
1341 'upload_date': '20201009',
1342 'episode': 'Episode 2',
1343 'timestamp': 1602259500,
1344 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
0ba87dd2
H
1345 'chapters': [{
1346 'start_time': 0,
1347 'end_time': 76.242,
1348 'title': '<Untitled Chapter 1>'
1349 }, {
1350 'start_time': 76.242,
1351 'end_time': 161.161,
1352 'title': 'Intro'
1353 }, {
1354 'start_time': 1325.742,
1355 'end_time': 1403.903,
1356 'title': 'Outro'
1357 }],
c62ecf0d 1358 }
16f7e6be 1359 }, {
cfcf60ea 1360 # Non-Bstation page
c62ecf0d 1361 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
16f7e6be 1362 'info_dict': {
c62ecf0d 1363 'id': '11005006',
16f7e6be 1364 'ext': 'mp4',
c62ecf0d
M
1365 'title': 'E3 - Who?',
1366 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1367 'episode_number': 3,
d37422f1
H
1368 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1369 'episode': 'Episode 3',
1370 'upload_date': '20211219',
1371 'timestamp': 1639928700,
0ba87dd2
H
1372 'chapters': [{
1373 'start_time': 0,
1374 'end_time': 88.0,
1375 'title': '<Untitled Chapter 1>'
1376 }, {
1377 'start_time': 88.0,
1378 'end_time': 156.0,
1379 'title': 'Intro'
1380 }, {
1381 'start_time': 1173.0,
1382 'end_time': 1259.535,
1383 'title': 'Outro'
1384 }],
c62ecf0d 1385 }
cfcf60ea
M
1386 }, {
1387 # Subtitle with empty content
1388 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1389 'info_dict': {
1390 'id': '10131790',
1391 'ext': 'mp4',
1392 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1393 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1394 'episode_number': 140,
1395 },
1396 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
d37422f1
H
1397 }, {
1398 'url': 'https://www.bilibili.tv/en/video/2041863208',
1399 'info_dict': {
1400 'id': '2041863208',
1401 'ext': 'mp4',
1402 'timestamp': 1670874843,
1403 'description': 'Scheduled for April 2023.\nStudio: ufotable',
1404 'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
1405 'upload_date': '20221212',
1406 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
b093c38c
H
1407 },
1408 }, {
1409 # episode comment extraction
1410 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1411 'info_dict': {
1412 'id': '340317',
1413 'ext': 'mp4',
1414 'timestamp': 1604057820,
1415 'upload_date': '20201030',
1416 'episode_number': 5,
1417 'title': 'E5 - My Own Steel',
1418 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1419 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1420 'episode': 'Episode 5',
1421 'comment_count': int,
1422 'chapters': [{
1423 'start_time': 0,
1424 'end_time': 61.0,
1425 'title': '<Untitled Chapter 1>'
1426 }, {
1427 'start_time': 61.0,
1428 'end_time': 134.0,
1429 'title': 'Intro'
1430 }, {
1431 'start_time': 1290.0,
1432 'end_time': 1379.0,
1433 'title': 'Outro'
1434 }],
1435 },
1436 'params': {
1437 'getcomments': True
1438 }
1439 }, {
1440 # user generated content comment extraction
1441 'url': 'https://www.bilibili.tv/en/video/2045730385',
1442 'info_dict': {
1443 'id': '2045730385',
1444 'ext': 'mp4',
1445 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1446 'timestamp': 1667891924,
1447 'upload_date': '20221108',
1448 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
1449 'comment_count': int,
1450 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
1451 },
1452 'params': {
1453 'getcomments': True
d37422f1 1454 }
0ba87dd2
H
1455 }, {
1456 # episode id without intro and outro
1457 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1458 'info_dict': {
1459 'id': '11246489',
1460 'ext': 'mp4',
1461 'title': 'E1 - Operation \'Strix\' <Owl>',
1462 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1463 'timestamp': 1649516400,
1464 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1465 'episode': 'Episode 1',
1466 'episode_number': 1,
1467 'upload_date': '20220409',
1468 },
c62ecf0d
M
1469 }, {
1470 'url': 'https://www.biliintl.com/en/play/34613/341736',
1471 'only_matching': True,
f5f15c99
LR
1472 }, {
1473 # User-generated content (as opposed to a series licensed from a studio)
1474 'url': 'https://bilibili.tv/en/video/2019955076',
1475 'only_matching': True,
1476 }, {
1477 # No language in URL
1478 'url': 'https://www.bilibili.tv/video/2019955076',
1479 'only_matching': True,
0831d95c 1480 }, {
1481 # Uppercase language in URL
1482 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1483 'only_matching': True,
16f7e6be
AG
1484 }]
1485
26fdfc37 1486 def _make_url(video_id, series_id=None):
1487 if series_id:
1488 return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1489 return f'https://www.bilibili.tv/en/video/{video_id}'
1490
def _extract_video_metadata(self, url, video_id, season_id):
    """Collect title/thumbnail/description metadata for a video.

    Sources, in order: metadata smuggled into ``url`` (returned as-is
    when it has a title), the initial state embedded in the webpage and,
    for episodes whose page has none, the season's episode list. The
    result is merged with JSON-LD and Open Graph data from the webpage.
    """
    url, smuggled_data = unsmuggle_url(url, {})
    if smuggled_data.get('title'):
        return smuggled_data

    webpage = self._download_webpage(url, video_id)
    # Bstation layout
    initial_data = (
        self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
        or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
    video_data = traverse_obj(
        initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

    if season_id and not video_data:
        # Non-Bstation layout, read through episode list
        season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
        # Fall back to {} when the episode is not listed; _parse_video_metadata
        # calls .get() on its argument and would fail on None
        video_data = traverse_obj(season_json, (
            'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
        ), expected_type=dict, get_all=False) or {}

    # XXX: webpage metadata may not be accurate; it is only used so that
    # extraction does not fail when video_data was not found
    return merge_dicts(
        self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
            'title': self._html_search_meta('og:title', webpage),
            'description': self._html_search_meta('og:description', webpage)
        })
26fdfc37 1517
b093c38c
H
def _get_comments_reply(self, root_id, next_id=0, display_id=None):
    """Yield every reply below the root comment ``root_id``.

    Pages are fetched iteratively, following ``data.cursor.next`` until
    the cursor reports ``is_end``. Iteration also stops when the response
    has no usable cursor or the cursor does not advance, so a malformed
    or error response ends the listing instead of raising or looping.
    """
    while True:
        comment_api_raw_data = self._download_json(
            'https://api.bilibili.tv/reply/web/detail', display_id,
            note=f'Downloading reply comment of {root_id} - {next_id}',
            query={
                'platform': 'web',
                'ps': 20,  # comment's reply per page (default: 3)
                'root': root_id,
                'next': next_id,
            })

        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(replies, ('member', 'name')),
                'author_id': traverse_obj(replies, ('member', 'mid')),
                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                'text': traverse_obj(replies, ('content', 'message')),
                'id': replies.get('rpid'),
                'like_count': int_or_none(replies.get('like_count')),
                'parent': replies.get('parent'),
                'timestamp': unified_timestamp(replies.get('ctime_text'))
            }

        cursor = traverse_obj(comment_api_raw_data, ('data', 'cursor', {dict})) or {}
        following_id = cursor.get('next')
        if cursor.get('is_end') or following_id is None or following_id == next_id:
            return
        next_id = following_id
1544
def _get_comments(self, video_id, ep_id):
    """Yield root comments, each followed by its replies.

    ``ep_id`` only selects the comment ``type`` sent to the API (3 when
    set, 1 otherwise). Paging stops when the cursor reports ``is_end`` or
    when a page contains no comments at all; the latter keeps an error or
    empty response from causing an endless download loop.
    """
    for i in itertools.count(0):
        comment_api_raw_data = self._download_json(
            'https://api.bilibili.tv/reply/web/root', video_id,
            note=f'Downloading comment page {i + 1}',
            query={
                'platform': 'web',
                'pn': i,  # page number
                'ps': 20,  # comment per page (default: 20)
                'oid': video_id,
                'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                'sort_type': 1,  # 1: best, 2: recent
            })

        page_comments = traverse_obj(comment_api_raw_data, ('data', 'replies', ...))
        for replies in page_comments:
            yield {
                'author': traverse_obj(replies, ('member', 'name')),
                'author_id': traverse_obj(replies, ('member', 'mid')),
                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                'text': traverse_obj(replies, ('content', 'message')),
                'id': replies.get('rpid'),
                'like_count': int_or_none(replies.get('like_count')),
                'timestamp': unified_timestamp(replies.get('ctime_text')),
                'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
            }
            if replies.get('count'):
                yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

        if not page_comments or traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
            break
1575
def _real_extract(self, url):
    """Extract a single episode (``/play/<season>/<ep>``) or video (``/video/<aid>``).

    For episodes, the intro/outro "skip" markers of the episode API are
    exposed as two chapters named ``Intro`` and ``Outro``.
    """
    season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
    video_id = ep_id or aid
    chapters = None

    if ep_id:
        episode_info = self._call_api(
            f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
            video_id, fatal=False) or {}
        if episode_info.get('skip'):
            def skip_seconds(field):
                # values are divided by 1000 to obtain seconds
                return float_or_none(traverse_obj(episode_info, ('skip', field)), 1000)

            # FIXME: start and end times seem to be off by a few seconds even though
            # they are correct according to ogv.*.js
            # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
            chapters = [{
                'start_time': skip_seconds('opening_start_time'),
                'end_time': skip_seconds('opening_end_time'),
                'title': 'Intro'
            }, {
                'start_time': skip_seconds('ending_start_time'),
                'end_time': skip_seconds('ending_end_time'),
                'title': 'Outro'
            }]

    metadata = self._extract_video_metadata(url, video_id, season_id)
    formats = self._get_formats(ep_id=ep_id, aid=aid)
    subtitles = self.extract_subtitles(ep_id=ep_id, aid=aid)
    post_extractor = self.extract_comments(video_id, ep_id)
    return {
        'id': video_id,
        **metadata,
        'formats': formats,
        'subtitles': subtitles,
        'chapters': chapters,
        '__post_extractor': post_extractor
    }
16f7e6be
AG
1606
1607
1608class BiliIntlSeriesIE(BiliIntlBaseIE):
08e29b9f 1609 IE_NAME = 'biliIntl:series'
76c3cecc 1610 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
16f7e6be
AG
1611 _TESTS = [{
1612 'url': 'https://www.bilibili.tv/en/play/34613',
1613 'playlist_mincount': 15,
1614 'info_dict': {
1615 'id': '34613',
76c3cecc
H
1616 'title': 'TONIKAWA: Over the Moon For You',
1617 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1618 'categories': ['Slice of life', 'Comedy', 'Romance'],
c62ecf0d
M
1619 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1620 'view_count': int,
16f7e6be
AG
1621 },
1622 'params': {
1623 'skip_download': True,
16f7e6be 1624 },
76c3cecc
H
1625 }, {
1626 'url': 'https://www.bilibili.tv/en/media/1048837',
1627 'info_dict': {
1628 'id': '1048837',
1629 'title': 'SPY×FAMILY',
1630 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1631 'categories': ['Adventure', 'Action', 'Comedy'],
1632 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
1633 'view_count': int,
1634 },
1635 'playlist_mincount': 25,
16f7e6be
AG
1636 }, {
1637 'url': 'https://www.biliintl.com/en/play/34613',
c62ecf0d 1638 'only_matching': True,
0831d95c 1639 }, {
1640 'url': 'https://www.biliintl.com/EN/play/34613',
1641 'only_matching': True,
16f7e6be
AG
1642 }]
1643
c62ecf0d
M
def _entries(self, series_id):
    """Yield one url result per episode of the series.

    The metadata parsed from the episode list is smuggled into each
    episode URL along with it.
    """
    series_json = self._call_api(
        f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
    episodes = traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict)
    for episode in episodes:
        episode_id = str(episode['episode_id'])
        episode_url = BiliIntlIE._make_url(episode_id, series_id)
        smuggled = smuggle_url(episode_url, self._parse_video_metadata(episode))
        yield self.url_result(smuggled, BiliIntlIE, episode_id)
16f7e6be
AG
1652
def _real_extract(self, url):
    """Extract a whole series (season) as a playlist of its episodes.

    Season metadata is optional: if the season-info call yields nothing,
    the playlist is still returned, just without title, description,
    categories, thumbnail and view count.
    """
    series_id = self._match_id(url)
    # _call_api returns the response's `data`, which may be None (for example
    # after a non-fatal API error), so guard before reading `season`
    season_response = self._call_api(
        f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id) or {}
    series_info = season_response.get('season') or {}
    return self.playlist_result(
        self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
        categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
        thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
b4f53662
H
1660
1661
class BiliLiveIE(InfoExtractor):
    # Extractor for live rooms on live.bilibili.com (with or without the
    # "blanc/" path prefix); the numeric path component is used as the id.
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
            'ext': 'flv',
            'title': "太空狼人杀联动,不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live',
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True,
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True,
    }]

    # Quality number ("qn", as sent to the play-info API) -> format fields
    # merged into every format extracted for that quality.
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Built from the qn keys in declaration order; presumably ranks a qn by
    # its position in that list (see utils.qualities) - TODO confirm.
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Fetch ``https://api.live.bilibili.com/<path>`` and return its ``data``.

        Raises ExtractorError (with the API's ``message`` when present) if
        the response ``code`` is not 0. An absent or empty ``data`` is
        returned as an empty dict.
        """
        api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if api_result.get('code') != 0:
            raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
        return api_result.get('data') or {}

    def _parse_formats(self, qn, fmt):
        """Yield format dicts for the codecs of *fmt* served at quality *qn*.

        Codecs whose ``current_qn`` differs from *qn* are ignored. Entries
        lacking any of the URL components (``host``, ``base_url``, ``extra``)
        are skipped, so one malformed entry does not abort the extraction.
        """
        for codec in fmt.get('codec') or []:
            if codec.get('current_qn') != qn:
                continue
            base_url = codec.get('base_url')
            if base_url is None:
                continue
            for url_info in codec.get('url_info') or []:
                host, extra = url_info.get('host'), url_info.get('extra')
                if host is None or extra is None:
                    continue
                yield {
                    'url': f'{host}{base_url}{extra}',
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        """Extract the live stream of the room referenced by *url*.

        Raises ExtractorError (expected) when the room's ``live_status`` is 0.
        The play-info API is queried once per known quality number and the
        resulting formats are aggregated.
        """
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        live_time = None
        for qn in self._FORMATS:
            stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            })
            # Keep the first start time reported, so an empty response for a
            # later quality cannot discard it
            if live_time is None:
                live_time = stream_data.get('live_time')
            for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            'timestamp': live_time,
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }