]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bilibili.py
[ie/bilibili] Add support for series, favorites and watch later (#7518)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
CommitLineData
cfcf60ea 1import base64
c34f505b 2import functools
6f10cdcf 3import hashlib
ad974876 4import itertools
c34f505b 5import math
5336bf57 6import re
6f10cdcf 7import time
ad974876 8import urllib.parse
28746fbd 9
06167fbb 10from .common import InfoExtractor, SearchInfoExtractor
f6a765ce 11from ..dependencies import Cryptodome
3d2623a8 12from ..networking.exceptions import HTTPError
28746fbd 13from ..utils import (
bd8f48c7 14 ExtractorError,
ad974876 15 GeoRestrictedError,
2b9d0216
L
16 InAdvancePagedList,
17 OnDemandPagedList,
9e68747f 18 bool_or_none,
f5f15c99 19 filter_dict,
6461f2b7 20 float_or_none,
ad974876 21 format_field,
2b9d0216 22 int_or_none,
bdd0b75e 23 join_nonempty,
ad974876 24 make_archive_id,
d37422f1 25 merge_dicts,
f8580bf0 26 mimetype2ext,
2b9d0216 27 parse_count,
ad974876 28 parse_qs,
b4f53662 29 qualities,
26fdfc37 30 smuggle_url,
efc947fb 31 srt_subtitles_timecode,
4bc15a68 32 str_or_none,
2b9d0216 33 traverse_obj,
6f10cdcf 34 try_call,
b093c38c 35 unified_timestamp,
26fdfc37 36 unsmuggle_url,
c62ecf0d 37 url_or_none,
ad974876 38 urlencode_postdata,
9e68747f 39 variadic,
28746fbd
PH
40)
41
42
ad974876 43class BilibiliBaseIE(InfoExtractor):
5336bf57 44 _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
45
ad974876
L
46 def extract_formats(self, play_info):
47 format_names = {
48 r['quality']: traverse_obj(r, 'new_description', 'display_desc')
49 for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
50 }
51
52 audios = traverse_obj(play_info, ('dash', 'audio', ...))
53 flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
54 if flac_audio:
55 audios.append(flac_audio)
56 formats = [{
57 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
58 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
59 'acodec': audio.get('codecs'),
60 'vcodec': 'none',
61 'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
5336bf57 62 'filesize': int_or_none(audio.get('size')),
63 'format_id': str_or_none(audio.get('id')),
ad974876
L
64 } for audio in audios]
65
66 formats.extend({
67 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
68 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
69 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
70 'width': int_or_none(video.get('width')),
71 'height': int_or_none(video.get('height')),
72 'vcodec': video.get('codecs'),
73 'acodec': 'none' if audios else None,
74 'tbr': float_or_none(video.get('bandwidth'), scale=1000),
75 'filesize': int_or_none(video.get('size')),
76 'quality': int_or_none(video.get('id')),
5336bf57 77 'format_id': traverse_obj(
78 video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
79 ('id', {str_or_none}), get_all=False),
ad974876
L
80 'format': format_names.get(video.get('id')),
81 } for video in traverse_obj(play_info, ('dash', 'video', ...)))
82
83 missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
84 if missing_formats:
85 self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
6368e2e6 86 f'you have to login or become premium member to download them. {self._login_hint()}')
ad974876 87
ad974876
L
88 return formats
89
90 def json2srt(self, json_data):
91 srt_data = ''
92 for idx, line in enumerate(json_data.get('body') or []):
93 srt_data += (f'{idx + 1}\n'
94 f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
95 f'{line["content"]}\n\n')
96 return srt_data
97
8a83baaf 98 def _get_subtitles(self, video_id, aid, cid):
ad974876
L
99 subtitles = {
100 'danmaku': [{
101 'ext': 'xml',
102 'url': f'https://comment.bilibili.com/{cid}.xml',
103 }]
104 }
105
8a83baaf
L
106 video_info_json = self._download_json(f'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id)
107 for s in traverse_obj(video_info_json, ('data', 'subtitle', 'subtitles', ...)):
ad974876
L
108 subtitles.setdefault(s['lan'], []).append({
109 'ext': 'srt',
110 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
111 })
112 return subtitles
113
c90c5b9b 114 def _get_chapters(self, aid, cid):
115 chapters = aid and cid and self._download_json(
116 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
117 note='Extracting chapters', fatal=False)
118 return traverse_obj(chapters, ('data', 'view_points', ..., {
119 'title': 'content',
120 'start_time': 'from',
121 'end_time': 'to',
122 })) or None
123
ad974876
L
124 def _get_comments(self, aid):
125 for idx in itertools.count(1):
126 replies = traverse_obj(
127 self._download_json(
128 f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
129 aid, note=f'Extracting comments from page {idx}', fatal=False),
130 ('data', 'replies'))
131 if not replies:
132 return
133 for children in map(self._get_all_children, replies):
134 yield from children
135
136 def _get_all_children(self, reply):
137 yield {
138 'author': traverse_obj(reply, ('member', 'uname')),
139 'author_id': traverse_obj(reply, ('member', 'mid')),
140 'id': reply.get('rpid'),
141 'text': traverse_obj(reply, ('content', 'message')),
142 'timestamp': reply.get('ctime'),
143 'parent': reply.get('parent') or 'root',
144 }
145 for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
146 yield from children
147
bdd0b75e
GS
148 def _get_episodes_from_season(self, ss_id, url):
149 season_info = self._download_json(
150 'https://api.bilibili.com/pgc/web/season/section', ss_id,
151 note='Downloading season info', query={'season_id': ss_id},
152 headers={'Referer': url, **self.geo_verification_headers()})
153
154 for entry in traverse_obj(season_info, (
155 'result', 'main_section', 'episodes',
156 lambda _, v: url_or_none(v['share_url']) and v['id'])):
157 yield self.url_result(entry['share_url'], BiliBiliBangumiIE, f'ep{entry["id"]}')
158
ad974876
L
159
160class BiliBiliIE(BilibiliBaseIE):
9e68747f 161 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
28746fbd 162
bd8f48c7 163 _TESTS = [{
ad974876
L
164 'url': 'https://www.bilibili.com/video/BV13x41117TL',
165 'info_dict': {
166 'id': 'BV13x41117TL',
167 'title': '阿滴英文|英文歌分享#6 "Closer',
168 'ext': 'mp4',
169 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
170 'uploader_id': '65880958',
171 'uploader': '阿滴英文',
172 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
173 'duration': 554.117,
174 'tags': list,
175 'comment_count': int,
176 'upload_date': '20170301',
177 'timestamp': 1488353834,
178 'like_count': int,
179 'view_count': int,
180 },
181 }, {
182 # old av URL version
06167fbb 183 'url': 'http://www.bilibili.com/video/av1074402/',
28746fbd 184 'info_dict': {
ad974876 185 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
f8580bf0 186 'ext': 'mp4',
f8580bf0 187 'uploader': '菊子桑',
ad974876
L
188 'uploader_id': '156160',
189 'id': 'BV11x411K7CN',
190 'title': '【金坷垃】金泡沫',
191 'duration': 308.36,
f8580bf0 192 'upload_date': '20140420',
ad974876 193 'timestamp': 1397983878,
6461f2b7 194 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
ad974876
L
195 'like_count': int,
196 'comment_count': int,
197 'view_count': int,
198 'tags': list,
199 },
c90c5b9b 200 'params': {'skip_download': True},
bd8f48c7 201 }, {
ad974876
L
202 'note': 'Anthology',
203 'url': 'https://www.bilibili.com/video/BV1bK411W797',
204 'info_dict': {
205 'id': 'BV1bK411W797',
206 'title': '物语中的人物是如何吐槽自己的OP的'
207 },
208 'playlist_count': 18,
209 'playlist': [{
210 'info_dict': {
211 'id': 'BV1bK411W797_p1',
212 'ext': 'mp4',
213 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
214 'tags': 'count:11',
215 'timestamp': 1589601697,
216 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
217 'uploader': '打牌还是打桩',
218 'uploader_id': '150259984',
219 'like_count': int,
220 'comment_count': int,
221 'upload_date': '20200516',
222 'view_count': int,
223 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
224 'duration': 90.314,
225 }
226 }]
06167fbb 227 }, {
ad974876
L
228 'note': 'Specific page of Anthology',
229 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
230 'info_dict': {
231 'id': 'BV1bK411W797_p1',
232 'ext': 'mp4',
233 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
234 'tags': 'count:11',
235 'timestamp': 1589601697,
236 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
237 'uploader': '打牌还是打桩',
238 'uploader_id': '150259984',
239 'like_count': int,
240 'comment_count': int,
241 'upload_date': '20200516',
242 'view_count': int,
243 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
244 'duration': 90.314,
245 }
bd8f48c7 246 }, {
ad974876
L
247 'note': 'video has subtitles',
248 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
bd8f48c7 249 'info_dict': {
ad974876 250 'id': 'BV12N4y1M7rh',
bd8f48c7 251 'ext': 'mp4',
c90c5b9b 252 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
ad974876
L
253 'tags': list,
254 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
255 'duration': 313.557,
256 'upload_date': '20220709',
9e68747f 257 'uploader': '小夫太渴',
ad974876
L
258 'timestamp': 1657347907,
259 'uploader_id': '1326814124',
260 'comment_count': int,
261 'view_count': int,
262 'like_count': int,
263 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
264 'subtitles': 'count:2'
bd8f48c7 265 },
ad974876 266 'params': {'listsubtitles': True},
ca270371 267 }, {
ad974876 268 'url': 'https://www.bilibili.com/video/av8903802/',
ca270371 269 'info_dict': {
ad974876 270 'id': 'BV13x41117TL',
f8580bf0 271 'ext': 'mp4',
ca270371 272 'title': '阿滴英文|英文歌分享#6 "Closer',
f8580bf0 273 'upload_date': '20170301',
c90c5b9b 274 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
ad974876 275 'timestamp': 1488353834,
f8580bf0 276 'uploader_id': '65880958',
277 'uploader': '阿滴英文',
ad974876 278 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
89fabf11 279 'duration': 554.117,
ad974876
L
280 'tags': list,
281 'comment_count': int,
282 'view_count': int,
283 'like_count': int,
89fabf11
JN
284 },
285 'params': {
286 'skip_download': True,
287 },
c90c5b9b 288 }, {
289 'note': 'video has chapter',
290 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
291 'info_dict': {
292 'id': 'BV1vL411G7N7',
293 'ext': 'mp4',
294 'title': '如何为你的B站视频添加进度条分段',
295 'timestamp': 1634554558,
296 'upload_date': '20211018',
297 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
298 'tags': list,
299 'uploader': '爱喝咖啡的当麻',
300 'duration': 669.482,
301 'uploader_id': '1680903',
302 'chapters': 'count:6',
303 'comment_count': int,
304 'view_count': int,
305 'like_count': int,
306 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
307 },
308 'params': {'skip_download': True},
ab29e470 309 }, {
310 'note': 'video redirects to festival page',
311 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
312 'info_dict': {
313 'id': 'BV1wP4y1P72h',
314 'ext': 'mp4',
315 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
316 'timestamp': 1643947497,
317 'upload_date': '20220204',
318 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
319 'uploader': '叨叨冯聊音乐',
320 'duration': 246.719,
321 'uploader_id': '528182630',
322 'view_count': int,
323 'like_count': int,
324 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
325 },
326 'params': {'skip_download': True},
327 }, {
328 'note': 'newer festival video',
329 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
330 'info_dict': {
331 'id': 'BV1ay4y1d77f',
332 'ext': 'mp4',
333 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
334 'timestamp': 1674273600,
335 'upload_date': '20230121',
336 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
337 'uploader': '果蝇轰',
338 'duration': 1111.722,
339 'uploader_id': '8469526',
340 'view_count': int,
341 'like_count': int,
342 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
343 },
344 'params': {'skip_download': True},
bd8f48c7 345 }]
28746fbd 346
520e7533 347 def _real_extract(self, url):
ad974876 348 video_id = self._match_id(url)
6461f2b7 349 webpage = self._download_webpage(url, video_id)
c90c5b9b 350 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
ad974876 351
ab29e470 352 is_festival = 'videoData' not in initial_state
353 if is_festival:
354 video_data = initial_state['videoInfo']
355 else:
356 play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
357 video_data = initial_state['videoData']
358
ad974876 359 video_id, title = video_data['bvid'], video_data.get('title')
6461f2b7 360
adc74b3c 361 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
ab29e470 362 page_list_json = not is_festival and traverse_obj(
ad974876
L
363 self._download_json(
364 'https://api.bilibili.com/x/player/pagelist', video_id,
365 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
366 note='Extracting videos in anthology'),
367 'data', expected_type=list) or []
368 is_anthology = len(page_list_json) > 1
369
370 part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
371 if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
372 return self.playlist_from_matches(
373 page_list_json, video_id, title, ie=BiliBiliIE,
374 getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
10db0d2f 375
ad974876 376 if is_anthology:
f74371a9 377 part_id = part_id or 1
378 title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
f8580bf0 379
ad974876
L
380 aid = video_data.get('aid')
381 old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
f8580bf0 382
c90c5b9b 383 cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
384
ab29e470 385 festival_info = {}
386 if is_festival:
387 play_info = self._download_json(
388 'https://api.bilibili.com/x/player/playurl', video_id,
389 query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
390 note='Extracting festival video formats')['data']
391
392 festival_info = traverse_obj(initial_state, {
393 'uploader': ('videoInfo', 'upName'),
394 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
395 'like_count': ('videoStatus', 'like', {int_or_none}),
396 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
397 }, get_all=False)
398
ad974876 399 return {
ab29e470 400 **traverse_obj(initial_state, {
401 'uploader': ('upData', 'name'),
402 'uploader_id': ('upData', 'mid', {str_or_none}),
403 'like_count': ('videoData', 'stat', 'like', {int_or_none}),
404 'tags': ('tags', ..., 'tag_name'),
405 'thumbnail': ('videoData', 'pic', {url_or_none}),
406 }),
407 **festival_info,
408 **traverse_obj(video_data, {
409 'description': 'desc',
410 'timestamp': ('pubdate', {int_or_none}),
411 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
412 'comment_count': ('stat', 'reply', {int_or_none}),
413 }, get_all=False),
ad974876
L
414 'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
415 'formats': self.extract_formats(play_info),
416 '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
d90e4030 417 'title': title,
c90c5b9b 418 'duration': float_or_none(play_info.get('timelength'), scale=1000),
419 'chapters': self._get_chapters(aid, cid),
8a83baaf 420 'subtitles': self.extract_subtitles(video_id, aid, cid),
c90c5b9b 421 '__post_extractor': self.extract_comments(aid),
422 'http_headers': {'Referer': url},
06167fbb 423 }
277d6ff5 424
06167fbb 425
ad974876 426class BiliBiliBangumiIE(BilibiliBaseIE):
bdd0b75e 427 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
e88d44c6 428
ad974876 429 _TESTS = [{
bdd0b75e 430 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
ad974876 431 'info_dict': {
bdd0b75e 432 'id': '267851',
ad974876 433 'ext': 'mp4',
bdd0b75e
GS
434 'series': '鬼灭之刃',
435 'series_id': '4358',
436 'season': '鬼灭之刃',
437 'season_id': '26801',
ad974876 438 'season_number': 1,
bdd0b75e
GS
439 'episode': '残酷',
440 'episode_id': '267851',
441 'episode_number': 1,
442 'title': '1 残酷',
443 'duration': 1425.256,
444 'timestamp': 1554566400,
445 'upload_date': '20190406',
446 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
ad974876 447 },
bdd0b75e 448 'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
ad974876 449 }]
06167fbb 450
ad974876
L
451 def _real_extract(self, url):
452 video_id = self._match_id(url)
bdd0b75e 453 episode_id = video_id[2:]
ad974876 454 webpage = self._download_webpage(url, video_id)
e88d44c6 455
ad974876
L
456 if '您所在的地区无法观看本片' in webpage:
457 raise GeoRestrictedError('This video is restricted')
bdd0b75e 458 elif '正在观看预览,大会员免费看全片' in webpage:
ad974876 459 self.raise_login_required('This video is for premium members only')
6461f2b7 460
bdd0b75e
GS
461 headers = {'Referer': url, **self.geo_verification_headers()}
462 play_info = self._download_json(
463 'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id,
464 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
465 headers=headers)
466 premium_only = play_info.get('code') == -10403
467 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
468
ad974876 469 formats = self.extract_formats(play_info)
bdd0b75e 470 if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
ad974876 471 self.raise_login_required('This video is for premium members only')
bd8f48c7 472
bdd0b75e
GS
473 bangumi_info = self._download_json(
474 'https://api.bilibili.com/pgc/view/web/season', video_id, 'Get episode details',
475 query={'ep_id': episode_id}, headers=headers)['result']
476
477 episode_number, episode_info = next((
478 (idx, ep) for idx, ep in enumerate(traverse_obj(
479 bangumi_info, ('episodes', ..., {dict})), 1)
480 if str_or_none(ep.get('id')) == episode_id), (1, {}))
c90c5b9b 481
bdd0b75e 482 season_id = bangumi_info.get('season_id')
c90c5b9b 483 season_number = season_id and next((
484 idx + 1 for idx, e in enumerate(
bdd0b75e 485 traverse_obj(bangumi_info, ('seasons', ...)))
c90c5b9b 486 if e.get('season_id') == season_id
487 ), None)
06167fbb 488
bdd0b75e
GS
489 aid = episode_info.get('aid')
490
e88d44c6 491 return {
ad974876
L
492 'id': video_id,
493 'formats': formats,
bdd0b75e
GS
494 **traverse_obj(bangumi_info, {
495 'series': ('series', 'series_title', {str}),
496 'series_id': ('series', 'series_id', {str_or_none}),
497 'thumbnail': ('square_cover', {url_or_none}),
498 }),
499 'title': join_nonempty('title', 'long_title', delim=' ', from_dict=episode_info),
500 'episode': episode_info.get('long_title'),
501 'episode_id': episode_id,
502 'episode_number': int_or_none(episode_info.get('title')) or episode_number,
503 'season_id': str_or_none(season_id),
c90c5b9b 504 'season_number': season_number,
bdd0b75e 505 'timestamp': int_or_none(episode_info.get('pub_time')),
c90c5b9b 506 'duration': float_or_none(play_info.get('timelength'), scale=1000),
bdd0b75e
GS
507 'subtitles': self.extract_subtitles(video_id, aid, episode_info.get('cid')),
508 '__post_extractor': self.extract_comments(aid),
509 'http_headers': headers,
e88d44c6 510 }
bd8f48c7 511
bd8f48c7 512
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    # Playlist extractor for /bangumi/media/md<N> pages: reads the season id
    # from the page's initial state and lists that season's episodes.
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
        },
        'playlist_mincount': 25,
    }]

    def _real_extract(self, url):
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)
        initial_state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
        ss_id = initial_state['mediaInfo']['season_id']

        episodes = self._get_episodes_from_season(ss_id, url)
        return self.playlist_result(episodes, media_id)
530
bd8f48c7 531
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    # Playlist extractor for /bangumi/play/ss<N> URLs; the season id comes
    # straight from the URL.
    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801'
        },
        'playlist_mincount': 26
    }]

    def _real_extract(self, url):
        ss_id = self._match_id(url)
        episodes = self._get_episodes_from_season(ss_id, url)
        return self.playlist_result(episodes, ss_id)
4bc15a68
RA
546
547
2b9d0216
L
class BilibiliSpaceBaseIE(InfoExtractor):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Build a paged entry list from three callbacks.

        @param fetch_page    callable(page_idx) returning raw page data (0-based index)
        @param get_metadata  callable(first_page) returning a dict that includes
                             'page_count' and 'page_size'
        @param get_entries   callable(page_data) returning that page's entries
        @returns             (metadata, InAdvancePagedList)
        """
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        def entries_for(page_idx):
            # Page 0 was already downloaded to obtain the metadata; reuse it
            page_data = first_page if not page_idx else fetch_page(page_idx)
            return get_entries(page_data)

        paged_list = InAdvancePagedList(
            entries_for, metadata['page_count'], metadata['page_size'])

        return metadata, paged_list
558
559
560class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
561 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
6efb0711 562 _TESTS = [{
563 'url': 'https://space.bilibili.com/3985676/video',
2b9d0216
L
564 'info_dict': {
565 'id': '3985676',
566 },
567 'playlist_mincount': 178,
6f10cdcf
E
568 }, {
569 'url': 'https://space.bilibili.com/313580179/video',
570 'info_dict': {
571 'id': '313580179',
572 },
573 'playlist_mincount': 92,
6efb0711 574 }]
575
6f10cdcf
E
576 def _extract_signature(self, playlist_id):
577 session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
578
579 key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
580 img_key = traverse_obj(
581 session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
582 sub_key = traverse_obj(
583 session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
584
585 session_key = img_key + sub_key
586
587 signature_values = []
588 for position in (
589 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
590 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
591 57, 62, 11, 36, 20, 34, 44, 52
592 ):
593 char_at_position = try_call(lambda: session_key[position])
594 if char_at_position:
595 signature_values.append(char_at_position)
596
597 return ''.join(signature_values)[:32]
598
2b9d0216
L
599 def _real_extract(self, url):
600 playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
601 if not is_video_url:
602 self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
603 'To download audios, add a "/audio" to the URL')
604
6f10cdcf
E
605 signature = self._extract_signature(playlist_id)
606
2b9d0216 607 def fetch_page(page_idx):
6f10cdcf
E
608 query = {
609 'keyword': '',
610 'mid': playlist_id,
611 'order': 'pubdate',
612 'order_avoided': 'true',
613 'platform': 'web',
614 'pn': page_idx + 1,
615 'ps': 30,
616 'tid': 0,
617 'web_location': 1550101,
618 'wts': int(time.time()),
619 }
620 query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
621
12f153a8 622 try:
6f10cdcf
E
623 response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
624 playlist_id, note=f'Downloading page {page_idx}', query=query)
12f153a8 625 except ExtractorError as e:
3d2623a8 626 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
12f153a8
L
627 raise ExtractorError(
628 'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
629 raise
630 if response['code'] == -401:
631 raise ExtractorError(
632 'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
633 return response['data']
2b9d0216
L
634
635 def get_metadata(page_data):
636 page_size = page_data['page']['ps']
637 entry_count = page_data['page']['count']
638 return {
639 'page_count': math.ceil(entry_count / page_size),
640 'page_size': page_size,
641 }
6efb0711 642
2b9d0216
L
643 def get_entries(page_data):
644 for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
645 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
6efb0711 646
2b9d0216
L
647 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
648 return self.playlist_result(paged_list, playlist_id)
6efb0711 649
6efb0711 650
2b9d0216
L
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    # Playlist extractor for the audio tab of a user space.
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # The API's "pn" is 1-based while page_idx is 0-based
            query = {'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
            response = self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}', query=query)
            return response['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for song in page_data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{song["id"]}', BilibiliAudioIE, song['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
682
683
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield a url_result for every string found under
        ``page_data[<bvid_keys...>][*][ending_key]``.

        ``bvid_keys`` may be a single key or a tuple/list of keys forming the path.
        """
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Best-effort lookup of the uploader name from the space page title.

        Returns None when the page cannot be downloaded or the title does not match.
        """
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        if not webpage:
            # A failed non-fatal download yields False, which must not reach
            # the regex search (re.search() raises TypeError on non-strings)
            return None
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Same as the parent implementation, but strips the paging keys so the
        returned metadata can be passed on as playlist fields."""
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
698
699
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for a user's "collectiondetail" pages.
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            # "page_num" is 1-based while page_idx is 0-based
            query = {'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
            response = self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}', query=query)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            page_count = math.ceil(entry_count / page_size)
            uploader = self._get_uploader(mid, playlist_id)
            collection_meta = traverse_obj(page_data, {
                'title': ('meta', 'name', {str}),
                'description': ('meta', 'description', {str}),
                'uploader_id': ('meta', 'mid', {str_or_none}),
                'timestamp': ('meta', 'ptime', {int_or_none}),
                'thumbnail': ('meta', 'cover', {url_or_none}),
            })
            return {
                'page_count': page_count,
                'page_size': page_size,
                'uploader': uploader,
                **collection_meta
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
748
749
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for a user's "seriesdetail" pages.
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        # Series-level metadata comes from its own endpoint; failure is non-fatal
        series_info = self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
        )
        playlist_meta = traverse_obj(series_info, {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            # "pn" is 1-based while page_idx is 0-based
            query = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
            response = self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}', query=query)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['size']
            entry_count = page_data['page']['total']
            page_count = math.ceil(entry_count / page_size)
            return {
                'page_count': page_count,
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
802
803
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    # Playlist extractor for favorites lists (space "favlist" and "medialist/detail" URLs).
    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # A second endpoint returns the ids of all entries in one response
        id_listing = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(id_listing, 'data')

        playlist_fields = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **playlist_fields)
852
853
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Extract the logged-in user's "watch later" list as a playlist."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        """Fetch the watch-later list; the playlist id is the DedeUserID cookie value when present."""
        user_cookie = self._get_cookies(url).get('DedeUserID')
        # Falls back to the literal 'watchlater' when the cookie is absent
        list_id = getattr(user_cookie, 'value', 'watchlater')

        response = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if response['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')

        return self.playlist_result(
            self._get_entries(response, ('data', 'list')), id=list_id, title='稍后再看')
871
872
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extract generic Bilibili playlists served from /list/ and /medialist/play/ pages."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield playlist entries page by page.

        `query` is updated in place: after each page its 'oid' is set to the id
        of the last item received, which the next request uses as its cursor.
        """
        page_num = 0
        while True:
            page_num += 1
            response = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )
            page_data = response['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                return

    def _real_extract(self, url):
        """Read the page's initial state, surface access errors, and return the playlist."""
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)

        status = traverse_obj(initial_state, ('error', 'code', {int_or_none}))
        if status != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            # The three special cases are mutually exclusive on error_code
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            if error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            if error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        # Parameters taken from the page state; absent values are left out of the query
        state_params = traverse_obj(initial_state, {
            'type': ('playlist', 'type', {int_or_none}),
            'biz_id': ('playlist', 'id', {int_or_none}),
            'tid': ('tid', {int_or_none}),
            'sort_field': ('sortFiled', {int_or_none}),
            'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
        })
        query = {'ps': 20, 'with_current': False, **state_params}

        list_info = traverse_obj(initial_state, ('mediaListInfo', {
            'title': ('title', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
        }))
        metadata = {'id': f'{query["type"]}_{query["biz_id"]}', **list_info}
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
06167fbb 972
973
class BilibiliCategoryIE(InfoExtractor):
    """Extract the newest videos of a Bilibili category/subcategory page as a playlist."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad'
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45
        }
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url results for one page of the category listing.

        `page_num` is the 0-based page index supplied by OnDemandPagedList;
        the API's `pn` parameter is 1-based (the bootstrap request in
        `_entries` asks for pn=1), so the index is shifted before use.
        Raises ExtractorError when an in-range page has no video list.
        """
        api_page = page_num + 1
        if api_page > num_pages:
            # Nothing left to request; avoids a spurious failure when the
            # total count is an exact multiple of the page size
            return

        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': api_page},
            note=f'Extracting results from page {api_page} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {api_page}')

        for video in video_list:
            yield self.url_result(
                'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return a paged list of entries for the given category/subcategory.

        Raises ExtractorError for an unknown category/subcategory or when the
        page count cannot be determined from the first API response.
        """
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # Fall back to an empty dict so a missing 'page' object is reported
        # through the ExtractorError below instead of an AttributeError
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        """Derive category and subcategory from the URL path and build the playlist."""
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
1040
1041
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor for Bilibili videos (search key: bilisearch)."""

    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'

    def _search_results(self, query):
        """Yield url results for `query`, requesting result pages until one comes back empty."""
        page_num = 0
        while True:
            page_num += 1
            response = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query={
                    'Search_key': query,
                    'keyword': query,
                    'page': page_num,
                    'context': '',
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                })
            videos = response['data'].get('result')
            if not videos:
                return
            yield from (
                self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
                for video in videos)
06167fbb 1067
1068
4bc15a68
RA
class BilibiliAudioBaseIE(InfoExtractor):
    """Shared API helper for the Bilibili audio extractors."""

    def _call_api(self, path, sid, query=None):
        """Request `path` of the audio web API and return the response's 'data' member.

        When `query` is empty or omitted, `{'sid': sid}` is sent instead.
        """
        params = query or {'sid': sid}
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        response = self._download_json(api_url, sid, query=params)
        return response['data']
1076
1077
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extract a single Bilibili audio track (audio/au<id>)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Fetch the stream URL and song info and assemble the info dict."""
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        # Single audio-only format; the page URL is sent as Referer when downloading
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        # Lyrics, when available, are exposed as an 'origin' subtitle track
        lyric_url = song.get('lyric')
        subtitles = {'origin': [{'url': lyric_url}]} if lyric_url else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
1146
1147
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extract a Bilibili audio menu/album (audio/am<id>) as a playlist of tracks."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """List the album's songs and, when any exist, attach the album title and description."""
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Songs without a usable id are skipped
        song_ids = (str_or_none(song.get('id')) for song in songs)
        entries = [
            self.url_result(f'https://www.bilibili.com/audio/au{sid}', BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids if sid
        ]
        if not entries:
            return self.playlist_result(entries, am_id)

        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if not album_title:
            return self.playlist_result(entries, am_id)

        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
63dce309
S
1185
1186
class BiliBiliPlayerIE(InfoExtractor):
    """Redirect embedded player URLs (player.bilibili.com) to the regular video extractor."""

    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Build the av<id> video URL from the `aid` parameter and delegate to BiliBiliIE."""
        video_id = self._match_id(url)
        target_url = f'http://www.bilibili.tv/video/av{video_id}/'
        return self.url_result(target_url, ie=BiliBiliIE.ie_key(), video_id=video_id)
16f7e6be
AG
1199
1200
class BiliIntlBaseIE(InfoExtractor):
    """Shared API, subtitle, format and login helpers for the bilibili.tv extractors."""

    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'

    def _call_api(self, endpoint, *args, **kwargs):
        """Call `endpoint` of the gateway API and return the response's 'data' member.

        Extra arguments are passed through to `_download_json`.
        Returns None when the download yields nothing or 'data' is absent.
        A non-zero response 'code' is handled as follows:
        - 10004004, 10004005, 10023006: login required
        - 10004001: geo restricted
        - anything else: ExtractorError if a truthy `fatal` keyword was given,
          otherwise only a warning is reported
        """
        response = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if not response:
            # A non-fatal download can come back falsy (callers in this class
            # already check for that); there is nothing to inspect then
            return None

        code = response.get('code')
        if code:
            if code in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif code == 10004001:
                self.raise_geo_restricted()
            else:
                errnote = kwargs.get('errnote', 'Unable to download JSON metadata')
                message = response.get('message')
                # Skip the message when it merely repeats the numeric code
                if message and str(code) != message:
                    errmsg = f'{errnote}: {self.IE_NAME} said: {message}'
                else:
                    errmsg = errnote
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return response.get('data')

    def json2srt(self, json):
        """Convert a subtitle JSON document (cues under 'body') to SRT text.

        Cues with empty content, or without a 'from'/'to' time, are skipped.
        """
        data = '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(traverse_obj(json, (
                # Compare the times against None: a cue starting at 0 is valid
                # and must not be dropped by a truthiness test
                'body', lambda _, l: l['content'] and l['from'] is not None and l['to'] is not None))))
        return data

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Download all subtitle tracks for an episode or video and return them as SRT data.

        Returns a dict mapping language key (default 'en') to a list of subtitle dicts.
        Failures are non-fatal; tracks that cannot be downloaded are skipped.
        """
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        for sub in sub_json.get('subtitles') or []:
            sub_url = sub.get('url')
            if not sub_url:
                continue
            # Build the suffix first: `'...%s' % x if cond else ''` would parse as
            # `('...%s' % x) if cond else ''` and blank the whole note
            lang_suffix = f' for {sub["lang"]}' if sub.get('lang') else ''
            sub_data = self._download_json(
                sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
                note=f'Downloading subtitles{lang_suffix}')
            if not sub_data:
                continue
            subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
                'ext': 'srt',
                'data': self.json2srt(sub_data)
            })
        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Return the list of video-only and audio-only formats for an episode or video."""
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Extract title, thumbnail and episode number from an episode/video data dict.

        The episode number is read from a display title of the form 'E<n>' or 'E<n> - ...'.
        """
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'thumbnail': video_data.get('cover'),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in with username/password.

        The password is prefixed with the server-provided hash, encrypted with
        the server's RSA public key (PKCS#1 v1.5) and sent base64-encoded.
        Raises ExtractorError when pycryptodomex is unavailable or the login is rejected.
        """
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true'
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')
1327
16f7e6be
AG
1328
class BiliIntlIE(BiliIntlBaseIE):
    """Extract a single bilibili.tv / biliintl.com episode or user-uploaded video."""

    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro'
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro'
            }],
        }
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro'
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro'
            }],
        }
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        'url': 'https://www.bilibili.tv/en/video/2041863208',
        'info_dict': {
            'id': '2041863208',
            'ext': 'mp4',
            'timestamp': 1670874843,
            'description': 'Scheduled for April 2023.\nStudio: ufotable',
            'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
            'upload_date': '20221212',
            'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
        },
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro'
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro'
            }],
        },
        'params': {
            'getcomments': True
        }
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
            'comment_count': int,
            'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
        },
        'params': {
            'getcomments': True
        }
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Return the canonical page URL for an episode (with `series_id`) or a standalone video."""
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Return title/thumbnail/episode metadata for the video.

        Metadata smuggled into the URL is returned as-is when it carries a title.
        Otherwise the webpage's embedded state is used, falling back to the
        season's episode list, and finally merged with JSON-LD and og: tags.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False)

        # XXX: webpage metadata may not be accurate; it is only used so that
        # extraction does not crash when video_data could not be found
        return merge_dicts(
            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
                'title': self._html_search_meta('og:title', webpage),
                'description': self._html_search_meta('og:description', webpage)
            })

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Yield all replies to the root comment `root_id`, following the API cursor.

        `next_id` is the cursor position to start from. Pagination is done in a
        loop so that long reply threads cannot exhaust the recursion limit.
        """
        while True:
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/detail', display_id,
                note=f'Downloading reply comment of {root_id} - {next_id}',
                query={
                    'platform': 'web',
                    'ps': 20,  # comment's reply per page (default: 3)
                    'root': root_id,
                    'next': next_id,
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'parent': replies.get('parent'),
                    'timestamp': unified_timestamp(replies.get('ctime_text'))
                }

            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break
            following_id = traverse_obj(comment_api_raw_data, ('data', 'cursor', 'next'))
            # Stop when the cursor is missing or does not advance; continuing
            # would either crash or request the same page forever
            if following_id is None or following_id == next_id:
                break
            next_id = following_id

    def _get_comments(self, video_id, ep_id):
        """Yield root comments (each followed by its replies) until the API reports the end."""
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
                }
                if replies.get('count'):
                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break

    def _real_extract(self, url):
        """Assemble the info dict; for episodes, intro/outro skip markers become chapters."""
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start and end times seem to be off by a few seconds even
                # though they are computed the same way as in ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro'
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro'
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id)
        }
16f7e6be
AG
1605
1606
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Extract every episode of a bilibili.tv / biliintl.com series as a playlist."""

    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield one url result per episode, smuggling the episode metadata into the URL."""
        series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            episode_id = str(episode['episode_id'])
            yield self.url_result(smuggle_url(
                BiliIntlIE._make_url(episode_id, series_id),
                self._parse_video_metadata(episode)
            ), BiliIntlIE, episode_id)

    def _real_extract(self, url):
        """Fetch the season info and return the playlist with its metadata."""
        series_id = self._match_id(url)
        # _call_api returns None when the response carries no 'data' (it only
        # warns on most API errors), so guard before reading 'season'
        season_response = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id) or {}
        series_info = season_response.get('season') or {}
        return self.playlist_result(
            self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
            categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
b4f53662
H
1659
1660
# Extractor for live rooms matched by `_VALID_URL` (live.bilibili.com/<room id>,
# optionally under the `blanc/` path).
class BiliLiveIE(InfoExtractor):
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
            'ext': 'flv',
            'title': "太空狼人杀联动,不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live'
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True
    }]

    # Keyed by the `qn` value sent to the play-info API; each value supplies the
    # `format_id` and `format_note` merged into formats found for that `qn`.
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Built from the `_FORMATS` keys in declaration order and used for the
    # `quality` field of each format.
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Fetch `path` from api.live.bilibili.com and return its `data` payload.

        Raises ExtractorError (with the API's `message` when present) if the
        response `code` is not 0. Returns an empty dict when `data` is missing
        or falsy.
        """
        api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if api_result.get('code') != 0:
            raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
        return api_result.get('data') or {}

    def _parse_formats(self, qn, fmt):
        """Yield a format dict for every URL of the codecs in `fmt` served at `qn`.

        Codecs whose `current_qn` differs from the requested `qn` are skipped,
        so each quality is only reported by the request that asked for it.
        """
        for codec in fmt.get('codec') or []:
            if codec.get('current_qn') != qn:
                continue
            # Tolerate a missing or null `url_info` instead of raising, in line
            # with how the `codec` list itself is read above.
            for url_info in codec.get('url_info') or []:
                yield {
                    'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        """Return the info dict for the live room matched from `url`.

        Raises an expected ExtractorError when the room's `live_status` is 0.
        """
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        # Defined up front so the `live_time` lookup below never depends on the
        # loop having run; afterwards it holds the last response received.
        stream_data = {}
        # One play-info request per known quality number
        for qn in self._FORMATS:
            stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            })
            for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            'timestamp': stream_data.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }