]> jfr.im git - yt-dlp.git/blame - yt_dlp/extractor/bilibili.py
[dependencies] Simplify `Cryptodome`
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
CommitLineData
cfcf60ea 1import base64
c34f505b 2import functools
ad974876 3import itertools
c34f505b 4import math
ad974876
L
5import urllib.error
6import urllib.parse
28746fbd 7
06167fbb 8from .common import InfoExtractor, SearchInfoExtractor
f6a765ce 9from ..dependencies import Cryptodome
28746fbd 10from ..utils import (
bd8f48c7 11 ExtractorError,
ad974876 12 GeoRestrictedError,
2b9d0216
L
13 InAdvancePagedList,
14 OnDemandPagedList,
f5f15c99 15 filter_dict,
6461f2b7 16 float_or_none,
ad974876 17 format_field,
2b9d0216 18 int_or_none,
ad974876 19 make_archive_id,
d37422f1 20 merge_dicts,
f8580bf0 21 mimetype2ext,
2b9d0216 22 parse_count,
ad974876 23 parse_qs,
b4f53662 24 qualities,
26fdfc37 25 smuggle_url,
efc947fb 26 srt_subtitles_timecode,
4bc15a68 27 str_or_none,
2b9d0216 28 traverse_obj,
26fdfc37 29 unsmuggle_url,
c62ecf0d 30 url_or_none,
ad974876 31 urlencode_postdata,
28746fbd
PH
32)
33
34
ad974876
L
class BilibiliBaseIE(InfoExtractor):
    """Helpers shared by the bilibili.com video extractors."""

    def extract_formats(self, play_info):
        """Return the list of format dicts described by ``play_info``.

        Audio entries come from ``dash.audio`` (plus ``dash.flac.audio`` when
        present) and video entries from ``dash.video``. Qualities listed in
        ``support_formats`` that have no matching video entry are reported
        through ``to_screen`` together with the login hint.
        """
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        audios = traverse_obj(play_info, ('dash', 'audio', ...))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': audio.get('codecs'),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size'))
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            # Only mark video streams as audio-less when separate audio streams exist
            'acodec': 'none' if audios else None,
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
        if missing_formats:
            # A quality entry may carry neither description key, leaving None in
            # format_names; fall back to the quality id so str.join() cannot fail
            missing_names = ', '.join(str(format_names[i] or i) for i in missing_formats)
            self.to_screen(f'Format(s) {missing_names} are missing; '
                           f'you have to login or become premium member to download them. {self._login_hint()}')

        return formats

    def json2srt(self, json_data):
        """Convert a subtitle JSON document (entries under ``body``) to SRT text."""
        return ''.join(
            f'{idx + 1}\n'
            f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
            f'{line["content"]}\n\n'
            for idx, line in enumerate(json_data.get('body') or []))

    def _get_subtitles(self, video_id, initial_state, cid):
        """Return a subtitles dict: the danmaku XML for ``cid`` plus every listed subtitle track as SRT."""
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }]
        }

        for s in traverse_obj(initial_state, ('videoData', 'subtitle', 'list')) or []:
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
            })
        return subtitles

    def _get_chapters(self, aid, cid):
        """Return chapter dicts built from the player API ``view_points``, or None.

        The request is skipped when either ``aid`` or ``cid`` is falsy and is
        non-fatal otherwise.
        """
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

    def _get_comments(self, aid):
        """Yield comment dicts page by page until a page has no replies."""
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
            for children in map(self._get_all_children, replies):
                yield from children

    def _get_all_children(self, reply):
        """Yield the comment dict for ``reply`` followed, recursively, by all of its nested replies."""
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
            yield from children
132
ad974876
L
133
class BiliBiliIE(BilibiliBaseIE):
    """Extractor for regular bilibili.com videos (``av``/``BV`` URLs), including multi-part anthologies."""

    _VALID_URL = r'https?://www\.bilibili\.com/video/[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
        },
    }, {
        # old av URL version
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            'ext': 'mp4',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'id': 'BV11x411K7CN',
            'title': '【金坷垃】金泡沫',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 18,
        'playlist': [{
            'info_dict': {
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:11',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
            }
        }]
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:11',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
        }
    }, {
        'note': 'video has subtitles',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫Tech',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2'
        },
        'params': {'listsubtitles': True},
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }]

    def _real_extract(self, url):
        """Extract a single video, one part of an anthology, or the anthology as a playlist.

        The page's ``__INITIAL_STATE__`` and ``__playinfo__`` JSON blobs supply
        the metadata and formats. A URL without ``?p=`` that points at a
        multi-part video is returned as a playlist of its parts when
        ``_yes_playlist`` allows it.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']

        video_data = initial_state['videoData']
        # From here on the canonical BV id replaces whatever id form the URL used
        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology'),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            part_id = part_id or 1
            # title is read with a non-fatal .get(), so it may be None; `+=` on
            # None would raise TypeError, hence the explicit fallback
            title = f'{title or ""} p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        return {
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            'formats': self.extract_formats(play_info),
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'description': traverse_obj(initial_state, ('videoData', 'desc')),
            'view_count': traverse_obj(initial_state, ('videoData', 'stat', 'view')),
            'uploader': traverse_obj(initial_state, ('upData', 'name')),
            'uploader_id': traverse_obj(initial_state, ('upData', 'mid')),
            'like_count': traverse_obj(initial_state, ('videoData', 'stat', 'like')),
            'comment_count': traverse_obj(initial_state, ('videoData', 'stat', 'reply')),
            'tags': traverse_obj(initial_state, ('tags', ..., 'tag_name')),
            'thumbnail': traverse_obj(initial_state, ('videoData', 'pic')),
            'timestamp': traverse_obj(initial_state, ('videoData', 'pubdate')),
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'chapters': self._get_chapters(aid, cid),
            'subtitles': self.extract_subtitles(video_id, initial_state, cid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': {'Referer': url},
        }
277d6ff5 338
06167fbb 339
ad974876
L
class BiliBiliBangumiIE(BilibiliBaseIE):
    """Extractor for bangumi (series) episode pages addressed by ``ss``/``ep`` id."""

    _VALID_URL = r'(?x)https?://www\.bilibili\.com/bangumi/play/(?P<id>(?:ss|ep)\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss897',
        'info_dict': {
            'id': 'ss897',
            'ext': 'mp4',
            'series': '神的记事本',
            'season': '神的记事本',
            'season_id': 897,
            'season_number': 1,
            'episode': '你与旅行包',
            'episode_number': 2,
            'title': '神的记事本:第2话 你与旅行包',
            'duration': 1428.487,
            'timestamp': 1310809380,
            'upload_date': '20110716',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep508406',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        """Extract one bangumi episode from its page.

        Region-blocked pages raise ``GeoRestrictedError``; pages carrying the
        premium-only markers go through ``raise_login_required``.
        """
        video_id = self._match_id(url)
        webpage = self._download_webpage(url, video_id)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')

        # Two independent page markers indicate premium-only content
        preview_only = '正在观看预览,大会员免费看全片' in webpage
        paywalled = '开通大会员观看' in webpage and '__playinfo__' not in webpage
        if paywalled or preview_only:
            self.raise_login_required('This video is for premium members only')

        play_info = self._search_json(r'window\.__playinfo__\s*=', webpage, 'play info', video_id)['data']
        formats = self.extract_formats(play_info)
        if not formats and '成为大会员抢先看' in webpage:
            # Only a `durl` entry and no `dash` data alongside the early-access marker
            if play_info.get('durl') and not play_info.get('dash'):
                self.raise_login_required('This video is for premium members only')

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        season_id = traverse_obj(initial_state, ('mediaInfo', 'season_id'))
        # 1-based position of this season within mediaInfo.seasons; stays falsy
        # (the season_id value itself) when there is no season_id to look up
        season_number = season_id
        if season_id:
            season_number = None
            all_seasons = traverse_obj(initial_state, ('mediaInfo', 'seasons', ...))
            for position, season in enumerate(all_seasons, 1):
                if season.get('season_id') == season_id:
                    season_number = position
                    break

        episode_cid = traverse_obj(initial_state, ('epInfo', 'cid'))
        episode_aid = traverse_obj(initial_state, ('epInfo', 'aid'))
        return {
            'id': video_id,
            'formats': formats,
            'title': traverse_obj(initial_state, 'h1Title'),
            'episode': traverse_obj(initial_state, ('epInfo', 'long_title')),
            'episode_number': int_or_none(traverse_obj(initial_state, ('epInfo', 'title'))),
            'series': traverse_obj(initial_state, ('mediaInfo', 'series')),
            'season': traverse_obj(initial_state, ('mediaInfo', 'season_title')),
            'season_id': season_id,
            'season_number': season_number,
            'thumbnail': traverse_obj(initial_state, ('epInfo', 'cover')),
            'timestamp': traverse_obj(initial_state, ('epInfo', 'pub_time')),
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(video_id, initial_state, episode_cid),
            '__post_extractor': self.extract_comments(episode_aid),
            'http_headers': {'Referer': url, **self.geo_verification_headers()},
        }
bd8f48c7 408
bd8f48c7 409
ad974876
L
class BiliBiliBangumiMediaIE(InfoExtractor):
    """Playlist extractor for a bangumi media page (``md<id>``), one entry per episode."""

    _VALID_URL = r'https?://www\.bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
        },
        'playlist_mincount': 25,
    }]

    def _real_extract(self, url):
        """Resolve the media page to its season and return the main-section episodes as a playlist."""
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', media_id,
            query={'season_id': initial_state['mediaInfo']['season_id']},
            note='Downloading season info')
        episode_list = season_info['result']['main_section']['episodes']

        def iter_entries():
            # Lazily hand every episode over to the episode extractor
            for episode in episode_list:
                yield self.url_result(episode['share_url'], BiliBiliBangumiIE, episode['aid'])

        return self.playlist_result(iter_entries(), media_id)
4bc15a68
RA
433
434
2b9d0216
L
class BilibiliSpaceBaseIE(InfoExtractor):
    """Base for the space.bilibili.com playlist extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Build a paged playlist from three caller-supplied callables.

        ``fetch_page(idx)`` downloads page ``idx``, ``get_metadata(page)``
        returns a dict with at least ``page_count`` and ``page_size``, and
        ``get_entries(page)`` produces the entries of one page.

        Returns the ``(metadata, paged_list)`` pair.
        """
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        def entries_for(idx):
            # Page 0 was already downloaded to read the metadata; reuse it
            page = first_page if not idx else fetch_page(idx)
            return get_entries(page)

        page_count, page_size = metadata['page_count'], metadata['page_size']
        return metadata, InAdvancePagedList(entries_for, page_count, page_size)
445
446
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Playlist extractor for the videos uploaded by a space.bilibili.com user."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }]

    def _real_extract(self, url):
        """Return all uploads of the user as a paged playlist."""
        url_match = self._match_valid_url(url)
        playlist_id, is_video_url = url_match.group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        def fetch_page(page_idx):
            # The API's `pn` parameter is sent as page_idx + 1
            api_query = {'mid': playlist_id, 'pn': page_idx + 1, 'jsonp': 'jsonp'}
            try:
                response = self._download_json(
                    'https://api.bilibili.com/x/space/arc/search', playlist_id,
                    note=f'Downloading page {page_idx}', query=api_query)
            except ExtractorError as err:
                blocked = isinstance(err.cause, urllib.error.HTTPError) and err.cause.code == 412
                if not blocked:
                    raise
                raise ExtractorError(
                    'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
            if response['code'] == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['ps']
            total_entries = page_data['page']['count']
            page_count = math.ceil(total_entries / page_size)
            return {'page_count': page_count, 'page_size': page_size}

        def get_entries(page_data):
            videos = traverse_obj(page_data, ('list', 'vlist')) or []
            yield from (
                self.url_result(f'https://www.bilibili.com/video/{video["bvid"]}', BiliBiliIE, video['bvid'])
                for video in videos)

        _, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
6efb0711 492
6efb0711 493
2b9d0216
L
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Playlist extractor for the audio uploads of a space.bilibili.com user."""

    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/audio',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        """Return all audio uploads of the user as a paged playlist."""
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # The API's `pn` parameter is sent as page_idx + 1
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            # `or []` also covers an explicit null, which .get('data', []) would
            # pass through and crash the loop on
            for entry in page_data.get('data') or []:
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
525
526
class BilibiliSpacePlaylistIE(BilibiliSpaceBaseIE):
    """Playlist extractor for a user's collection (``collectiondetail?sid=...``)."""

    # Every dot of the host name is escaped so that it only matches a literal '.'
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '《底特律 变人》'
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        """Return the collection as a paged playlist with id ``<mid>_<sid>``."""
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            # The API's `page_num` parameter is sent as page_idx + 1
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'title': traverse_obj(page_data, ('meta', 'name'))
            }

        def get_entries(page_data):
            # `or []` also covers an explicit null, which .get('archives', [])
            # would pass through and crash the loop on
            for entry in page_data.get('archives') or []:
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}',
                                      BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, metadata['title'])
06167fbb 564
565
class BilibiliCategoryIE(InfoExtractor):
    """Playlist extractor for ``/v/<category>/<subcategory>`` listing pages."""

    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://www\.bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad'
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45
        }
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Yield url results for the videos of one listing page.

        Raises ExtractorError when the page contains no video list.
        """
        # NOTE(review): page_num is forwarded unchanged from OnDemandPagedList,
        # whereas the InAdvancePagedList-based extractors in this file send
        # `index + 1` -- confirm whether this API expects a 0- or 1-based 'pn'
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note='Extracting results from page %s of %s' % (page_num, num_pages))

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError('Failed to retrieve video list for page %d' % page_num)

        for video in video_list:
            yield self.url_result(
                'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return an OnDemandPagedList over the videos of the given (sub)category.

        Raises ExtractorError for unsupported categories/subcategories or when
        the page count/size cannot be determined.
        """
        # map of categories : subcategories : RIDs
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        # traverse_obj yields None when 'data.page' is absent or not a dict;
        # default to {} so the explicit ExtractorError below is raised instead
        # of an AttributeError on None.get
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict) or {}
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        """Derive category/subcategory from the URL path and return the listing as a playlist."""
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = '%s: %s' % (category, subcategory)

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
632
633
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor for bilibili videos (search key ``bilisearch``)."""

    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'

    def _search_results(self, query):
        """Yield url results page by page until a page comes back without results."""
        page_num = 0
        while True:
            page_num += 1
            api_query = {
                'Search_key': query,
                'keyword': query,
                'page': page_num,
                'context': '',
                'duration': 0,
                'tids_2': '',
                '__refresh__': 'true',
                'search_type': 'video',
                'tids': 0,
                'highlight': 1,
            }
            response = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query=api_query)
            videos = response['data'].get('result')
            if not videos:
                return
            yield from (
                self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
                for video in videos)
06167fbb 659
660
4bc15a68
RA
class BilibiliAudioBaseIE(InfoExtractor):
    """Base for the bilibili audio extractors; wraps the music-service web API."""

    def _call_api(self, path, sid, query=None):
        """Download ``path`` from the audio API and return its ``data`` member.

        When ``query`` is falsy the request is made with ``{'sid': sid}``.
        """
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        response = self._download_json(api_url, sid, query=query or {'sid': sid})
        return response['data']
668
669
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extractor for a single bilibili audio track (``au<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        """Return the info dict for one track: a single audio format plus song metadata."""
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        # Only the first CDN URL is used; the page URL is sent as Referer
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        lyric_url = song.get('lyric')
        subtitles = {'origin': [{'url': lyric_url}]} if lyric_url else None

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
738
739
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Playlist extractor for a bilibili audio menu/album (``am<id>``)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        """Return the album as a playlist; entries get an ``album`` field when the album title is known."""
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Songs without a usable id are skipped
        song_ids = filter(None, (str_or_none(song.get('id')) for song in songs))
        entries = [
            self.url_result('https://www.bilibili.com/audio/au' + sid, BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids]
        if not entries:
            return self.playlist_result(entries, am_id)

        # Album metadata is only requested when there is something to attach it to
        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if not album_title:
            return self.playlist_result(entries, am_id)

        for entry in entries:
            entry['album'] = album_title
        return self.playlist_result(
            entries, am_id, album_title, album_data.get('intro'))
63dce309
S
777
778
class BiliBiliPlayerIE(InfoExtractor):
    """Redirects embedded-player URLs (``player.bilibili.com/player.html?aid=...``) to the video extractor."""

    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        """Return a url result pointing at the ``av<aid>`` video page."""
        video_id = self._match_id(url)
        # The target must satisfy BiliBiliIE._VALID_URL, which only accepts the
        # www.bilibili.com host; the former www.bilibili.tv URL did not match it
        return self.url_result(
            'https://www.bilibili.com/video/av%s/' % video_id,
            ie=BiliBiliIE.ie_key(), video_id=video_id)
16f7e6be
AG
791
792
793class BiliIntlBaseIE(InfoExtractor):
c62ecf0d 794 _API_URL = 'https://api.bilibili.tv/intl/gateway'
cfcf60ea 795 _NETRC_MACHINE = 'biliintl'
16f7e6be 796
c62ecf0d 797 def _call_api(self, endpoint, *args, **kwargs):
cfcf60ea
M
798 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
799 if json.get('code'):
800 if json['code'] in (10004004, 10004005, 10023006):
801 self.raise_login_required()
802 elif json['code'] == 10004001:
803 self.raise_geo_restricted()
804 else:
805 if json.get('message') and str(json['code']) != json['message']:
806 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
807 else:
808 errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
809 if kwargs.get('fatal'):
810 raise ExtractorError(errmsg)
811 else:
812 self.report_warning(errmsg)
813 return json.get('data')
16f7e6be 814
def json2srt(self, json):
    """Convert a subtitle JSON document (cues under ``body``) to SRT text.

    Cues with empty content or without both timestamps are skipped. A
    timestamp of 0 is valid, so the timestamps are compared against None
    rather than tested for truthiness, which used to drop cues starting at 0.
    """
    cues = traverse_obj(json, (
        'body', lambda _, cue: cue['content'] and cue['from'] is not None and cue['to'] is not None))
    return '\n\n'.join(
        f'{i + 1}\n{srt_subtitles_timecode(cue["from"])} --> {srt_subtitles_timecode(cue["to"])}\n{cue["content"]}'
        for i, cue in enumerate(cues))
821
f5f15c99
LR
def _get_subtitles(self, *, ep_id=None, aid=None):
    """Return a subtitles dict for the given episode id or aid.

    The subtitle list is requested non-fatally; every listed track that has a
    URL is downloaded (also non-fatally) and converted to SRT. Tracks are
    grouped under their ``lang_key``, defaulting to ``'en'``.
    """
    sub_json = self._call_api(
        '/web/v2/subtitle', ep_id or aid, fatal=False,
        note='Downloading subtitles list', errnote='Unable to download subtitles list',
        query=filter_dict({
            'platform': 'web',
            's_locale': 'en_US',
            'episode_id': ep_id,
            'aid': aid,
        })) or {}
    subtitles = {}
    for sub in sub_json.get('subtitles') or []:
        sub_url = sub.get('url')
        if not sub_url:
            continue
        # Build the suffix on its own: `%` binds tighter than a conditional
        # expression, so the unparenthesised form produced an empty note
        # whenever 'lang' was missing
        lang_suffix = f' for {sub["lang"]}' if sub.get('lang') else ''
        sub_data = self._download_json(
            sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
            note=f'Downloading subtitles{lang_suffix}')
        if not sub_data:
            continue
        subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
            'ext': 'srt',
            'data': self.json2srt(sub_data)
        })
    return subtitles
847
f5f15c99
LR
def _get_formats(self, *, ep_id=None, aid=None):
    """Return the video-only and audio-only formats listed by ``/web/playurl``.

    Entries without a URL are skipped.
    """
    api_query = filter_dict({
        'platform': 'web',
        'ep_id': ep_id,
        'aid': aid,
    })
    playurl = self._call_api(
        '/web/playurl', ep_id or aid, note='Downloading video formats',
        errnote='Unable to download video formats', query=api_query)['playurl']

    formats = []
    for stream in playurl.get('video') or []:
        resource = stream.get('video_resource') or {}
        stream_info = stream.get('stream_info') or {}
        if resource.get('url'):
            formats.append({
                'url': resource['url'],
                'ext': 'mp4',
                'format_note': stream_info.get('desc_words'),
                'width': resource.get('width'),
                'height': resource.get('height'),
                'vbr': resource.get('bandwidth'),
                'acodec': 'none',
                'vcodec': resource.get('codecs'),
                'filesize': resource.get('size'),
            })

    formats.extend({
        'url': audio['url'],
        'ext': 'mp4',
        'abr': audio.get('bandwidth'),
        'acodec': audio.get('codecs'),
        'vcodec': 'none',
        'filesize': audio.get('size'),
    } for audio in playurl.get('audio_resource') or [] if audio.get('url'))

    return formats
887
26fdfc37 888 def _parse_video_metadata(self, video_data):
16f7e6be 889 return {
f5f15c99
LR
890 'title': video_data.get('title_display') or video_data.get('title'),
891 'thumbnail': video_data.get('cover'),
c62ecf0d 892 'episode_number': int_or_none(self._search_regex(
f5f15c99 893 r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
16f7e6be
AG
894 }
895
52efa4b3 896 def _perform_login(self, username, password):
65f6e807 897 if not Cryptodome.RSA:
f6a765ce 898 raise ExtractorError('pycryptodomex not found. Please install', expected=True)
cfcf60ea
M
899
900 key_data = self._download_json(
901 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
902 note='Downloading login key', errnote='Unable to download login key')['data']
903
65f6e807 904 public_key = Cryptodome.RSA.importKey(key_data['key'])
905 password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
cfcf60ea
M
906 login_post = self._download_json(
907 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
908 'username': username,
909 'password': base64.b64encode(password_hash).decode('ascii'),
910 'keep_me': 'true',
911 's_locale': 'en_US',
912 'isTrusted': 'true'
913 }), note='Logging in', errnote='Unable to log in')
914 if login_post.get('code'):
915 if login_post.get('message'):
916 raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
917 else:
918 raise ExtractorError('Unable to log in')
919
16f7e6be
AG
920
class BiliIntlIE(BiliIntlBaseIE):
    # Extractor for single bilibili.tv / biliintl.com pages: either an episode
    # of a season (`/play/<season_id>/<ep_id>`) or a standalone video
    # (`/video/<aid>`), optionally prefixed by a two-letter language segment.
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro'
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro'
            }],
        }
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro'
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro'
            }],
        }
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        'url': 'https://www.bilibili.tv/en/video/2041863208',
        'info_dict': {
            'id': '2041863208',
            'ext': 'mp4',
            'timestamp': 1670874843,
            'description': 'Scheduled for April 2023.\nStudio: ufotable',
            'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
            'upload_date': '20221212',
            'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
        }
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Return the canonical English page URL for a video.

        With ``series_id`` the result is an episode URL
        (``/en/play/<series_id>/<video_id>``), otherwise a standalone video URL
        (``/en/video/<video_id>``).
        """
        # Declared static: the function takes no `self`, so without the
        # decorator it could only be called on the class, never on an instance.
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        """Return the metadata dict (title, thumbnail, ...) for one video.

        Metadata smuggled into ``url`` is returned as-is when it has a title.
        Otherwise the webpage is downloaded and its preloaded state is read;
        for season episodes whose page carries no such state, the season's
        episode list is searched for ``video_id`` instead. JSON-LD and
        OpenGraph tags of the webpage fill whatever is still missing.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            # Fall back to an empty dict: the lookup may find no matching
            # episode, and _parse_video_metadata() calls .get() on its argument.
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False) or {}

        # XXX: webpage metadata may not be accurate; it is only used as a fallback
        # so that extraction does not crash when video_data is not found
        return merge_dicts(
            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
                'title': self._html_search_meta('og:title', webpage),
                'description': self._html_search_meta('og:description', webpage)
            })

    def _real_extract(self, url):
        """Extract a single episode or video, including intro/outro chapters for episodes."""
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start and end times seem to be off by a few seconds, even though
                # this is correct according to ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                # The API values are divided by 1000 before being used as chapter times.
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro'
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro'
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters
        }
16f7e6be
AG
1091
1092
class BiliIntlSeriesIE(BiliIntlBaseIE):
    # Playlist extractor for a whole season/series page
    # (`/play/<id>` or `/media/<id>` on bilibili.tv / biliintl.com).
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield one url_result per episode found in the season's episode list.

        The metadata parsed from each list entry is smuggled into the episode
        URL that is handed to BiliIntlIE.
        """
        episode_list = self._call_api(
            f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        episodes = traverse_obj(
            episode_list, ('sections', ..., 'episodes', ...), expected_type=dict)
        for item in episodes:
            ep_id = str(item['episode_id'])
            episode_url = BiliIntlIE._make_url(ep_id, series_id)
            smuggled_url = smuggle_url(episode_url, self._parse_video_metadata(item))
            yield self.url_result(smuggled_url, BiliIntlIE, ep_id)

    def _real_extract(self, url):
        """Return the season as a playlist with title, description, categories, thumbnail and view count."""
        series_id = self._match_id(url)
        season_response = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id)
        season = season_response.get('season') or {}

        title = season.get('title')
        description = season.get('description')
        categories = traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none)
        thumbnail = url_or_none(season.get('horizontal_cover'))
        view_count = parse_count(season.get('view'))

        return self.playlist_result(
            self._entries(series_id), series_id, title, description,
            categories=categories, thumbnail=thumbnail, view_count=view_count)
b4f53662
H
1145
1146
class BiliLiveIE(InfoExtractor):
    # Extractor for live.bilibili.com rooms (`/<room>` or `/blanc/<room>`).
    # The dots of the host name are escaped so that they only match a literal
    # '.', not an arbitrary character.
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
            'ext': 'flv',
            'title': "太空狼人杀联动,不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live'
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True
    }]

    # Quality numbers ("qn") requested from the API, mapped to the format
    # fields merged into every format produced for that quality.
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Preference function built from the _FORMATS keys in declaration order.
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Call the live API at ``path`` and return its ``data`` object (``{}`` if absent).

        Raises ExtractorError when the response ``code`` is not 0.
        """
        api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if api_result.get('code') != 0:
            raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
        return api_result.get('data') or {}

    def _parse_formats(self, qn, fmt):
        """Yield a format dict for every URL of the codecs in ``fmt`` whose ``current_qn`` equals ``qn``."""
        for codec in fmt.get('codec') or []:
            # Only keep codecs actually served at the requested quality.
            if codec.get('current_qn') != qn:
                continue
            # Tolerate a missing/empty URL list, like the codec list above.
            for url_info in codec.get('url_info') or []:
                yield {
                    'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        """Extract the live stream of a room.

        Raises ExtractorError (expected) when the room's ``live_status`` is 0.
        """
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        # Bound up front so the `live_time` lookup below never depends on the
        # loop having run; the value of the last request is the one used.
        stream_data = {}
        for qn in self._FORMATS:
            stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            })
            for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            'timestamp': stream_data.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }