import base64
import functools
import hashlib
import itertools
import json
import math
import re
import time
import urllib.parse
import uuid

from .common import InfoExtractor, SearchInfoExtractor
from ..dependencies import Cryptodome
from ..networking.exceptions import HTTPError
from ..utils import (
    ExtractorError,
    GeoRestrictedError,
    InAdvancePagedList,
    OnDemandPagedList,
    bool_or_none,
    clean_html,
    determine_ext,
    filter_dict,
    float_or_none,
    format_field,
    get_element_by_class,
    int_or_none,
    join_nonempty,
    make_archive_id,
    merge_dicts,
    mimetype2ext,
    parse_count,
    parse_qs,
    parse_resolution,
    qualities,
    smuggle_url,
    srt_subtitles_timecode,
    str_or_none,
    traverse_obj,
    unified_timestamp,
    unsmuggle_url,
    url_or_none,
    urlencode_postdata,
    variadic,
)


class BilibiliBaseIE(InfoExtractor):
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
    _WBI_KEY_CACHE_TIMEOUT = 30  # exact expire timeout is unclear, use 30s for one session
    _wbi_key_cache = {}

    @property
    def is_logged_in(self):
        return bool(self._get_cookies('https://api.bilibili.com').get('SESSDATA'))

    def _check_missing_formats(self, play_info, formats):
        parsed_qualities = set(traverse_obj(formats, (..., 'quality')))
        missing_formats = join_nonempty(*[
            traverse_obj(fmt, 'new_description', 'display_desc', 'quality')
            for fmt in traverse_obj(play_info, (
                'support_formats', lambda _, v: v['quality'] not in parsed_qualities))], delim=', ')
        if missing_formats:
            self.to_screen(
                f'Format(s) {missing_formats} are missing; you have to login or '
                f'become a premium member to download them. {self._login_hint()}')

    def extract_formats(self, play_info):
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            'acodec': 'none' if audios else None,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        if formats:
            self._check_missing_formats(play_info, formats)

        fragments = traverse_obj(play_info, ('durl', lambda _, v: url_or_none(v['url']), {
            'url': ('url', {url_or_none}),
            'duration': ('length', {functools.partial(float_or_none, scale=1000)}),
            'filesize': ('size', {int_or_none}),
        }))
        if fragments:
            formats.append({
                'url': fragments[0]['url'],
                'filesize': sum(traverse_obj(fragments, (..., 'filesize'))),
                **({
                    'fragments': fragments,
                    'protocol': 'http_dash_segments',
                } if len(fragments) > 1 else {}),
                **traverse_obj(play_info, {
                    'quality': ('quality', {int_or_none}),
                    'format_id': ('quality', {str_or_none}),
                    'format_note': ('quality', {lambda x: format_names.get(x)}),
                    'duration': ('timelength', {functools.partial(float_or_none, scale=1000)}),
                }),
                **parse_resolution(format_names.get(play_info.get('quality'))),
            })
        return formats

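    # WBI request signing (as implemented below): the 32-character mixin key is derived
    # from the filenames of `wbi_img.img_url`/`sub_url` returned by the nav API, shuffled
    # through the table in _get_wbi_key(); `w_rid` is then the MD5 of the sorted,
    # urlencoded query followed by that key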
    def _get_wbi_key(self, video_id):
        if time.time() < self._wbi_key_cache.get('ts', 0) + self._WBI_KEY_CACHE_TIMEOUT:
            return self._wbi_key_cache['key']

        session_data = self._download_json(
            'https://api.bilibili.com/x/web-interface/nav', video_id, note='Downloading wbi sign')

        lookup = ''.join(traverse_obj(session_data, (
            'data', 'wbi_img', ('img_url', 'sub_url'),
            {lambda x: x.rpartition('/')[2].partition('.')[0]})))

        # from getMixinKey() in the vendor js
        mixin_key_enc_tab = [
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49,
            33, 9, 42, 19, 29, 28, 14, 39, 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40,
            61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63, 57, 62, 11,
            36, 20, 34, 44, 52,
        ]

        self._wbi_key_cache.update({
            'key': ''.join(lookup[i] for i in mixin_key_enc_tab)[:32],
            'ts': time.time(),
        })
        return self._wbi_key_cache['key']

    def _sign_wbi(self, params, video_id):
        params['wts'] = round(time.time())
        params = {
            k: ''.join(filter(lambda char: char not in "!'()*", str(v)))
            for k, v in sorted(params.items())
        }
        query = urllib.parse.urlencode(params)
        params['w_rid'] = hashlib.md5(f'{query}{self._get_wbi_key(video_id)}'.encode()).hexdigest()
        return params

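    # fnval is a bitmask of requested stream capabilities; 4048 requests DASH together
    # with the HDR/4K/Dolby/8K/AV1 variants (bit meanings assumed from community API docs)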
    def _download_playinfo(self, bvid, cid, headers=None, qn=None):
        params = {'bvid': bvid, 'cid': cid, 'fnval': 4048}
        if qn:
            params['qn'] = qn
        return self._download_json(
            'https://api.bilibili.com/x/player/wbi/playurl', bvid,
            query=self._sign_wbi(params, bvid), headers=headers,
            note=f'Downloading video formats for cid {cid} {qn or ""}')['data']

    def json2srt(self, json_data):
        srt_data = ''
        for idx, line in enumerate(json_data.get('body') or []):
            srt_data += (f'{idx + 1}\n'
                         f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
                         f'{line["content"]}\n\n')
        return srt_data

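    # besides any CC subtitles from the player API, the danmaku (bullet comment) XML
    # stream is always exposed as an additional subtitle track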
    def _get_subtitles(self, video_id, cid, aid=None):
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }],
        }

        video_info = self._download_json(
            'https://api.bilibili.com/x/player/v2', video_id,
            query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
            note=f'Extracting subtitle info {cid}')
        if traverse_obj(video_info, ('data', 'need_login_subtitle')):
            self.report_warning(
                f'Subtitles are only available when logged in. {self._login_hint()}', only_once=True)
        for s in traverse_obj(video_info, (
                'data', 'subtitle', 'subtitles', lambda _, v: v['subtitle_url'] and v['lan'])):
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
            })
        return subtitles

    def _get_chapters(self, aid, cid):
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

    def _get_comments(self, aid):
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
            for children in map(self._get_all_children, replies):
                yield from children

    def _get_all_children(self, reply):
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
            yield from children

    def _get_episodes_from_season(self, ss_id, url):
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))

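    # interactive ("story mode") videos form a graph of edges (choices); the helpers below
    # walk it recursively and collect one entry per distinct cid (video section)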
    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
        cid_edges = cid_edges or {}
        division_data = self._download_json(
            'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
            query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
            note=f'Extracting divisions from edge {edge_id}')
        edges.setdefault(edge_id, {}).update(
            traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
                'title': ('title', {str}),
                'cid': ('cid', {int_or_none}),
            }), get_all=False))

        edges[edge_id].update(traverse_obj(division_data, ('data', {
            'title': ('title', {str}),
            'choices': ('edges', 'questions', ..., 'choices', ..., {
                'edge_id': ('id', {int_or_none}),
                'cid': ('cid', {int_or_none}),
                'text': ('option', {str}),
            }),
        })))
        # use dict to combine edges that use the same video section (same cid)
        cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
        for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
            if choice['edge_id'] not in edges:
                edges[choice['edge_id']] = {'cid': choice['cid']}
                self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
        return cid_edges

    def _get_interactive_entries(self, video_id, cid, metainfo, headers=None):
        graph_version = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/wbi/v2', video_id,
                'Extracting graph version', query={'bvid': video_id, 'cid': cid}, headers=headers),
            ('data', 'interaction', 'graph_version', {int_or_none}))
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
            play_info = self._download_playinfo(video_id, cid, headers=headers)
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
                'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
                'formats': self.extract_formats(play_info),
                'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'subtitles': self.extract_subtitles(video_id, cid),
            }


class BiliBiliIE(BilibiliBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
            '_old_archive_ids': ['bilibili 8903802_part1'],
        },
    }, {
        'note': 'old av URL version',
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'id': 'BV11x411K7CN',
            'ext': 'mp4',
            'title': '【金坷垃】金泡沫',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            '_old_archive_ids': ['bilibili 1074402_part1'],
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的',
        },
        'playlist_count': 18,
        'playlist': [{
            'info_dict': {
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:10',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
                '_old_archive_ids': ['bilibili 498159642_part1'],
            },
        }],
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:10',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
            '_old_archive_ids': ['bilibili 498159642_part1'],
        },
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 8903802_part1'],
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 463665680_part1'],
        },
        'params': {'skip_download': True},
    }, {
        'note': 'video redirects to festival page',
        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
        'info_dict': {
            'id': 'BV1wP4y1P72h',
            'ext': 'mp4',
            'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
            'timestamp': 1643947497,
            'upload_date': '20220204',
            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
            'uploader': '叨叨冯聊音乐',
            'duration': 246.719,
            'uploader_id': '528182630',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 893839363_part1'],
        },
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
        'info_dict': {
            'id': 'BV1ay4y1d77f',
            'ext': 'mp4',
            'title': '【崩坏3新春剧场】为特别的你送上祝福!',
            'timestamp': 1674273600,
            'upload_date': '20230121',
            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
            'uploader': '果蝇轰',
            'duration': 1111.722,
            'uploader_id': '8469526',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 778246196_part1'],
        },
    }, {
        'note': 'legacy flv/mp4 video',
        'url': 'https://www.bilibili.com/video/BV1ms411Q7vw/?p=4',
        'info_dict': {
            'id': 'BV1ms411Q7vw_p4',
            'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
            'timestamp': 1458222815,
            'upload_date': '20160317',
            'description': '云南方言快乐生产线出品',
            'duration': float,
            'uploader': '一笑颠天',
            'uploader_id': '3916081',
            'view_count': int,
            'comment_count': int,
            'like_count': int,
            'tags': list,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 4120229_part4'],
        },
        'params': {'extractor_args': {'bilibili': {'prefer_multi_flv': ['32']}}},
        'playlist_count': 19,
        'playlist': [{
            'info_dict': {
                'id': 'BV1ms411Q7vw_p4_0',
                'ext': 'flv',
                'title': '[搞笑]【动画】云南方言快乐生产线出品 p04 新烧包谷之漫游桃花岛',
                'duration': 399.102,
            },
        }],
    }, {
        'note': 'legacy mp4-only video',
        'url': 'https://www.bilibili.com/video/BV1nx411u79K',
        'info_dict': {
            'id': 'BV1nx411u79K',
            'ext': 'mp4',
            'title': '【练习室】201603声乐练习《No Air》with VigoVan',
            'timestamp': 1508893551,
            'upload_date': '20171025',
            'description': '@ZERO-G伯远\n声乐练习 《No Air》with Vigo Van',
            'duration': 80.384,
            'uploader': '伯远',
            'uploader_id': '10584494',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'tags': list,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 15700301_part1'],
        },
    }, {
        'note': 'interactive/split-path video',
        'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
        'info_dict': {
            'id': 'BV1af4y1H7ga',
            'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
            'timestamp': 1630500414,
            'upload_date': '20210901',
            'description': 'md5:01113e39ab06e28042d74ac356a08786',
            'tags': list,
            'uploader': '钉宫妮妮Ninico',
            'duration': 1503,
            'uploader_id': '8881297',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            '_old_archive_ids': ['bilibili 292734508_part1'],
        },
        'playlist_count': 33,
        'playlist': [{
            'info_dict': {
                'id': 'BV1af4y1H7ga_400950101',
                'ext': 'mp4',
                'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
                'timestamp': 1630500414,
                'upload_date': '20210901',
                'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
                'tags': list,
                'uploader': '钉宫妮妮Ninico',
                'duration': 11.605,
                'uploader_id': '8881297',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 292734508_part1'],
            },
        }],
    }, {
        'note': '301 redirect to bangumi link',
        'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
        'info_dict': {
            'id': '288525',
            'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
            'ext': 'mp4',
            'series': '我和我的祖国',
            'series_id': '4780',
            'season': '幕后纪实',
            'season_id': '28609',
            'season_number': 1,
            'episode': '钱学森弹道和乘波体飞行器是什么?',
            'episode_id': '288525',
            'episode_number': 105,
            'duration': 1183.957,
            'timestamp': 1571648124,
            'upload_date': '20191021',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'note': 'video has subtitles, which requires login',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫太渴',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2',  # login required for CC subtitle
            '_old_archive_ids': ['bilibili 898179753_part1'],
        },
        'params': {'listsubtitles': True},
        'skip': 'login required for subtitle',
    }, {
        'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
        'info_dict': {
            'id': 'BV1jL41167ZG',
            'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
            'ext': 'mp4',
        },
        'skip': 'supporter-only video',
    }, {
        'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
        'info_dict': {
            'id': 'BV1Ks411f7aQ',
            'title': '【BD1080P】狼与香辛料I【华盟】',
            'ext': 'mp4',
        },
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
        'info_dict': {
            'id': 'BV1GJ411x7h7',
            'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
            'ext': 'mp4',
        },
        'skip': 'geo-restricted',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        headers['Referer'] = url

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info_obj = self._search_json(
                r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
            if not play_info_obj:
                if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
                    self.raise_login_required()
                if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
                    raise ExtractorError(
                        'This video may be deleted or geo-restricted. '
                        'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
            play_info = traverse_obj(play_info_obj, ('data', {dict}))
            if not play_info:
                if traverse_obj(play_info_obj, 'code') == 87007:
                    toast = get_element_by_class('tips-toast', webpage) or ''
                    msg = clean_html(
                        f'{get_element_by_class("belongs-to", toast) or ""},'
                        + (get_element_by_class('level', toast) or ''))
                    raise ExtractorError(
                        f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
                raise ExtractorError('Failed to extract play info')
            video_data = initial_state['videoData']

        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology', headers=headers),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        festival_info = {}
        if is_festival:
            play_info = self._download_playinfo(video_id, cid, headers=headers)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
        if is_interactive:
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo, headers=headers), **metainfo,
                duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                __post_extractor=self.extract_comments(aid))
        else:
            formats = self.extract_formats(play_info)

            if not traverse_obj(play_info, ('dash')):
                # we only have legacy formats and need additional work
                has_qn = lambda x: x in traverse_obj(formats, (..., 'quality'))
                for qn in traverse_obj(play_info, ('accept_quality', lambda _, v: not has_qn(v), {int})):
                    formats.extend(traverse_obj(
                        self.extract_formats(self._download_playinfo(video_id, cid, headers=headers, qn=qn)),
                        lambda _, v: not has_qn(v['quality'])))
                self._check_missing_formats(play_info, formats)
                flv_formats = traverse_obj(formats, lambda _, v: v['fragments'])
                if flv_formats and len(flv_formats) < len(formats):
                    # Flv and mp4 are incompatible due to `multi_video` workaround, so drop one
                    if not self._configuration_arg('prefer_multi_flv'):
                        dropped_fmts = ', '.join(
                            f'{f.get("format_note")} ({f.get("format_id")})' for f in flv_formats)
                        formats = traverse_obj(formats, lambda _, v: not v.get('fragments'))
                        if dropped_fmts:
                            self.to_screen(
                                f'Dropping incompatible flv format(s) {dropped_fmts} since mp4 is available. '
                                'To extract flv, pass --extractor-args "bilibili:prefer_multi_flv"')
                    else:
                        formats = traverse_obj(
                            # XXX: Filtering by extractor-arg is for testing purposes
                            formats, lambda _, v: v['quality'] == int(self._configuration_arg('prefer_multi_flv')[0]),
                        ) or [max(flv_formats, key=lambda x: x['quality'])]

            if traverse_obj(formats, (0, 'fragments')):
                # We have flv formats, which are individual short videos with their own timestamps and metainfo
                # Binary concatenation corrupts their timestamps, so we need a `multi_video` workaround
                return {
                    **metainfo,
                    '_type': 'multi_video',
                    'entries': [{
                        'id': f'{metainfo["id"]}_{idx}',
                        'title': metainfo['title'],
                        'http_headers': metainfo['http_headers'],
                        'formats': [{
                            **fragment,
                            'format_id': formats[0].get('format_id'),
                        }],
                        'subtitles': self.extract_subtitles(video_id, cid) if idx == 0 else None,
                        '__post_extractor': self.extract_comments(aid) if idx == 0 else None,
                    } for idx, fragment in enumerate(formats[0]['fragments'])],
                    'duration': float_or_none(play_info.get('timelength'), scale=1000),
                }
            else:
                return {
                    **metainfo,
                    'formats': formats,
                    'duration': float_or_none(play_info.get('timelength'), scale=1000),
                    'chapters': self._get_chapters(aid, cid),
                    'subtitles': self.extract_subtitles(video_id, cid),
                    '__post_extractor': self.extract_comments(aid),
                }


class BiliBiliBangumiIE(BilibiliBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
        'info_dict': {
            'id': '21495',
            'ext': 'mp4',
            'series': '悠久之翼',
            'series_id': '774',
            'season': '第二季',
            'season_id': '1182',
            'season_number': 2,
            'episode': 'forever/ef',
            'episode_id': '21495',
            'episode_number': 12,
            'title': '12 forever/ef',
            'duration': 1420.791,
            'timestamp': 1320412200,
            'upload_date': '20111104',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
        'info_dict': {
            'id': '267851',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '残酷',
            'episode_id': '267851',
            'episode_number': 1,
            'title': '1 残酷',
            'duration': 1425.256,
            'timestamp': 1554566400,
            'upload_date': '20190406',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'skip': 'Geo-restricted',
    }, {
        'note': 'a making-of which falls outside main section',
        'url': 'https://www.bilibili.com/bangumi/play/ep345120',
        'info_dict': {
            'id': '345120',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '炭治郎篇',
            'episode_id': '345120',
            'episode_number': 27,
            'title': '#1 炭治郎篇',
            'duration': 1922.129,
            'timestamp': 1602853860,
            'upload_date': '20201016',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }]

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        headers = self.geo_verification_headers()
        webpage = self._download_webpage(url, episode_id, headers=headers)

        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers['Referer'] = url
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
            headers=headers)
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        season_number, season_title = season_id and next((
            (idx + 1, e.get('season_title')) for idx, e in enumerate(
                traverse_obj(bangumi_info, ('seasons', ...)))
            if e.get('season_id') == season_id
        ), (None, None))

        aid = episode_info.get('aid')

        return {
            'id': episode_id,
            'formats': formats,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            **traverse_obj(episode_info, {
                'episode': ('long_title', {str}),
                'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
                'timestamp': ('pub_time', {int_or_none}),
                'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
            }),
            'episode_id': episode_id,
            'season': str_or_none(season_title),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': {'Referer': url},
        }


class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)

        initial_state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
        ss_id = initial_state['mediaInfo']['season_id']

        return self.playlist_result(
            self._get_episodes_from_season(ss_id, url), media_id,
            **traverse_obj(initial_state, ('mediaInfo', {
                'title': ('title', {str}),
                'description': ('evaluate', {str}),
            })))


class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26,
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }]

    def _real_extract(self, url):
        ss_id = self._match_id(url)
        webpage = self._download_webpage(url, ss_id)
        metainfo = traverse_obj(
            self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
            ('itemListElement', ..., {
                'title': ('name', {str}),
                'description': ('description', {str}),
            }), get_all=False)

        return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)


class BilibiliCheeseBaseIE(BilibiliBaseIE):
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        aid, cid = episode_info['aid'], episode_info['cid']

        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']


class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        ep_id = self._match_id(url)
        return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)


class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            },
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
            yield self._extract_episode(season_info, ep_id)

    def _real_extract(self, url):
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)

        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id,
            **traverse_obj(season_info, {
                'title': ('title', {str}),
                'description': ('subtitle', {str}),
            }))


class BilibiliSpaceBaseIE(BilibiliBaseIE):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        paged_list = InAdvancePagedList(
            lambda idx: get_entries(fetch_page(idx) if idx else first_page),
            metadata['page_count'], metadata['page_size'])

        return metadata, paged_list


class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
        'skip': 'login required',
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        def fetch_page(page_idx):
            query = {
                'keyword': '',
                'mid': playlist_id,
                'order': traverse_obj(parse_qs(url), ('order', 0)) or 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
            }

            try:
                response = self._download_json(
                    'https://api.bilibili.com/x/space/wbi/arc/search', playlist_id,
                    query=self._sign_wbi(query, playlist_id),
                    note=f'Downloading space page {page_idx}', headers={'Referer': url})
            except ExtractorError as e:
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
            status_code = response['code']
            if status_code == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            elif status_code == -352 and not self.is_logged_in:
                self.raise_login_required('Request is rejected, you need to login to access playlist')
            elif status_code != 0:
                raise ExtractorError(f'Request failed ({status_code}): {response.get("message") or "Unknown error"}')
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)


class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for entry in page_data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)


class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list


class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)


class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'
        playlist_meta = traverse_obj(self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
        ), {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)


class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        entries = self._get_entries(self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries'), 'data')

        return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        })))


class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {
            'id': r're:\d+',
            'title': '稍后再看',
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')
        entries = self._get_entries(watchlater_info, ('data', 'list'))
        return self.playlist_result(entries, id=list_id, title='稍后再看')


class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'description': '',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {
            'id': r're:2_\d+',
            'title': '稍后再看',
            'uploader': str,
            'uploader_id': str,
        },
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'redirect url & login required',
    }]

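    # medialist pagination is cursor-based: the id of the last entry of the previous page
    # is passed as `oid` for the next request, until the API reports `has_more` is false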
1537 def _extract_medialist(self, query, list_id):
1538 for page_num in itertools.count(1):
1539 page_data = self._download_json(
1540 'https://api.bilibili.com/x/v2/medialist/resource/list',
1541 list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
1542 )['data']
1543 yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
1544 query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
1545 if not page_data.get('has_more', False):
1546 break
1547
1548 def _real_extract(self, url):
1549 list_id = self._match_id(url)
1550
1551 bvid = traverse_obj(parse_qs(url), ('bvid', 0))
1552 if not self._yes_playlist(list_id, bvid):
1553 return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
1554
1555 webpage = self._download_webpage(url, list_id)
1556 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
1557 if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
1558 error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
1559 error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
1560 if error_code == -400 and list_id == 'watchlater':
1561 self.raise_login_required('You need to login to access your watchlater playlist')
1562 elif error_code == -403:
1563 self.raise_login_required('This is a private playlist. You need to login as its owner')
1564 elif error_code == 11010:
1565 raise ExtractorError('Playlist is no longer available', expected=True)
1566 raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
1567
1568 query = {
1569 'ps': 20,
1570 'with_current': False,
1571 **traverse_obj(initial_state, {
1572 'type': ('playlist', 'type', {int_or_none}),
1573 'biz_id': ('playlist', 'id', {int_or_none}),
1574 'tid': ('tid', {int_or_none}),
1575 'sort_field': ('sortFiled', {int_or_none}),
1576 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
1577 }),
1578 }
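        # The playlist id below is composed as '<type>_<biz_id>' from the values
        # extracted above, which is what produces ids like '3_1103407912' and
        # '5_547718' in the tests for this extractor. The 'sortFiled' spelling
        # above appears to mirror the key used in the page's own initial state,
        # so it should not be "corrected".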
1579 metadata = {
1580 'id': f'{query["type"]}_{query["biz_id"]}',
1581 **traverse_obj(initial_state, ('mediaListInfo', {
1582 'title': ('title', {str}),
1583 'uploader': ('upper', 'name', {str}),
1584 'uploader_id': ('upper', 'mid', {str_or_none}),
1585 'timestamp': ('ctime', {int_or_none}, {lambda x: x or None}),
1586 'thumbnail': ('cover', {url_or_none}),
1587 })),
1588 }
1589 return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
1590
1591
1592class BilibiliCategoryIE(InfoExtractor):
1593 IE_NAME = 'Bilibili category extractor'
1594 _MAX_RESULTS = 1000000
1595    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+/[a-zA-Z]+'
1596 _TESTS = [{
1597 'url': 'https://www.bilibili.com/v/kichiku/mad',
1598 'info_dict': {
1599 'id': 'kichiku: mad',
1600 'title': 'kichiku: mad',
1601 },
1602 'playlist_mincount': 45,
1603 'params': {
1604 'playlistend': 45,
1605 },
1606 }]
1607
1608 def _fetch_page(self, api_url, num_pages, query, page_num):
1609 parsed_json = self._download_json(
1610 api_url, query, query={'Search_key': query, 'pn': page_num},
1611 note=f'Extracting results from page {page_num} of {num_pages}')
1612
1613 video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
1614 if not video_list:
1615 raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
1616
1617 for video in video_list:
1618 yield self.url_result(
1619 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
1620
1621 def _entries(self, category, subcategory, query):
1622 # map of categories : subcategories : RIDs
1623 rid_map = {
1624 'kichiku': {
1625 'mad': 26,
1626 'manual_vocaloid': 126,
1627 'guide': 22,
1628 'theatre': 216,
1629 'course': 127,
1630 },
1631 }
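        # The numeric values are the "rid" parameters consumed by the newlist
        # endpoint below (apparently short for region/category id). Supporting
        # more categories presumably only requires extending this map with the
        # matching rids, which are not documented and were likely collected from
        # the site itself.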
1632
1633 if category not in rid_map:
1634 raise ExtractorError(
1635 f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1636 if subcategory not in rid_map[category]:
1637 raise ExtractorError(
1638 f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1639 rid_value = rid_map[category][subcategory]
1640
1641        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
1642 page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
1643 page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
1644 count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
1645 if count is None or not size:
1646 raise ExtractorError('Failed to calculate either page count or size')
1647
1648 num_pages = math.ceil(count / size)
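        # The actual fetching is lazy: OnDemandPagedList below only calls
        # _fetch_page (with api_url and num_pages bound via functools.partial)
        # for the pages a given download actually needs, `size` entries at a
        # time; num_pages itself is only used for the progress note.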
1649
1650 return OnDemandPagedList(functools.partial(
1651 self._fetch_page, api_url, num_pages, query), size)
1652
1653 def _real_extract(self, url):
1654 category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
1655 query = f'{category}: {subcategory}'
1656
1657 return self.playlist_result(self._entries(category, subcategory, query), query, query)
1658
1659
1660class BiliBiliSearchIE(SearchInfoExtractor):
1661 IE_DESC = 'Bilibili video search'
1662 _MAX_RESULTS = 100000
1663 _SEARCH_KEY = 'bilisearch'
1664 _TESTS = [{
1665 'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1666 'playlist_count': 3,
1667 'info_dict': {
1668 'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1669 'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1670 },
1671 'playlist': [{
1672 'info_dict': {
1673 'id': 'BV1n44y1Q7sc',
1674 'ext': 'mp4',
1675 'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
1676 'timestamp': 1669889987,
1677 'upload_date': '20221201',
1678 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
1679 'tags': list,
1680 'uploader': '靡烟miya',
1681 'duration': 123.156,
1682 'uploader_id': '1958703906',
1683 'comment_count': int,
1684 'view_count': int,
1685 'like_count': int,
1686 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1687 '_old_archive_ids': ['bilibili 988222410_part1'],
1688 },
1689 }],
1690 }]
1691
1692 def _search_results(self, query):
1693 if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
1694 self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
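        # The search API appears to refuse requests that carry no buvid3 device
        # cookie, so a random one is synthesized here when the session does not
        # already have it; the '<uuid>infoc' shape mirrors the cookie the site
        # itself sets, though any well-formed value seems to be accepted.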
1695 for page_num in itertools.count(1):
1696 videos = self._download_json(
1697 'https://api.bilibili.com/x/web-interface/search/type', query,
1698 note=f'Extracting results from page {page_num}', query={
1699 'Search_key': query,
1700 'keyword': query,
1701 'page': page_num,
1702 'context': '',
1703 'duration': 0,
1704 'tids_2': '',
1705 '__refresh__': 'true',
1706 'search_type': 'video',
1707 'tids': 0,
1708 'highlight': 1,
1709 })['data'].get('result')
1710 if not videos:
1711 break
1712 for video in videos:
1713 yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
1714
1715
1716class BilibiliAudioBaseIE(InfoExtractor):
1717 def _call_api(self, path, sid, query=None):
1718 if not query:
1719 query = {'sid': sid}
1720 return self._download_json(
1721 'https://www.bilibili.com/audio/music-service-c/web/' + path,
1722 sid, query=query)['data']
1723
1724
1725class BilibiliAudioIE(BilibiliAudioBaseIE):
1726 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1727 _TEST = {
1728 'url': 'https://www.bilibili.com/audio/au1003142',
1729 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1730 'info_dict': {
1731 'id': '1003142',
1732 'ext': 'm4a',
1733 'title': '【tsukimi】YELLOW / 神山羊',
1734 'artist': 'tsukimi',
1735 'comment_count': int,
1736 'description': 'YELLOW的mp3版!',
1737 'duration': 183,
1738 'subtitles': {
1739 'origin': [{
1740 'ext': 'lrc',
1741 }],
1742 },
1743 'thumbnail': r're:^https?://.+\.jpg',
1744 'timestamp': 1564836614,
1745 'upload_date': '20190803',
1746 'uploader': 'tsukimi-つきみぐー',
1747 'view_count': int,
1748 },
1749 }
1750
1751 def _real_extract(self, url):
1752 au_id = self._match_id(url)
1753
1754 play_data = self._call_api('url', au_id)
1755 formats = [{
1756 'url': play_data['cdns'][0],
1757 'filesize': int_or_none(play_data.get('size')),
1758 'vcodec': 'none',
1759 }]
1760
1761 for a_format in formats:
1762 a_format.setdefault('http_headers', {}).update({
1763 'Referer': url,
1764 })
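        # Only the first entry of 'cdns' is used above, although the API may
        # list several mirrors; the Referer header is attached because the audio
        # CDN appears to reject requests that lack it.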
1765
1766 song = self._call_api('song/info', au_id)
1767 title = song['title']
1768 statistic = song.get('statistic') or {}
1769
1770 subtitles = None
1771 lyric = song.get('lyric')
1772 if lyric:
1773 subtitles = {
1774 'origin': [{
1775 'url': lyric,
1776 }],
1777 }
1778
1779 return {
1780 'id': au_id,
1781 'title': title,
1782 'formats': formats,
1783 'artist': song.get('author'),
1784 'comment_count': int_or_none(statistic.get('comment')),
1785 'description': song.get('intro'),
1786 'duration': int_or_none(song.get('duration')),
1787 'subtitles': subtitles,
1788 'thumbnail': song.get('cover'),
1789 'timestamp': int_or_none(song.get('passtime')),
1790 'uploader': song.get('uname'),
1791 'view_count': int_or_none(statistic.get('play')),
1792 }
1793
1794
1795class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
1796 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1797 _TEST = {
1798 'url': 'https://www.bilibili.com/audio/am10624',
1799 'info_dict': {
1800 'id': '10624',
1801 'title': '每日新曲推荐(每日11:00更新)',
1802 'description': '每天11:00更新,为你推送最新音乐',
1803 },
1804 'playlist_count': 19,
1805 }
1806
1807 def _real_extract(self, url):
1808 am_id = self._match_id(url)
1809
1810 songs = self._call_api(
1811 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
1812
1813 entries = []
1814 for song in songs:
1815 sid = str_or_none(song.get('id'))
1816 if not sid:
1817 continue
1818 entries.append(self.url_result(
1819 'https://www.bilibili.com/audio/au' + sid,
1820 BilibiliAudioIE.ie_key(), sid))
1821
1822 if entries:
1823 album_data = self._call_api('menu/info', am_id) or {}
1824 album_title = album_data.get('title')
1825 if album_title:
1826 for entry in entries:
1827 entry['album'] = album_title
1828 return self.playlist_result(
1829 entries, am_id, album_title, album_data.get('intro'))
1830
1831 return self.playlist_result(entries, am_id)
1832
1833
1834class BiliBiliPlayerIE(InfoExtractor):
1835 _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1836 _TEST = {
1837 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1838 'only_matching': True,
1839 }
1840
1841 def _real_extract(self, url):
1842 video_id = self._match_id(url)
1843 return self.url_result(
1844 f'http://www.bilibili.tv/video/av{video_id}/',
1845 ie=BiliBiliIE.ie_key(), video_id=video_id)
1846
1847
1848class BiliIntlBaseIE(InfoExtractor):
1849 _API_URL = 'https://api.bilibili.tv/intl/gateway'
1850 _NETRC_MACHINE = 'biliintl'
1851 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
1852
1853 def _call_api(self, endpoint, *args, **kwargs):
1854 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
1855 if json.get('code'):
1856 if json['code'] in (10004004, 10004005, 10023006):
1857 self.raise_login_required()
1858 elif json['code'] == 10004001:
1859 self.raise_geo_restricted()
1860 else:
1861 if json.get('message') and str(json['code']) != json['message']:
1862 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1863 else:
1864 errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
1865 if kwargs.get('fatal'):
1866 raise ExtractorError(errmsg)
1867 else:
1868 self.report_warning(errmsg)
1869 return json.get('data')
1870
1871 def json2srt(self, json):
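        # The comprehension below renders Bilibili's JSON subtitle body as SRT.
        # Illustration (values invented for the example):
        #   {'body': [{'from': 1.5, 'to': 3.0, 'content': 'Hello'}]}
        # becomes the cue
        #   1
        #   00:00:01,500 --> 00:00:03,000
        #   Hello
        # Entries whose 'content', 'from' or 'to' is empty/zero are skipped by
        # the traverse_obj predicate.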
1872 return '\n\n'.join(
1873 f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1874 for i, line in enumerate(traverse_obj(json, (
1875                'body', lambda _, v: v['content'] and v['from'] and v['to']))))
1876
1877 def _get_subtitles(self, *, ep_id=None, aid=None):
1878 sub_json = self._call_api(
1879 '/web/v2/subtitle', ep_id or aid, fatal=False,
1880 note='Downloading subtitles list', errnote='Unable to download subtitles list',
1881 query=filter_dict({
1882 'platform': 'web',
1883 's_locale': 'en_US',
1884 'episode_id': ep_id,
1885 'aid': aid,
1886 })) or {}
1887 subtitles = {}
1888 fetched_urls = set()
1889 for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
1890 for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
1891 if url in fetched_urls:
1892 continue
1893 fetched_urls.add(url)
1894 sub_ext = determine_ext(url)
1895 sub_lang = sub.get('lang_key') or 'en'
1896
1897 if sub_ext == 'ass':
1898 subtitles.setdefault(sub_lang, []).append({
1899 'ext': 'ass',
1900 'url': url,
1901 })
1902 elif sub_ext == 'json':
1903 sub_data = self._download_json(
1904 url, ep_id or aid, fatal=False,
1905 note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
1906 errnote='Unable to download subtitles')
1907
1908 if sub_data:
1909 subtitles.setdefault(sub_lang, []).append({
1910 'ext': 'srt',
1911 'data': self.json2srt(sub_data),
1912 })
1913 else:
1914 self.report_warning('Unexpected subtitle extension', ep_id or aid)
1915
1916 return subtitles
1917
1918 def _get_formats(self, *, ep_id=None, aid=None):
1919 video_json = self._call_api(
1920 '/web/playurl', ep_id or aid, note='Downloading video formats',
1921 errnote='Unable to download video formats', query=filter_dict({
1922 'platform': 'web',
1923 'ep_id': ep_id,
1924 'aid': aid,
1925 }))
1926 video_json = video_json['playurl']
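        # The playurl payload splits streams into video-only entries ('video')
        # and audio-only entries ('audio_resource'); marking them with
        # acodec/vcodec 'none' below lets yt-dlp treat them as separate tracks
        # to be merged.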
1927 formats = []
1928 for vid in video_json.get('video') or []:
1929 video_res = vid.get('video_resource') or {}
1930 video_info = vid.get('stream_info') or {}
1931 if not video_res.get('url'):
1932 continue
1933 formats.append({
1934 'url': video_res['url'],
1935 'ext': 'mp4',
1936 'format_note': video_info.get('desc_words'),
1937 'width': video_res.get('width'),
1938 'height': video_res.get('height'),
1939 'vbr': video_res.get('bandwidth'),
1940 'acodec': 'none',
1941 'vcodec': video_res.get('codecs'),
1942 'filesize': video_res.get('size'),
1943 })
1944 for aud in video_json.get('audio_resource') or []:
1945 if not aud.get('url'):
1946 continue
1947 formats.append({
1948 'url': aud['url'],
1949 'ext': 'mp4',
1950 'abr': aud.get('bandwidth'),
1951 'acodec': aud.get('codecs'),
1952 'vcodec': 'none',
1953 'filesize': aud.get('size'),
1954 })
1955
1956 return formats
1957
1958 def _parse_video_metadata(self, video_data):
1959 return {
1960 'title': video_data.get('title_display') or video_data.get('title'),
1961 'description': video_data.get('desc'),
1962 'thumbnail': video_data.get('cover'),
1963 'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
1964 'episode_number': int_or_none(self._search_regex(
1965 r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
1966 }
1967
1968 def _perform_login(self, username, password):
1969 if not Cryptodome.RSA:
1970 raise ExtractorError('pycryptodomex not found. Please install', expected=True)
1971
1972 key_data = self._download_json(
1973 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1974 note='Downloading login key', errnote='Unable to download login key')['data']
1975
1976 public_key = Cryptodome.RSA.importKey(key_data['key'])
1977 password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
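        # Login scheme as implemented above: the passport endpoint hands out an
        # RSA public key plus a short 'hash' string, and the credential sent
        # back is PKCS#1 v1.5-encrypt(hash + password), base64-encoded in the
        # form below. The 'hash' presumably acts as a server-issued salt/nonce.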
1978 login_post = self._download_json(
1979 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None,
1980 data=urlencode_postdata({
1981 'username': username,
1982 'password': base64.b64encode(password_hash).decode('ascii'),
1983 'keep_me': 'true',
1984 's_locale': 'en_US',
1985 'isTrusted': 'true',
1986 }), note='Logging in', errnote='Unable to log in')
1987 if login_post.get('code'):
1988 if login_post.get('message'):
1989 raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
1990 else:
1991 raise ExtractorError('Unable to log in')
1992
1993
1994class BiliIntlIE(BiliIntlBaseIE):
1995 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1996 _TESTS = [{
1997 # Bstation page
1998 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1999 'info_dict': {
2000 'id': '341736',
2001 'ext': 'mp4',
2002 'title': 'E2 - The First Night',
2003 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2004 'episode_number': 2,
2005 'upload_date': '20201009',
2006 'episode': 'Episode 2',
2007 'timestamp': 1602259500,
2008 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2009 'chapters': [{
2010 'start_time': 0,
2011 'end_time': 76.242,
2012 'title': '<Untitled Chapter 1>',
2013 }, {
2014 'start_time': 76.242,
2015 'end_time': 161.161,
2016 'title': 'Intro',
2017 }, {
2018 'start_time': 1325.742,
2019 'end_time': 1403.903,
2020 'title': 'Outro',
2021 }],
2022 },
2023 }, {
2024 # Non-Bstation page
2025 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
2026 'info_dict': {
2027 'id': '11005006',
2028 'ext': 'mp4',
2029 'title': 'E3 - Who?',
2030 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2031 'episode_number': 3,
2032 'description': 'md5:e1a775e71a35c43f141484715470ad09',
2033 'episode': 'Episode 3',
2034 'upload_date': '20211219',
2035 'timestamp': 1639928700,
2036 'chapters': [{
2037 'start_time': 0,
2038 'end_time': 88.0,
2039 'title': '<Untitled Chapter 1>',
2040 }, {
2041 'start_time': 88.0,
2042 'end_time': 156.0,
2043 'title': 'Intro',
2044 }, {
2045 'start_time': 1173.0,
2046 'end_time': 1259.535,
2047 'title': 'Outro',
2048 }],
2049 },
2050 }, {
2051 # Subtitle with empty content
2052 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
2053 'info_dict': {
2054 'id': '10131790',
2055 'ext': 'mp4',
2056 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
2057 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2058 'episode_number': 140,
2059 },
2060 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
2061 }, {
2062 # episode comment extraction
2063 'url': 'https://www.bilibili.tv/en/play/34580/340317',
2064 'info_dict': {
2065 'id': '340317',
2066 'ext': 'mp4',
2067 'timestamp': 1604057820,
2068 'upload_date': '20201030',
2069 'episode_number': 5,
2070 'title': 'E5 - My Own Steel',
2071 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
2072 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
2073 'episode': 'Episode 5',
2074 'comment_count': int,
2075 'chapters': [{
2076 'start_time': 0,
2077 'end_time': 61.0,
2078 'title': '<Untitled Chapter 1>',
2079 }, {
2080 'start_time': 61.0,
2081 'end_time': 134.0,
2082 'title': 'Intro',
2083 }, {
2084 'start_time': 1290.0,
2085 'end_time': 1379.0,
2086 'title': 'Outro',
2087 }],
2088 },
2089 'params': {
2090 'getcomments': True,
2091 },
2092 }, {
2093 # user generated content comment extraction
2094 'url': 'https://www.bilibili.tv/en/video/2045730385',
2095 'info_dict': {
2096 'id': '2045730385',
2097 'ext': 'mp4',
2098 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
2099 'timestamp': 1667891924,
2100 'upload_date': '20221108',
2101 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
2102 'comment_count': int,
2103 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
2104 },
2105 'params': {
2106 'getcomments': True,
2107 },
2108 }, {
2109 # episode id without intro and outro
2110 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
2111 'info_dict': {
2112 'id': '11246489',
2113 'ext': 'mp4',
2114 'title': 'E1 - Operation \'Strix\' <Owl>',
2115 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2116 'timestamp': 1649516400,
2117 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
2118 'episode': 'Episode 1',
2119 'episode_number': 1,
2120 'upload_date': '20220409',
2121 },
2122 }, {
2123 'url': 'https://www.biliintl.com/en/play/34613/341736',
2124 'only_matching': True,
2125 }, {
2126 # User-generated content (as opposed to a series licensed from a studio)
2127 'url': 'https://bilibili.tv/en/video/2019955076',
2128 'only_matching': True,
2129 }, {
2130 # No language in URL
2131 'url': 'https://www.bilibili.tv/video/2019955076',
2132 'only_matching': True,
2133 }, {
2134 # Uppercase language in URL
2135 'url': 'https://www.bilibili.tv/EN/video/2019955076',
2136 'only_matching': True,
2137 }]
2138
2139 @staticmethod
2140 def _make_url(video_id, series_id=None):
2141 if series_id:
2142 return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
2143 return f'https://www.bilibili.tv/en/video/{video_id}'
2144
2145 def _extract_video_metadata(self, url, video_id, season_id):
2146 url, smuggled_data = unsmuggle_url(url, {})
2147 if smuggled_data.get('title'):
2148 return smuggled_data
2149
2150 webpage = self._download_webpage(url, video_id)
2151 # Bstation layout
2152 initial_data = (
2153 self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
2154 or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
2155 video_data = traverse_obj(
2156 initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
2157
2158 if season_id and not video_data:
2159 # Non-Bstation layout, read through episode list
2160 season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
2161 video_data = traverse_obj(season_json, (
2162 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
2163 ), expected_type=dict, get_all=False)
2164
2165        # XXX: webpage metadata may not be accurate; it is only used as a fallback so extraction does not crash when video_data is not found
2166 return merge_dicts(
2167 self._parse_video_metadata(video_data), {
2168 'title': get_element_by_class(
2169 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
2170 'description': get_element_by_class(
2171 'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
2172 }, self._search_json_ld(webpage, video_id, default={}))
2173
2174 def _get_comments_reply(self, root_id, next_id=0, display_id=None):
2175 comment_api_raw_data = self._download_json(
2176 'https://api.bilibili.tv/reply/web/detail', display_id,
2177 note=f'Downloading reply comment of {root_id} - {next_id}',
2178 query={
2179 'platform': 'web',
2180                'ps': 20, # replies per page (default: 3)
2181 'root': root_id,
2182 'next': next_id,
2183 })
2184
2185 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2186 yield {
2187 'author': traverse_obj(replies, ('member', 'name')),
2188 'author_id': traverse_obj(replies, ('member', 'mid')),
2189 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2190 'text': traverse_obj(replies, ('content', 'message')),
2191 'id': replies.get('rpid'),
2192 'like_count': int_or_none(replies.get('like_count')),
2193 'parent': replies.get('parent'),
2194 'timestamp': unified_timestamp(replies.get('ctime_text')),
2195 }
2196
2197 if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2198 yield from self._get_comments_reply(
2199 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
2200
2201 def _get_comments(self, video_id, ep_id):
2202 for i in itertools.count(0):
2203 comment_api_raw_data = self._download_json(
2204 'https://api.bilibili.tv/reply/web/root', video_id,
2205 note=f'Downloading comment page {i + 1}',
2206 query={
2207 'platform': 'web',
2208 'pn': i, # page number
2209                'ps': 20, # comments per page (default: 20)
2210 'oid': video_id,
2211 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
2212 'sort_type': 1, # 1: best, 2: recent
2213 })
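            # Root comments are paged by 'pn' until data.cursor.is_end; each
            # root that reports a reply count is then expanded through
            # _get_comments_reply, which follows the separate 'next' cursor of
            # the /reply/web/detail endpoint until its own is_end flag is set.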
2214
2215 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2216 yield {
2217 'author': traverse_obj(replies, ('member', 'name')),
2218 'author_id': traverse_obj(replies, ('member', 'mid')),
2219 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2220 'text': traverse_obj(replies, ('content', 'message')),
2221 'id': replies.get('rpid'),
2222 'like_count': int_or_none(replies.get('like_count')),
2223 'timestamp': unified_timestamp(replies.get('ctime_text')),
2224 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
2225 }
2226 if replies.get('count'):
2227 yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
2228
2229 if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2230 break
2231
2232 def _real_extract(self, url):
2233 season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
2234 video_id = ep_id or aid
2235 chapters = None
2236
2237 if ep_id:
2238 intro_ending_json = self._call_api(
2239 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2240 video_id, fatal=False) or {}
2241 if intro_ending_json.get('skip'):
2242                # FIXME: start and end times seem to be off by a few seconds, even though they match the values in ogv.*.js
2243 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2244 chapters = [{
2245 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
2246 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
2247 'title': 'Intro',
2248 }, {
2249 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
2250 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
2251 'title': 'Outro',
2252 }]
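                # The skip timestamps come back in milliseconds, hence the scale
                # of 1000 above; only the intro and outro are marked here, and
                # the '<Untitled Chapter>' entries seen in the tests are
                # presumably filled in later by yt-dlp's chapter handling, not
                # by this extractor.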
2253
2254 return {
2255 'id': video_id,
2256 **self._extract_video_metadata(url, video_id, season_id),
2257 'formats': self._get_formats(ep_id=ep_id, aid=aid),
2258 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
2259 'chapters': chapters,
2260 '__post_extractor': self.extract_comments(video_id, ep_id),
2261 'http_headers': self._HEADERS,
2262 }
2263
2264
2265class BiliIntlSeriesIE(BiliIntlBaseIE):
2266 IE_NAME = 'biliIntl:series'
2267 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
2268 _TESTS = [{
2269 'url': 'https://www.bilibili.tv/en/play/34613',
2270 'playlist_mincount': 15,
2271 'info_dict': {
2272 'id': '34613',
2273 'title': 'TONIKAWA: Over the Moon For You',
2274 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2275 'categories': ['Slice of life', 'Comedy', 'Romance'],
2276 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2277 'view_count': int,
2278 },
2279 'params': {
2280 'skip_download': True,
2281 },
2282 }, {
2283 'url': 'https://www.bilibili.tv/en/media/1048837',
2284 'info_dict': {
2285 'id': '1048837',
2286 'title': 'SPY×FAMILY',
2287 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2288 'categories': ['Adventure', 'Action', 'Comedy'],
2289 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
2290 'view_count': int,
2291 },
2292 'playlist_mincount': 25,
2293 }, {
2294 'url': 'https://www.biliintl.com/en/play/34613',
2295 'only_matching': True,
2296 }, {
2297 'url': 'https://www.biliintl.com/EN/play/34613',
2298 'only_matching': True,
2299 }]
2300
2301 def _entries(self, series_id):
2302 series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
2303 for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
2304 episode_id = str(episode['episode_id'])
2305 yield self.url_result(smuggle_url(
2306 BiliIntlIE._make_url(episode_id, series_id),
2307 self._parse_video_metadata(episode),
2308 ), BiliIntlIE, episode_id)
2309
2310 def _real_extract(self, url):
2311 series_id = self._match_id(url)
2312 series_info = self._call_api(
2313 f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
2314 return self.playlist_result(
2315 self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
2316 categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
2317 thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2318
2319
2320class BiliLiveIE(InfoExtractor):
2321 _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2322
2323 _TESTS = [{
2324 'url': 'https://live.bilibili.com/196',
2325 'info_dict': {
2326 'id': '33989',
2327 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
2328 'ext': 'flv',
2329 'title': '太空狼人杀联动,不被爆杀就算赢',
2330 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
2331 'timestamp': 1650802769,
2332 },
2333 'skip': 'not live',
2334 }, {
2335 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2336 'only_matching': True,
2337 }, {
2338 'url': 'https://live.bilibili.com/blanc/196',
2339 'only_matching': True,
2340 }]
2341
2342 _FORMATS = {
2343 80: {'format_id': 'low', 'format_note': '流畅'},
2344 150: {'format_id': 'high_res', 'format_note': '高清'},
2345 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
2346 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
2347 10000: {'format_id': 'source', 'format_note': '原画'},
2348 20000: {'format_id': '4K', 'format_note': '4K'},
2349 30000: {'format_id': 'dolby', 'format_note': '杜比'},
2350 }
2351
2352 _quality = staticmethod(qualities(list(_FORMATS)))
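    # qualities() assigns a preference equal to each qn's position in the list
    # above, so the ordering of _FORMATS doubles as the quality ladder: 原画
    # (source), 4K and 杜比 (Dolby) sort above the lower qn values when yt-dlp
    # picks the best format.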
2353
2354 def _call_api(self, path, room_id, query):
2355 api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
2356 if api_result.get('code') != 0:
2357 raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
2358 return api_result.get('data') or {}
2359
2360 def _parse_formats(self, qn, fmt):
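        # For the requested qn, only the codec entry whose current_qn matches is
        # used; each of its url_info mirrors yields one format whose URL is
        # stitched together as host + base_url + extra (the 'extra' part appears
        # to carry the signed query string).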
2361 for codec in fmt.get('codec') or []:
2362 if codec.get('current_qn') != qn:
2363 continue
2364 for url_info in codec['url_info']:
2365 yield {
2366 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2367 'ext': fmt.get('format_name'),
2368 'vcodec': codec.get('codec_name'),
2369 'quality': self._quality(qn),
2370 **self._FORMATS[qn],
2371 }
2372
2373 def _real_extract(self, url):
2374 room_id = self._match_id(url)
2375 room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2376 if room_data.get('live_status') == 0:
2377 raise ExtractorError('Streamer is not live', expected=True)
2378
2379 formats = []
2380 for qn in self._FORMATS:
2381 stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2382 'room_id': room_id,
2383 'qn': qn,
2384 'codec': '0,1',
2385 'format': '0,2',
2386 'mask': '0',
2387 'no_playurl': '0',
2388 'platform': 'web',
2389 'protocol': '0,1',
2390 })
2391 for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2392 formats.extend(self._parse_formats(qn, fmt))
2393
2394 return {
2395 'id': room_id,
2396 'title': room_data.get('title'),
2397 'description': room_data.get('description'),
2398 'thumbnail': room_data.get('user_cover'),
2399 'timestamp': stream_data.get('live_time'),
2400 'formats': formats,
2401 'is_live': True,
2402 'http_headers': {
2403 'Referer': url,
2404 },
2405 }