yt_dlp/extractor/bilibili.py
1 import base64
2 import functools
3 import hashlib
4 import itertools
5 import json
6 import math
7 import re
8 import time
9 import urllib.parse
10 import uuid
11
12 from .common import InfoExtractor, SearchInfoExtractor
13 from ..dependencies import Cryptodome
14 from ..networking.exceptions import HTTPError
15 from ..utils import (
16 ExtractorError,
17 GeoRestrictedError,
18 InAdvancePagedList,
19 OnDemandPagedList,
20 bool_or_none,
21 clean_html,
22 determine_ext,
23 filter_dict,
24 float_or_none,
25 format_field,
26 get_element_by_class,
27 int_or_none,
28 join_nonempty,
29 make_archive_id,
30 merge_dicts,
31 mimetype2ext,
32 parse_count,
33 parse_qs,
34 qualities,
35 smuggle_url,
36 srt_subtitles_timecode,
37 str_or_none,
38 traverse_obj,
39 try_call,
40 unified_timestamp,
41 unsmuggle_url,
42 url_or_none,
43 urlencode_postdata,
44 variadic,
45 )
46
47
48 class BilibiliBaseIE(InfoExtractor):
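# _FORMAT_ID_RE pulls the numeric id out of DASH segment URLs of the form '...-<id>.m4s?...';
# extract_formats() below uses that id as a fallback format_id.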
49 _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')
50
51 def extract_formats(self, play_info):
52 format_names = {
53 r['quality']: traverse_obj(r, 'new_description', 'display_desc')
54 for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
55 }
56
57 audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
58 flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
59 if flac_audio:
60 audios.append(flac_audio)
61 formats = [{
62 'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
63 'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
64 'acodec': traverse_obj(audio, ('codecs', {str.lower})),
65 'vcodec': 'none',
66 'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
67 'filesize': int_or_none(audio.get('size')),
68 'format_id': str_or_none(audio.get('id')),
69 } for audio in audios]
70
71 formats.extend({
72 'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
73 'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
74 'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
75 'width': int_or_none(video.get('width')),
76 'height': int_or_none(video.get('height')),
77 'vcodec': video.get('codecs'),
78 'acodec': 'none' if audios else None,
79 'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
80 'tbr': float_or_none(video.get('bandwidth'), scale=1000),
81 'filesize': int_or_none(video.get('size')),
82 'quality': int_or_none(video.get('id')),
83 'format_id': traverse_obj(
84 video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
85 ('id', {str_or_none}), get_all=False),
86 'format': format_names.get(video.get('id')),
87 } for video in traverse_obj(play_info, ('dash', 'video', ...)))
88
89 missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
90 if missing_formats:
91 self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
92 f'you have to log in or become a premium member to download them. {self._login_hint()}')
93
94 return formats
95
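# Illustrative request shape for the helper below (placeholder <bvid>/<cid> values):
#   https://api.bilibili.com/x/player/playurl?bvid=<bvid>&cid=<cid>&fnval=4048
# fnval=4048 appears to be a capability bitmask that includes the DASH flag; the returned
# 'data' object carries the 'dash' streams consumed by extract_formats() above.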
96 def _download_playinfo(self, video_id, cid, headers=None):
97 return self._download_json(
98 'https://api.bilibili.com/x/player/playurl', video_id,
99 query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
100 note=f'Downloading video formats for cid {cid}', headers=headers)['data']
101
102 def json2srt(self, json_data):
103 srt_data = ''
104 for idx, line in enumerate(json_data.get('body') or []):
105 srt_data += (f'{idx + 1}\n'
106 f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
107 f'{line["content"]}\n\n')
108 return srt_data
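# Illustrative json2srt input/output (field names from the code above, timing values made up):
#   {"body": [{"from": 0.0, "to": 1.5, "content": "hello"}]}
# becomes
#   1
#   00:00:00,000 --> 00:00:01,500
#   hello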
109
110 def _get_subtitles(self, video_id, cid, aid=None):
111 subtitles = {
112 'danmaku': [{
113 'ext': 'xml',
114 'url': f'https://comment.bilibili.com/{cid}.xml',
115 }],
116 }
117
118 subtitle_info = traverse_obj(self._download_json(
119 'https://api.bilibili.com/x/player/v2', video_id,
120 query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
121 note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
122 subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
123 if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
124 if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
125 self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
126 for s in subs_list:
127 subtitles.setdefault(s['lan'], []).append({
128 'ext': 'srt',
129 'data': self.json2srt(self._download_json(s['subtitle_url'], video_id)),
130 })
131 return subtitles
132
133 def _get_chapters(self, aid, cid):
134 chapters = aid and cid and self._download_json(
135 'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
136 note='Extracting chapters', fatal=False)
137 return traverse_obj(chapters, ('data', 'view_points', ..., {
138 'title': 'content',
139 'start_time': 'from',
140 'end_time': 'to',
141 })) or None
142
143 def _get_comments(self, aid):
144 for idx in itertools.count(1):
145 replies = traverse_obj(
146 self._download_json(
147 f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
148 aid, note=f'Extracting comments from page {idx}', fatal=False),
149 ('data', 'replies'))
150 if not replies:
151 return
152 for children in map(self._get_all_children, replies):
153 yield from children
154
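# _get_all_children flattens a comment thread depth-first: it yields the reply itself and then
# recurses into its nested replies; 'parent' keeps the threading ('root' marks top-level comments).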
155 def _get_all_children(self, reply):
156 yield {
157 'author': traverse_obj(reply, ('member', 'uname')),
158 'author_id': traverse_obj(reply, ('member', 'mid')),
159 'id': reply.get('rpid'),
160 'text': traverse_obj(reply, ('content', 'message')),
161 'timestamp': reply.get('ctime'),
162 'parent': reply.get('parent') or 'root',
163 }
164 for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
165 yield from children
166
167 def _get_episodes_from_season(self, ss_id, url):
168 season_info = self._download_json(
169 'https://api.bilibili.com/pgc/web/season/section', ss_id,
170 note='Downloading season info', query={'season_id': ss_id},
171 headers={'Referer': url, **self.geo_verification_headers()})
172
173 for entry in traverse_obj(season_info, (
174 'result', 'main_section', 'episodes',
175 lambda _, v: url_or_none(v['share_url']) and v['id'])):
176 yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))
177
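# Interactive ("stein gate") videos are a graph of choices: _get_divisions walks it recursively,
# one request per edge_id, and groups edges that share a cid so each video segment is listed once.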
178 def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
179 cid_edges = cid_edges or {}
180 division_data = self._download_json(
181 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
182 query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
183 note=f'Extracting divisions from edge {edge_id}')
184 edges.setdefault(edge_id, {}).update(
185 traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
186 'title': ('title', {str}),
187 'cid': ('cid', {int_or_none}),
188 }), get_all=False))
189
190 edges[edge_id].update(traverse_obj(division_data, ('data', {
191 'title': ('title', {str}),
192 'choices': ('edges', 'questions', ..., 'choices', ..., {
193 'edge_id': ('id', {int_or_none}),
194 'cid': ('cid', {int_or_none}),
195 'text': ('option', {str}),
196 }),
197 })))
198 # use dict to combine edges that use the same video section (same cid)
199 cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
200 for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
201 if choice['edge_id'] not in edges:
202 edges[choice['edge_id']] = {'cid': choice['cid']}
203 self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
204 return cid_edges
205
206 def _get_interactive_entries(self, video_id, cid, metainfo):
207 graph_version = traverse_obj(
208 self._download_json(
209 'https://api.bilibili.com/x/player/wbi/v2', video_id,
210 'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
211 ('data', 'interaction', 'graph_version', {int_or_none}))
212 cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
213 for cid, edges in cid_edges.items():
214 play_info = self._download_playinfo(video_id, cid)
215 yield {
216 **metainfo,
217 'id': f'{video_id}_{cid}',
218 'title': f'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
219 'formats': self.extract_formats(play_info),
220 'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
221 'duration': float_or_none(play_info.get('timelength'), scale=1000),
222 'subtitles': self.extract_subtitles(video_id, cid),
223 }
224
225
226 class BiliBiliIE(BilibiliBaseIE):
227 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
228
229 _TESTS = [{
230 'url': 'https://www.bilibili.com/video/BV13x41117TL',
231 'info_dict': {
232 'id': 'BV13x41117TL',
233 'title': '阿滴英文|英文歌分享#6 "Closer',
234 'ext': 'mp4',
235 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
236 'uploader_id': '65880958',
237 'uploader': '阿滴英文',
238 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
239 'duration': 554.117,
240 'tags': list,
241 'comment_count': int,
242 'upload_date': '20170301',
243 'timestamp': 1488353834,
244 'like_count': int,
245 'view_count': int,
246 },
247 }, {
248 'note': 'old av URL version',
249 'url': 'http://www.bilibili.com/video/av1074402/',
250 'info_dict': {
251 'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
252 'ext': 'mp4',
253 'uploader': '菊子桑',
254 'uploader_id': '156160',
255 'id': 'BV11x411K7CN',
256 'title': '【金坷垃】金泡沫',
257 'duration': 308.36,
258 'upload_date': '20140420',
259 'timestamp': 1397983878,
260 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
261 'like_count': int,
262 'comment_count': int,
263 'view_count': int,
264 'tags': list,
265 },
266 'params': {'skip_download': True},
267 }, {
268 'note': 'Anthology',
269 'url': 'https://www.bilibili.com/video/BV1bK411W797',
270 'info_dict': {
271 'id': 'BV1bK411W797',
272 'title': '物语中的人物是如何吐槽自己的OP的',
273 },
274 'playlist_count': 18,
275 'playlist': [{
276 'info_dict': {
277 'id': 'BV1bK411W797_p1',
278 'ext': 'mp4',
279 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
280 'tags': 'count:10',
281 'timestamp': 1589601697,
282 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
283 'uploader': '打牌还是打桩',
284 'uploader_id': '150259984',
285 'like_count': int,
286 'comment_count': int,
287 'upload_date': '20200516',
288 'view_count': int,
289 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
290 'duration': 90.314,
291 },
292 }],
293 }, {
294 'note': 'Specific page of Anthology',
295 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
296 'info_dict': {
297 'id': 'BV1bK411W797_p1',
298 'ext': 'mp4',
299 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
300 'tags': 'count:10',
301 'timestamp': 1589601697,
302 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
303 'uploader': '打牌还是打桩',
304 'uploader_id': '150259984',
305 'like_count': int,
306 'comment_count': int,
307 'upload_date': '20200516',
308 'view_count': int,
309 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
310 'duration': 90.314,
311 },
312 }, {
313 'note': 'video has subtitles',
314 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
315 'info_dict': {
316 'id': 'BV12N4y1M7rh',
317 'ext': 'mp4',
318 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
319 'tags': list,
320 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
321 'duration': 313.557,
322 'upload_date': '20220709',
323 'uploader': '小夫太渴',
324 'timestamp': 1657347907,
325 'uploader_id': '1326814124',
326 'comment_count': int,
327 'view_count': int,
328 'like_count': int,
329 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
330 'subtitles': 'count:2',
331 },
332 'params': {'listsubtitles': True},
333 }, {
334 'url': 'https://www.bilibili.com/video/av8903802/',
335 'info_dict': {
336 'id': 'BV13x41117TL',
337 'ext': 'mp4',
338 'title': '阿滴英文|英文歌分享#6 "Closer',
339 'upload_date': '20170301',
340 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
341 'timestamp': 1488353834,
342 'uploader_id': '65880958',
343 'uploader': '阿滴英文',
344 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
345 'duration': 554.117,
346 'tags': list,
347 'comment_count': int,
348 'view_count': int,
349 'like_count': int,
350 },
351 'params': {
352 'skip_download': True,
353 },
354 }, {
355 'note': 'video has chapter',
356 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
357 'info_dict': {
358 'id': 'BV1vL411G7N7',
359 'ext': 'mp4',
360 'title': '如何为你的B站视频添加进度条分段',
361 'timestamp': 1634554558,
362 'upload_date': '20211018',
363 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
364 'tags': list,
365 'uploader': '爱喝咖啡的当麻',
366 'duration': 669.482,
367 'uploader_id': '1680903',
368 'chapters': 'count:6',
369 'comment_count': int,
370 'view_count': int,
371 'like_count': int,
372 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
373 },
374 'params': {'skip_download': True},
375 }, {
376 'note': 'video redirects to festival page',
377 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
378 'info_dict': {
379 'id': 'BV1wP4y1P72h',
380 'ext': 'mp4',
381 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
382 'timestamp': 1643947497,
383 'upload_date': '20220204',
384 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
385 'uploader': '叨叨冯聊音乐',
386 'duration': 246.719,
387 'uploader_id': '528182630',
388 'view_count': int,
389 'like_count': int,
390 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
391 },
392 'params': {'skip_download': True},
393 }, {
394 'note': 'newer festival video',
395 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
396 'info_dict': {
397 'id': 'BV1ay4y1d77f',
398 'ext': 'mp4',
399 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
400 'timestamp': 1674273600,
401 'upload_date': '20230121',
402 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
403 'uploader': '果蝇轰',
404 'duration': 1111.722,
405 'uploader_id': '8469526',
406 'view_count': int,
407 'like_count': int,
408 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
409 },
410 'params': {'skip_download': True},
411 }, {
412 'note': 'interactive/split-path video',
413 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
414 'info_dict': {
415 'id': 'BV1af4y1H7ga',
416 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
417 'timestamp': 1630500414,
418 'upload_date': '20210901',
419 'description': 'md5:01113e39ab06e28042d74ac356a08786',
420 'tags': list,
421 'uploader': '钉宫妮妮Ninico',
422 'duration': 1503,
423 'uploader_id': '8881297',
424 'comment_count': int,
425 'view_count': int,
426 'like_count': int,
427 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
428 },
429 'playlist_count': 33,
430 'playlist': [{
431 'info_dict': {
432 'id': 'BV1af4y1H7ga_400950101',
433 'ext': 'mp4',
434 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
435 'timestamp': 1630500414,
436 'upload_date': '20210901',
437 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
438 'tags': list,
439 'uploader': '钉宫妮妮Ninico',
440 'duration': 11.605,
441 'uploader_id': '8881297',
442 'comment_count': int,
443 'view_count': int,
444 'like_count': int,
445 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
446 },
447 }],
448 }, {
449 'note': '301 redirect to bangumi link',
450 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
451 'info_dict': {
452 'id': '288525',
453 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
454 'ext': 'mp4',
455 'series': '我和我的祖国',
456 'series_id': '4780',
457 'season': '幕后纪实',
458 'season_id': '28609',
459 'season_number': 1,
460 'episode': '钱学森弹道和乘波体飞行器是什么?',
461 'episode_id': '288525',
462 'episode_number': 105,
463 'duration': 1183.957,
464 'timestamp': 1571648124,
465 'upload_date': '20191021',
466 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
467 },
468 }, {
469 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
470 'info_dict': {
471 'id': 'BV1jL41167ZG',
472 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
473 'ext': 'mp4',
474 },
475 'skip': 'supporter-only video',
476 }, {
477 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
478 'info_dict': {
479 'id': 'BV1Ks411f7aQ',
480 'title': '【BD1080P】狼与香辛料I【华盟】',
481 'ext': 'mp4',
482 },
483 'skip': 'login required',
484 }, {
485 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
486 'info_dict': {
487 'id': 'BV1GJ411x7h7',
488 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
489 'ext': 'mp4',
490 },
491 'skip': 'geo-restricted',
492 }]
493
494 def _real_extract(self, url):
495 video_id = self._match_id(url)
496 headers = self.geo_verification_headers()
497 webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
498 if not self._match_valid_url(urlh.url):
499 return self.url_result(urlh.url)
500
501 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)
502
503 is_festival = 'videoData' not in initial_state
504 if is_festival:
505 video_data = initial_state['videoInfo']
506 else:
507 play_info_obj = self._search_json(
508 r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
509 if not play_info_obj:
510 if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
511 self.raise_login_required()
512 if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
513 raise ExtractorError(
514 'This video may be deleted or geo-restricted. '
515 'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
516 play_info = traverse_obj(play_info_obj, ('data', {dict}))
517 if not play_info:
518 if traverse_obj(play_info_obj, 'code') == 87007:
519 toast = get_element_by_class('tips-toast', webpage) or ''
520 msg = clean_html(
521 f'{get_element_by_class("belongs-to", toast) or ""},'
522 + (get_element_by_class('level', toast) or ''))
523 raise ExtractorError(
524 f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
525 raise ExtractorError('Failed to extract play info')
526 video_data = initial_state['videoData']
527
528 video_id, title = video_data['bvid'], video_data.get('title')
529
530 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
531 page_list_json = not is_festival and traverse_obj(
532 self._download_json(
533 'https://api.bilibili.com/x/player/pagelist', video_id,
534 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
535 note='Extracting videos in anthology', headers=headers),
536 'data', expected_type=list) or []
537 is_anthology = len(page_list_json) > 1
538
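# If this is a multi-part anthology and no specific part was requested via ?p=N (and
# --no-playlist is not in effect), expand it into a playlist of per-page URLs below; otherwise
# extract the selected part, suffixing the title with ' pNN <part name>' and resolving its cid.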
539 part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
540 if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
541 return self.playlist_from_matches(
542 page_list_json, video_id, title, ie=BiliBiliIE,
543 getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
544
545 if is_anthology:
546 part_id = part_id or 1
547 title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
548
549 aid = video_data.get('aid')
550 old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')
551
552 cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')
553
554 festival_info = {}
555 if is_festival:
556 play_info = self._download_playinfo(video_id, cid, headers=headers)
557
558 festival_info = traverse_obj(initial_state, {
559 'uploader': ('videoInfo', 'upName'),
560 'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
561 'like_count': ('videoStatus', 'like', {int_or_none}),
562 'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
563 }, get_all=False)
564
565 metainfo = {
566 **traverse_obj(initial_state, {
567 'uploader': ('upData', 'name'),
568 'uploader_id': ('upData', 'mid', {str_or_none}),
569 'like_count': ('videoData', 'stat', 'like', {int_or_none}),
570 'tags': ('tags', ..., 'tag_name'),
571 'thumbnail': ('videoData', 'pic', {url_or_none}),
572 }),
573 **festival_info,
574 **traverse_obj(video_data, {
575 'description': 'desc',
576 'timestamp': ('pubdate', {int_or_none}),
577 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
578 'comment_count': ('stat', 'reply', {int_or_none}),
579 }, get_all=False),
580 'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
581 '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
582 'title': title,
583 'http_headers': {'Referer': url},
584 }
585
586 is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
587 if is_interactive:
588 return self.playlist_result(
589 self._get_interactive_entries(video_id, cid, metainfo), **metainfo,
590 duration=traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
591 __post_extractor=self.extract_comments(aid))
592 else:
593 return {
594 **metainfo,
595 'duration': float_or_none(play_info.get('timelength'), scale=1000),
596 'chapters': self._get_chapters(aid, cid),
597 'subtitles': self.extract_subtitles(video_id, cid),
598 'formats': self.extract_formats(play_info),
599 '__post_extractor': self.extract_comments(aid),
600 }
601
602
603 class BiliBiliBangumiIE(BilibiliBaseIE):
604 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
605
606 _TESTS = [{
607 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
608 'info_dict': {
609 'id': '21495',
610 'ext': 'mp4',
611 'series': '悠久之翼',
612 'series_id': '774',
613 'season': '第二季',
614 'season_id': '1182',
615 'season_number': 2,
616 'episode': 'forever/ef',
617 'episode_id': '21495',
618 'episode_number': 12,
619 'title': '12 forever/ef',
620 'duration': 1420.791,
621 'timestamp': 1320412200,
622 'upload_date': '20111104',
623 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
624 },
625 }, {
626 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
627 'info_dict': {
628 'id': '267851',
629 'ext': 'mp4',
630 'series': '鬼灭之刃',
631 'series_id': '4358',
632 'season': '立志篇',
633 'season_id': '26801',
634 'season_number': 1,
635 'episode': '残酷',
636 'episode_id': '267851',
637 'episode_number': 1,
638 'title': '1 残酷',
639 'duration': 1425.256,
640 'timestamp': 1554566400,
641 'upload_date': '20190406',
642 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
643 },
644 'skip': 'Geo-restricted',
645 }, {
646 'note': 'a making-of which falls outside main section',
647 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
648 'info_dict': {
649 'id': '345120',
650 'ext': 'mp4',
651 'series': '鬼灭之刃',
652 'series_id': '4358',
653 'season': '立志篇',
654 'season_id': '26801',
655 'season_number': 1,
656 'episode': '炭治郎篇',
657 'episode_id': '345120',
658 'episode_number': 27,
659 'title': '#1 炭治郎篇',
660 'duration': 1922.129,
661 'timestamp': 1602853860,
662 'upload_date': '20201016',
663 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
664 },
665 }]
666
667 def _real_extract(self, url):
668 episode_id = self._match_id(url)
669 headers = self.geo_verification_headers()
670 webpage = self._download_webpage(url, episode_id, headers=headers)
671
672 if '您所在的地区无法观看本片' in webpage:
673 raise GeoRestrictedError('This video is restricted')
674 elif '正在观看预览,大会员免费看全片' in webpage:
675 self.raise_login_required('This video is for premium members only')
676
677 headers['Referer'] = url
678 play_info = self._download_json(
679 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
680 'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
681 headers=headers)
682 premium_only = play_info.get('code') == -10403
683 play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}
684
685 formats = self.extract_formats(play_info)
686 if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
687 self.raise_login_required('This video is for premium members only')
688
689 bangumi_info = self._download_json(
690 'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
691 query={'ep_id': episode_id}, headers=headers)['result']
692
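# Locate this episode inside the season's episode list (its 1-based position is the fallback
# episode number), then work out which of the series' seasons it belongs to.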
693 episode_number, episode_info = next((
694 (idx, ep) for idx, ep in enumerate(traverse_obj(
695 bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
696 if str_or_none(ep.get('id')) == episode_id), (1, {}))
697
698 season_id = bangumi_info.get('season_id')
699 season_number, season_title = season_id and next((
700 (idx + 1, e.get('season_title')) for idx, e in enumerate(
701 traverse_obj(bangumi_info, ('seasons', ...)))
702 if e.get('season_id') == season_id
703 ), (None, None))
704
705 aid = episode_info.get('aid')
706
707 return {
708 'id': episode_id,
709 'formats': formats,
710 **traverse_obj(bangumi_info, {
711 'series': ('series', 'series_title', {str}),
712 'series_id': ('series', 'series_id', {str_or_none}),
713 'thumbnail': ('square_cover', {url_or_none}),
714 }),
715 **traverse_obj(episode_info, {
716 'episode': ('long_title', {str}),
717 'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
718 'timestamp': ('pub_time', {int_or_none}),
719 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
720 }),
721 'episode_id': episode_id,
722 'season': str_or_none(season_title),
723 'season_id': str_or_none(season_id),
724 'season_number': season_number,
725 'duration': float_or_none(play_info.get('timelength'), scale=1000),
726 'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
727 '__post_extractor': self.extract_comments(aid),
728 'http_headers': {'Referer': url},
729 }
730
731
732 class BiliBiliBangumiMediaIE(BilibiliBaseIE):
733 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
734 _TESTS = [{
735 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
736 'info_dict': {
737 'id': '24097891',
738 'title': 'CAROLE & TUESDAY',
739 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
740 },
741 'playlist_mincount': 25,
742 }, {
743 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
744 'info_dict': {
745 'id': '1565',
746 'title': '攻壳机动队 S.A.C. 2nd GIG',
747 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
748 },
749 'playlist_count': 26,
750 'playlist': [{
751 'info_dict': {
752 'id': '68540',
753 'ext': 'mp4',
754 'series': '攻壳机动队',
755 'series_id': '1077',
756 'season': '第二季',
757 'season_id': '1565',
758 'season_number': 2,
759 'episode': '再启动 REEMBODY',
760 'episode_id': '68540',
761 'episode_number': 1,
762 'title': '1 再启动 REEMBODY',
763 'duration': 1525.777,
764 'timestamp': 1425074413,
765 'upload_date': '20150227',
766 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
767 },
768 }],
769 }]
770
771 def _real_extract(self, url):
772 media_id = self._match_id(url)
773 webpage = self._download_webpage(url, media_id)
774
775 initial_state = self._search_json(
776 r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)
777 ss_id = initial_state['mediaInfo']['season_id']
778
779 return self.playlist_result(
780 self._get_episodes_from_season(ss_id, url), media_id,
781 **traverse_obj(initial_state, ('mediaInfo', {
782 'title': ('title', {str}),
783 'description': ('evaluate', {str}),
784 })))
785
786
787 class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
788 _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
789 _TESTS = [{
790 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
791 'info_dict': {
792 'id': '26801',
793 'title': '鬼灭之刃',
794 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
795 },
796 'playlist_mincount': 26,
797 }, {
798 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
799 'info_dict': {
800 'id': '2251',
801 'title': '玲音',
802 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
803 },
804 'playlist_count': 13,
805 'playlist': [{
806 'info_dict': {
807 'id': '50188',
808 'ext': 'mp4',
809 'series': '玲音',
810 'series_id': '1526',
811 'season': 'TV',
812 'season_id': '2251',
813 'season_number': 1,
814 'episode': 'WEIRD',
815 'episode_id': '50188',
816 'episode_number': 1,
817 'title': '1 WEIRD',
818 'duration': 1436.992,
819 'timestamp': 1343185080,
820 'upload_date': '20120725',
821 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
822 },
823 }],
824 }]
825
826 def _real_extract(self, url):
827 ss_id = self._match_id(url)
828 webpage = self._download_webpage(url, ss_id)
829 metainfo = traverse_obj(
830 self._search_json(r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id),
831 ('itemListElement', ..., {
832 'title': ('name', {str}),
833 'description': ('description', {str}),
834 }), get_all=False)
835
836 return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
837
838
839 class BilibiliCheeseBaseIE(BilibiliBaseIE):
840 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
841
842 def _extract_episode(self, season_info, ep_id):
843 episode_info = traverse_obj(season_info, (
844 'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
845 aid, cid = episode_info['aid'], episode_info['cid']
846
847 if traverse_obj(episode_info, 'ep_status') == -1:
848 raise ExtractorError('This course episode is not yet available.', expected=True)
849 if not traverse_obj(episode_info, 'playable'):
850 self.raise_login_required('You need to purchase the course to download this episode')
851
852 play_info = self._download_json(
853 'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
854 query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
855 headers=self._HEADERS, note='Downloading playinfo')['data']
856
857 return {
858 'id': str_or_none(ep_id),
859 'episode_id': str_or_none(ep_id),
860 'formats': self.extract_formats(play_info),
861 'extractor_key': BilibiliCheeseIE.ie_key(),
862 'extractor': BilibiliCheeseIE.IE_NAME,
863 'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
864 **traverse_obj(episode_info, {
865 'episode': ('title', {str}),
866 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
867 'alt_title': ('subtitle', {str}),
868 'duration': ('duration', {int_or_none}),
869 'episode_number': ('index', {int_or_none}),
870 'thumbnail': ('cover', {url_or_none}),
871 'timestamp': ('release_date', {int_or_none}),
872 'view_count': ('play', {int_or_none}),
873 }),
874 **traverse_obj(season_info, {
875 'uploader': ('up_info', 'uname', {str}),
876 'uploader_id': ('up_info', 'mid', {str_or_none}),
877 }),
878 'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
879 '__post_extractor': self.extract_comments(aid),
880 'http_headers': self._HEADERS,
881 }
882
883 def _download_season_info(self, query_key, video_id):
884 return self._download_json(
885 f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
886 headers=self._HEADERS, note='Downloading season info')['data']
887
888
889 class BilibiliCheeseIE(BilibiliCheeseBaseIE):
890 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
891 _TESTS = [{
892 'url': 'https://www.bilibili.com/cheese/play/ep229832',
893 'info_dict': {
894 'id': '229832',
895 'ext': 'mp4',
896 'title': '1 - 课程先导片',
897 'alt_title': '视频课 · 3分41秒',
898 'uploader': '马督工',
899 'uploader_id': '316568752',
900 'episode': '课程先导片',
901 'episode_id': '229832',
902 'episode_number': 1,
903 'duration': 221,
904 'timestamp': 1695549606,
905 'upload_date': '20230924',
906 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
907 'view_count': int,
908 },
909 }]
910
911 def _real_extract(self, url):
912 ep_id = self._match_id(url)
913 return self._extract_episode(self._download_season_info('ep_id', ep_id), ep_id)
914
915
916 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
917 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
918 _TESTS = [{
919 'url': 'https://www.bilibili.com/cheese/play/ss5918',
920 'info_dict': {
921 'id': '5918',
922 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
923 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
924 },
925 'playlist': [{
926 'info_dict': {
927 'id': '229832',
928 'ext': 'mp4',
929 'title': '1 - 课程先导片',
930 'alt_title': '视频课 · 3分41秒',
931 'uploader': '马督工',
932 'uploader_id': '316568752',
933 'episode': '课程先导片',
934 'episode_id': '229832',
935 'episode_number': 1,
936 'duration': 221,
937 'timestamp': 1695549606,
938 'upload_date': '20230924',
939 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
940 'view_count': int,
941 },
942 }],
943 'params': {'playlist_items': '1'},
944 }, {
945 'url': 'https://www.bilibili.com/cheese/play/ss5918',
946 'info_dict': {
947 'id': '5918',
948 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
949 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
950 },
951 'playlist_mincount': 5,
952 'skip': 'paid video in list',
953 }]
954
955 def _get_cheese_entries(self, season_info):
956 for ep_id in traverse_obj(season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id')):
957 yield self._extract_episode(season_info, ep_id)
958
959 def _real_extract(self, url):
960 season_id = self._match_id(url)
961 season_info = self._download_season_info('season_id', season_id)
962
963 return self.playlist_result(
964 self._get_cheese_entries(season_info), season_id,
965 **traverse_obj(season_info, {
966 'title': ('title', {str}),
967 'description': ('subtitle', {str}),
968 }))
969
970
971 class BilibiliSpaceBaseIE(InfoExtractor):
972 def _extract_playlist(self, fetch_page, get_metadata, get_entries):
973 first_page = fetch_page(0)
974 metadata = get_metadata(first_page)
975
976 paged_list = InAdvancePagedList(
977 lambda idx: get_entries(fetch_page(idx) if idx else first_page),
978 metadata['page_count'], metadata['page_size'])
979
980 return metadata, paged_list
981
982
983 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
984 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
985 _TESTS = [{
986 'url': 'https://space.bilibili.com/3985676/video',
987 'info_dict': {
988 'id': '3985676',
989 },
990 'playlist_mincount': 178,
991 }, {
992 'url': 'https://space.bilibili.com/313580179/video',
993 'info_dict': {
994 'id': '313580179',
995 },
996 'playlist_mincount': 92,
997 }]
998
999 def _extract_signature(self, playlist_id):
1000 session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)
1001
1002 key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
1003 img_key = traverse_obj(
1004 session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
1005 sub_key = traverse_obj(
1006 session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'
1007
1008 session_key = img_key + sub_key
1009
1010 signature_values = []
1011 for position in (
1012 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
1013 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
1014 57, 62, 11, 36, 20, 34, 44, 52,
1015 ):
1016 char_at_position = try_call(lambda: session_key[position])
1017 if char_at_position:
1018 signature_values.append(char_at_position)
1019
1020 return ''.join(signature_values)[:32]
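# The characters of img_key + sub_key are re-ordered by the fixed position table above and the
# first 32 characters form the signing key (the WBI "mixin key"); the hard-coded strings are
# fallbacks for when the nav endpoint cannot be fetched.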
1021
1022 def _real_extract(self, url):
1023 playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
1024 if not is_video_url:
1025 self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1026 'To download audio, add "/audio" to the URL')
1027
1028 signature = self._extract_signature(playlist_id)
1029
1030 def fetch_page(page_idx):
1031 query = {
1032 'keyword': '',
1033 'mid': playlist_id,
1034 'order': 'pubdate',
1035 'order_avoided': 'true',
1036 'platform': 'web',
1037 'pn': page_idx + 1,
1038 'ps': 30,
1039 'tid': 0,
1040 'web_location': 1550101,
1041 'wts': int(time.time()),
1042 }
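# w_rid is the WBI signature: md5 of the urlencoded query above (wts timestamp included)
# concatenated with the 32-character key from _extract_signature().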
1043 query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
1044
1045 try:
1046 response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
1047 playlist_id, note=f'Downloading page {page_idx}', query=query,
1048 headers={'referer': url})
1049 except ExtractorError as e:
1050 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
1051 raise ExtractorError(
1052 'Request is blocked by server (412); please add cookies, wait and try again later.', expected=True)
1053 raise
1054 if response['code'] in (-352, -401):
1055 raise ExtractorError(
1056 f'Request is blocked by server ({-response["code"]}), '
1057 'please add cookies, wait and try again later.', expected=True)
1058 return response['data']
1059
1060 def get_metadata(page_data):
1061 page_size = page_data['page']['ps']
1062 entry_count = page_data['page']['count']
1063 return {
1064 'page_count': math.ceil(entry_count / page_size),
1065 'page_size': page_size,
1066 }
1067
1068 def get_entries(page_data):
1069 for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
1070 yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])
1071
1072 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1073 return self.playlist_result(paged_list, playlist_id)
1074
1075
1076 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
1077 _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
1078 _TESTS = [{
1079 'url': 'https://space.bilibili.com/313580179/audio',
1080 'info_dict': {
1081 'id': '313580179',
1082 },
1083 'playlist_mincount': 1,
1084 }]
1085
1086 def _real_extract(self, url):
1087 playlist_id = self._match_id(url)
1088
1089 def fetch_page(page_idx):
1090 return self._download_json(
1091 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
1092 note=f'Downloading page {page_idx}',
1093 query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']
1094
1095 def get_metadata(page_data):
1096 return {
1097 'page_count': page_data['pageCount'],
1098 'page_size': page_data['pageSize'],
1099 }
1100
1101 def get_entries(page_data):
1102 for entry in page_data.get('data', []):
1103 yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])
1104
1105 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1106 return self.playlist_result(paged_list, playlist_id)
1107
1108
1109 class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
1110 def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
1111 for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
1112 yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)
1113
1114 def _get_uploader(self, uid, playlist_id):
1115 webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
1116 return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)
1117
1118 def _extract_playlist(self, fetch_page, get_metadata, get_entries):
1119 metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
1120 metadata.pop('page_count', None)
1121 metadata.pop('page_size', None)
1122 return metadata, page_list
1123
1124
1125 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
1126 _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
1127 _TESTS = [{
1128 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
1129 'info_dict': {
1130 'id': '2142762_57445',
1131 'title': '【完结】《底特律 变人》全结局流程解说',
1132 'description': '',
1133 'uploader': '老戴在此',
1134 'uploader_id': '2142762',
1135 'timestamp': int,
1136 'upload_date': str,
1137 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
1138 },
1139 'playlist_mincount': 31,
1140 }]
1141
1142 def _real_extract(self, url):
1143 mid, sid = self._match_valid_url(url).group('mid', 'sid')
1144 playlist_id = f'{mid}_{sid}'
1145
1146 def fetch_page(page_idx):
1147 return self._download_json(
1148 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
1149 playlist_id, note=f'Downloading page {page_idx}',
1150 query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']
1151
1152 def get_metadata(page_data):
1153 page_size = page_data['page']['page_size']
1154 entry_count = page_data['page']['total']
1155 return {
1156 'page_count': math.ceil(entry_count / page_size),
1157 'page_size': page_size,
1158 'uploader': self._get_uploader(mid, playlist_id),
1159 **traverse_obj(page_data, {
1160 'title': ('meta', 'name', {str}),
1161 'description': ('meta', 'description', {str}),
1162 'uploader_id': ('meta', 'mid', {str_or_none}),
1163 'timestamp': ('meta', 'ptime', {int_or_none}),
1164 'thumbnail': ('meta', 'cover', {url_or_none}),
1165 }),
1166 }
1167
1168 def get_entries(page_data):
1169 return self._get_entries(page_data, 'archives')
1170
1171 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1172 return self.playlist_result(paged_list, playlist_id, **metadata)
1173
1174
1175 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
1176 _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
1177 _TESTS = [{
1178 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
1179 'info_dict': {
1180 'id': '1958703906_547718',
1181 'title': '直播回放',
1182 'description': '直播回放',
1183 'uploader': '靡烟miya',
1184 'uploader_id': '1958703906',
1185 'timestamp': 1637985853,
1186 'upload_date': '20211127',
1187 'modified_timestamp': int,
1188 'modified_date': str,
1189 },
1190 'playlist_mincount': 513,
1191 }]
1192
1193 def _real_extract(self, url):
1194 mid, sid = self._match_valid_url(url).group('mid', 'sid')
1195 playlist_id = f'{mid}_{sid}'
1196 playlist_meta = traverse_obj(self._download_json(
1197 f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
1198 ), {
1199 'title': ('data', 'meta', 'name', {str}),
1200 'description': ('data', 'meta', 'description', {str}),
1201 'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
1202 'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
1203 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
1204 })
1205
1206 def fetch_page(page_idx):
1207 return self._download_json(
1208 'https://api.bilibili.com/x/series/archives',
1209 playlist_id, note=f'Downloading page {page_idx}',
1210 query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']
1211
1212 def get_metadata(page_data):
1213 page_size = page_data['page']['size']
1214 entry_count = page_data['page']['total']
1215 return {
1216 'page_count': math.ceil(entry_count / page_size),
1217 'page_size': page_size,
1218 'uploader': self._get_uploader(mid, playlist_id),
1219 **playlist_meta,
1220 }
1221
1222 def get_entries(page_data):
1223 return self._get_entries(page_data, 'archives')
1224
1225 metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
1226 return self.playlist_result(paged_list, playlist_id, **metadata)
1227
1228
1229 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
1230 _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
1231 _TESTS = [{
1232 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
1233 'info_dict': {
1234 'id': '1103407912',
1235 'title': '【V2】(旧)',
1236 'description': '',
1237 'uploader': '晓月春日',
1238 'uploader_id': '84912',
1239 'timestamp': 1604905176,
1240 'upload_date': '20201109',
1241 'modified_timestamp': int,
1242 'modified_date': str,
1243 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1244 'view_count': int,
1245 'like_count': int,
1246 },
1247 'playlist_mincount': 22,
1248 }, {
1249 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
1250 'only_matching': True,
1251 }]
1252
1253 def _real_extract(self, url):
1254 fid = self._match_id(url)
1255
1256 list_info = self._download_json(
1257 f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
1258 fid, note='Downloading favlist metadata')
1259 if list_info['code'] == -403:
1260 self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')
1261
1262 entries = self._get_entries(self._download_json(
1263 f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
1264 fid, note='Download favlist entries'), 'data')
1265
1266 return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
1267 'title': ('title', {str}),
1268 'description': ('intro', {str}),
1269 'uploader': ('upper', 'name', {str}),
1270 'uploader_id': ('upper', 'mid', {str_or_none}),
1271 'timestamp': ('ctime', {int_or_none}),
1272 'modified_timestamp': ('mtime', {int_or_none}),
1273 'thumbnail': ('cover', {url_or_none}),
1274 'view_count': ('cnt_info', 'play', {int_or_none}),
1275 'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
1276 })))
1277
1278
1279 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
1280 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
1281 _TESTS = [{
1282 'url': 'https://www.bilibili.com/watchlater/#/list',
1283 'info_dict': {'id': 'watchlater'},
1284 'playlist_mincount': 0,
1285 'skip': 'login required',
1286 }]
1287
1288 def _real_extract(self, url):
1289 list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
1290 watchlater_info = self._download_json(
1291 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
1292 if watchlater_info['code'] == -101:
1293 self.raise_login_required(msg='You need to log in to access your watchlater list')
1294 entries = self._get_entries(watchlater_info, ('data', 'list'))
1295 return self.playlist_result(entries, id=list_id, title='稍后再看')
1296
1297
1298 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
1299 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
1300 _TESTS = [{
1301 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
1302 'info_dict': {
1303 'id': '5_547718',
1304 'title': '直播回放',
1305 'uploader': '靡烟miya',
1306 'uploader_id': '1958703906',
1307 'timestamp': 1637985853,
1308 'upload_date': '20211127',
1309 },
1310 'playlist_mincount': 513,
1311 }, {
1312 'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
1313 'info_dict': {
1314 'id': 'BV1DU4y1r7tz',
1315 'ext': 'mp4',
1316 'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
1317 'upload_date': '20220820',
1318 'description': '',
1319 'timestamp': 1661016330,
1320 'uploader_id': '1958703906',
1321 'uploader': '靡烟miya',
1322 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1323 'duration': 9552.903,
1324 'tags': list,
1325 'comment_count': int,
1326 'view_count': int,
1327 'like_count': int,
1328 '_old_archive_ids': ['bilibili 687146339_part1'],
1329 },
1330 'params': {'noplaylist': True},
1331 }, {
1332 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
1333 'info_dict': {
1334 'id': '5_547718',
1335 },
1336 'playlist_mincount': 513,
1337 'skip': 'redirect url',
1338 }, {
1339 'url': 'https://www.bilibili.com/list/ml1103407912',
1340 'info_dict': {
1341 'id': '3_1103407912',
1342 'title': '【V2】(旧)',
1343 'uploader': '晓月春日',
1344 'uploader_id': '84912',
1345 'timestamp': 1604905176,
1346 'upload_date': '20201109',
1347 'thumbnail': r're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1348 },
1349 'playlist_mincount': 22,
1350 }, {
1351 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
1352 'info_dict': {
1353 'id': '3_1103407912',
1354 },
1355 'playlist_mincount': 22,
1356 'skip': 'redirect url',
1357 }, {
1358 'url': 'https://www.bilibili.com/list/watchlater',
1359 'info_dict': {'id': 'watchlater'},
1360 'playlist_mincount': 0,
1361 'skip': 'login required',
1362 }, {
1363 'url': 'https://www.bilibili.com/medialist/play/watchlater',
1364 'info_dict': {'id': 'watchlater'},
1365 'playlist_mincount': 0,
1366 'skip': 'login required',
1367 }]
1368
1369 def _extract_medialist(self, query, list_id):
1370 for page_num in itertools.count(1):
1371 page_data = self._download_json(
1372 'https://api.bilibili.com/x/v2/medialist/resource/list',
1373 list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
1374 )['data']
1375 yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
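# Cursor-style pagination: the next request resumes from the id of the last entry on this page.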
1376 query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
1377 if not page_data.get('has_more', False):
1378 break
1379
1380 def _real_extract(self, url):
1381 list_id = self._match_id(url)
1382
1383 bvid = traverse_obj(parse_qs(url), ('bvid', 0))
1384 if not self._yes_playlist(list_id, bvid):
1385 return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
1386
1387 webpage = self._download_webpage(url, list_id)
1388 initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
1389 if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
1390 error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
1391 error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
1392 if error_code == -400 and list_id == 'watchlater':
1393 self.raise_login_required('You need to log in to access your watchlater playlist')
1394 elif error_code == -403:
1395 self.raise_login_required('This is a private playlist. You need to log in as its owner')
1396 elif error_code == 11010:
1397 raise ExtractorError('Playlist is no longer available', expected=True)
1398 raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')
1399
1400 query = {
1401 'ps': 20,
1402 'with_current': False,
1403 **traverse_obj(initial_state, {
1404 'type': ('playlist', 'type', {int_or_none}),
1405 'biz_id': ('playlist', 'id', {int_or_none}),
1406 'tid': ('tid', {int_or_none}),
1407 'sort_field': ('sortFiled', {int_or_none}),
1408 'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
1409 }),
1410 }
1411 metadata = {
1412 'id': f'{query["type"]}_{query["biz_id"]}',
1413 **traverse_obj(initial_state, ('mediaListInfo', {
1414 'title': ('title', {str}),
1415 'uploader': ('upper', 'name', {str}),
1416 'uploader_id': ('upper', 'mid', {str_or_none}),
1417 'timestamp': ('ctime', {int_or_none}),
1418 'thumbnail': ('cover', {url_or_none}),
1419 })),
1420 }
1421 return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
1422
1423
1424 class BilibiliCategoryIE(InfoExtractor):
1425 IE_NAME = 'Bilibili category extractor'
1426 _MAX_RESULTS = 1000000
1427 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
1428 _TESTS = [{
1429 'url': 'https://www.bilibili.com/v/kichiku/mad',
1430 'info_dict': {
1431 'id': 'kichiku: mad',
1432 'title': 'kichiku: mad',
1433 },
1434 'playlist_mincount': 45,
1435 'params': {
1436 'playlistend': 45,
1437 },
1438 }]
1439
1440 def _fetch_page(self, api_url, num_pages, query, page_num):
1441 parsed_json = self._download_json(
1442 api_url, query, query={'Search_key': query, 'pn': page_num},
1443 note=f'Extracting results from page {page_num} of {num_pages}')
1444
1445 video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
1446 if not video_list:
1447 raise ExtractorError(f'Failed to retrieve video list for page {page_num}')
1448
1449 for video in video_list:
1450 yield self.url_result(
1451 'https://www.bilibili.com/video/{}'.format(video['bvid']), 'BiliBili', video['bvid'])
1452
1453 def _entries(self, category, subcategory, query):
1454 # map of categories : subcategories : RIDs
1455 rid_map = {
1456 'kichiku': {
1457 'mad': 26,
1458 'manual_vocaloid': 126,
1459 'guide': 22,
1460 'theatre': 216,
1461 'course': 127,
1462 },
1463 }
1464
1465 if category not in rid_map:
1466 raise ExtractorError(
1467 f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1468 if subcategory not in rid_map[category]:
1469 raise ExtractorError(
1470 f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1471 rid_value = rid_map[category][subcategory]
1472
1473 api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1474 page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
1475 page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
1476 count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
1477 if count is None or not size:
1478 raise ExtractorError('Failed to calculate either page count or size')
1479
1480 num_pages = math.ceil(count / size)
1481
1482 return OnDemandPagedList(functools.partial(
1483 self._fetch_page, api_url, num_pages, query), size)
1484
1485 def _real_extract(self, url):
1486 category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
1487 query = f'{category}: {subcategory}'
1488
1489 return self.playlist_result(self._entries(category, subcategory, query), query, query)
1490
1491
1492 class BiliBiliSearchIE(SearchInfoExtractor):
1493 IE_DESC = 'Bilibili video search'
1494 _MAX_RESULTS = 100000
1495 _SEARCH_KEY = 'bilisearch'
1496 _TESTS = [{
1497 'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1498 'playlist_count': 3,
1499 'info_dict': {
1500 'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1501 'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1502 },
1503 'playlist': [{
1504 'info_dict': {
1505 'id': 'BV1n44y1Q7sc',
1506 'ext': 'mp4',
1507 'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
1508 'timestamp': 1669889987,
1509 'upload_date': '20221201',
1510 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
1511 'tags': list,
1512 'uploader': '靡烟miya',
1513 'duration': 123.156,
1514 'uploader_id': '1958703906',
1515 'comment_count': int,
1516 'view_count': int,
1517 'like_count': int,
1518 'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
1519 '_old_archive_ids': ['bilibili 988222410_part1'],
1520 },
1521 }],
1522 }]
1523
1524 def _search_results(self, query):
1525 if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
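# Synthesize a buvid3 device cookie (random UUID + 'infoc' suffix) when none is present;
# the search endpoint tends to reject requests that carry no buvid3 at all.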
1526 self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
1527 for page_num in itertools.count(1):
1528 videos = self._download_json(
1529 'https://api.bilibili.com/x/web-interface/search/type', query,
1530 note=f'Extracting results from page {page_num}', query={
1531 'Search_key': query,
1532 'keyword': query,
1533 'page': page_num,
1534 'context': '',
1535 'duration': 0,
1536 'tids_2': '',
1537 '__refresh__': 'true',
1538 'search_type': 'video',
1539 'tids': 0,
1540 'highlight': 1,
1541 })['data'].get('result')
1542 if not videos:
1543 break
1544 for video in videos:
1545 yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
1546
1547
1548 class BilibiliAudioBaseIE(InfoExtractor):
1549 def _call_api(self, path, sid, query=None):
1550 if not query:
1551 query = {'sid': sid}
1552 return self._download_json(
1553 'https://www.bilibili.com/audio/music-service-c/web/' + path,
1554 sid, query=query)['data']
1555
1556
1557 class BilibiliAudioIE(BilibiliAudioBaseIE):
1558 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1559 _TEST = {
1560 'url': 'https://www.bilibili.com/audio/au1003142',
1561 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1562 'info_dict': {
1563 'id': '1003142',
1564 'ext': 'm4a',
1565 'title': '【tsukimi】YELLOW / 神山羊',
1566 'artist': 'tsukimi',
1567 'comment_count': int,
1568 'description': 'YELLOW的mp3版!',
1569 'duration': 183,
1570 'subtitles': {
1571 'origin': [{
1572 'ext': 'lrc',
1573 }],
1574 },
1575 'thumbnail': r're:^https?://.+\.jpg',
1576 'timestamp': 1564836614,
1577 'upload_date': '20190803',
1578 'uploader': 'tsukimi-つきみぐー',
1579 'view_count': int,
1580 },
1581 }
1582
1583 def _real_extract(self, url):
1584 au_id = self._match_id(url)
1585
1586 play_data = self._call_api('url', au_id)
1587 formats = [{
1588 'url': play_data['cdns'][0],
1589 'filesize': int_or_none(play_data.get('size')),
1590 'vcodec': 'none',
1591 }]
1592
1593 for a_format in formats:
1594 a_format.setdefault('http_headers', {}).update({
1595 'Referer': url,
1596 })
1597
1598 song = self._call_api('song/info', au_id)
1599 title = song['title']
1600 statistic = song.get('statistic') or {}
1601
1602 subtitles = None
1603 lyric = song.get('lyric')
1604 if lyric:
1605 subtitles = {
1606 'origin': [{
1607 'url': lyric,
1608 }],
1609 }
1610
1611 return {
1612 'id': au_id,
1613 'title': title,
1614 'formats': formats,
1615 'artist': song.get('author'),
1616 'comment_count': int_or_none(statistic.get('comment')),
1617 'description': song.get('intro'),
1618 'duration': int_or_none(song.get('duration')),
1619 'subtitles': subtitles,
1620 'thumbnail': song.get('cover'),
1621 'timestamp': int_or_none(song.get('passtime')),
1622 'uploader': song.get('uname'),
1623 'view_count': int_or_none(statistic.get('play')),
1624 }
1625
1626
1627 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
1628 _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1629 _TEST = {
1630 'url': 'https://www.bilibili.com/audio/am10624',
1631 'info_dict': {
1632 'id': '10624',
1633 'title': '每日新曲推荐(每日11:00更新)',
1634 'description': '每天11:00更新,为你推送最新音乐',
1635 },
1636 'playlist_count': 19,
1637 }
1638
1639 def _real_extract(self, url):
1640 am_id = self._match_id(url)
1641
1642 songs = self._call_api(
1643 'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']
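# A single page of up to 100 songs is fetched; albums with more entries would
# presumably need additional pages, which is not handled here.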
1644
1645 entries = []
1646 for song in songs:
1647 sid = str_or_none(song.get('id'))
1648 if not sid:
1649 continue
1650 entries.append(self.url_result(
1651 'https://www.bilibili.com/audio/au' + sid,
1652 BilibiliAudioIE.ie_key(), sid))
1653
1654 if entries:
1655 album_data = self._call_api('menu/info', am_id) or {}
1656 album_title = album_data.get('title')
1657 if album_title:
1658 for entry in entries:
1659 entry['album'] = album_title
1660 return self.playlist_result(
1661 entries, am_id, album_title, album_data.get('intro'))
1662
1663 return self.playlist_result(entries, am_id)
1664
1665
1666 class BiliBiliPlayerIE(InfoExtractor):
1667 _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1668 _TEST = {
1669 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1670 'only_matching': True,
1671 }
1672
1673 def _real_extract(self, url):
1674 video_id = self._match_id(url)
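# The embed URL only carries the aid; hand off to BiliBiliIE for full extraction.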
1675 return self.url_result(
1676 f'http://www.bilibili.tv/video/av{video_id}/',
1677 ie=BiliBiliIE.ie_key(), video_id=video_id)
1678
1679
1680 class BiliIntlBaseIE(InfoExtractor):
1681 _API_URL = 'https://api.bilibili.tv/intl/gateway'
1682 _NETRC_MACHINE = 'biliintl'
1683 _HEADERS = {'Referer': 'https://www.bilibili.com/'}
1684
1685 def _call_api(self, endpoint, *args, **kwargs):
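# The gateway wraps errors in a non-zero 'code': 10004004/10004005/10023006
# require login, 10004001 indicates a geo restriction; any other code is
# surfaced as a warning or an ExtractorError depending on 'fatal'.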
1686 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
1687 if json.get('code'):
1688 if json['code'] in (10004004, 10004005, 10023006):
1689 self.raise_login_required()
1690 elif json['code'] == 10004001:
1691 self.raise_geo_restricted()
1692 else:
1693 if json.get('message') and str(json['code']) != json['message']:
1694 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1695 else:
1696 errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
1697 if kwargs.get('fatal'):
1698 raise ExtractorError(errmsg)
1699 else:
1700 self.report_warning(errmsg)
1701 return json.get('data')
1702
1703 def json2srt(self, json):
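# Converts the subtitle JSON into SRT text. A minimal sketch with an assumed
# input shape:
#   {"body": [{"from": 1.0, "to": 2.5, "content": "Hi"}]}
# becomes:
#   1
#   00:00:01,000 --> 00:00:02,500
#   Hi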
1704 return '\n\n'.join(
1705 f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1706 for i, line in enumerate(traverse_obj(json, (
1707 'body', lambda _, l: l['content'] and l['from'] and l['to']))))
1708
1709 def _get_subtitles(self, *, ep_id=None, aid=None):
1710 sub_json = self._call_api(
1711 '/web/v2/subtitle', ep_id or aid, fatal=False,
1712 note='Downloading subtitles list', errnote='Unable to download subtitles list',
1713 query=filter_dict({
1714 'platform': 'web',
1715 's_locale': 'en_US',
1716 'episode_id': ep_id,
1717 'aid': aid,
1718 })) or {}
1719 subtitles = {}
1720 fetched_urls = set()
1721 for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
1722 for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
1723 if url in fetched_urls:
1724 continue
1725 fetched_urls.add(url)
1726 sub_ext = determine_ext(url)
1727 sub_lang = sub.get('lang_key') or 'en'
1728
1729 if sub_ext == 'ass':
1730 subtitles.setdefault(sub_lang, []).append({
1731 'ext': 'ass',
1732 'url': url,
1733 })
1734 elif sub_ext == 'json':
1735 sub_data = self._download_json(
1736 url, ep_id or aid, fatal=False,
1737 note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
1738 errnote='Unable to download subtitles')
1739
1740 if sub_data:
1741 subtitles.setdefault(sub_lang, []).append({
1742 'ext': 'srt',
1743 'data': self.json2srt(sub_data),
1744 })
1745 else:
1746 self.report_warning('Unexpected subtitle extension', ep_id or aid)
1747
1748 return subtitles
1749
1750 def _get_formats(self, *, ep_id=None, aid=None):
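# The playurl endpoint returns video and audio resources separately, so every
# format built below is video-only ('acodec': 'none') or audio-only
# ('vcodec': 'none') and is merged by the downloader when needed.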
1751 video_json = self._call_api(
1752 '/web/playurl', ep_id or aid, note='Downloading video formats',
1753 errnote='Unable to download video formats', query=filter_dict({
1754 'platform': 'web',
1755 'ep_id': ep_id,
1756 'aid': aid,
1757 }))
1758 video_json = video_json['playurl']
1759 formats = []
1760 for vid in video_json.get('video') or []:
1761 video_res = vid.get('video_resource') or {}
1762 video_info = vid.get('stream_info') or {}
1763 if not video_res.get('url'):
1764 continue
1765 formats.append({
1766 'url': video_res['url'],
1767 'ext': 'mp4',
1768 'format_note': video_info.get('desc_words'),
1769 'width': video_res.get('width'),
1770 'height': video_res.get('height'),
1771 'vbr': video_res.get('bandwidth'),
1772 'acodec': 'none',
1773 'vcodec': video_res.get('codecs'),
1774 'filesize': video_res.get('size'),
1775 })
1776 for aud in video_json.get('audio_resource') or []:
1777 if not aud.get('url'):
1778 continue
1779 formats.append({
1780 'url': aud['url'],
1781 'ext': 'mp4',
1782 'abr': aud.get('bandwidth'),
1783 'acodec': aud.get('codecs'),
1784 'vcodec': 'none',
1785 'filesize': aud.get('size'),
1786 })
1787
1788 return formats
1789
1790 def _parse_video_metadata(self, video_data):
1791 return {
1792 'title': video_data.get('title_display') or video_data.get('title'),
1793 'description': video_data.get('desc'),
1794 'thumbnail': video_data.get('cover'),
1795 'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
1796 'episode_number': int_or_none(self._search_regex(
1797 r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
1798 }
1799
1800 def _perform_login(self, username, password):
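# Login flow: fetch an RSA public key and a hash salt, encrypt salt+password
# with PKCS#1 v1.5, then POST the base64-encoded result to the password
# login endpoint.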
1801 if not Cryptodome.RSA:
1802 raise ExtractorError('pycryptodomex not found. Please install it', expected=True)
1803
1804 key_data = self._download_json(
1805 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1806 note='Downloading login key', errnote='Unable to download login key')['data']
1807
1808 public_key = Cryptodome.RSA.importKey(key_data['key'])
1809 password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode())
1810 login_post = self._download_json(
1811 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
1812 'username': username,
1813 'password': base64.b64encode(password_hash).decode('ascii'),
1814 'keep_me': 'true',
1815 's_locale': 'en_US',
1816 'isTrusted': 'true',
1817 }), note='Logging in', errnote='Unable to log in')
1818 if login_post.get('code'):
1819 if login_post.get('message'):
1820 raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
1821 else:
1822 raise ExtractorError('Unable to log in')
1823
1824
1825 class BiliIntlIE(BiliIntlBaseIE):
1826 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1827 _TESTS = [{
1828 # Bstation page
1829 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1830 'info_dict': {
1831 'id': '341736',
1832 'ext': 'mp4',
1833 'title': 'E2 - The First Night',
1834 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1835 'episode_number': 2,
1836 'upload_date': '20201009',
1837 'episode': 'Episode 2',
1838 'timestamp': 1602259500,
1839 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1840 'chapters': [{
1841 'start_time': 0,
1842 'end_time': 76.242,
1843 'title': '<Untitled Chapter 1>',
1844 }, {
1845 'start_time': 76.242,
1846 'end_time': 161.161,
1847 'title': 'Intro',
1848 }, {
1849 'start_time': 1325.742,
1850 'end_time': 1403.903,
1851 'title': 'Outro',
1852 }],
1853 },
1854 }, {
1855 # Non-Bstation page
1856 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1857 'info_dict': {
1858 'id': '11005006',
1859 'ext': 'mp4',
1860 'title': 'E3 - Who?',
1861 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1862 'episode_number': 3,
1863 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1864 'episode': 'Episode 3',
1865 'upload_date': '20211219',
1866 'timestamp': 1639928700,
1867 'chapters': [{
1868 'start_time': 0,
1869 'end_time': 88.0,
1870 'title': '<Untitled Chapter 1>',
1871 }, {
1872 'start_time': 88.0,
1873 'end_time': 156.0,
1874 'title': 'Intro',
1875 }, {
1876 'start_time': 1173.0,
1877 'end_time': 1259.535,
1878 'title': 'Outro',
1879 }],
1880 },
1881 }, {
1882 # Subtitle with empty content
1883 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1884 'info_dict': {
1885 'id': '10131790',
1886 'ext': 'mp4',
1887 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1888 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1889 'episode_number': 140,
1890 },
1891 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
1892 }, {
1893 # episode comment extraction
1894 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1895 'info_dict': {
1896 'id': '340317',
1897 'ext': 'mp4',
1898 'timestamp': 1604057820,
1899 'upload_date': '20201030',
1900 'episode_number': 5,
1901 'title': 'E5 - My Own Steel',
1902 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1903 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1904 'episode': 'Episode 5',
1905 'comment_count': int,
1906 'chapters': [{
1907 'start_time': 0,
1908 'end_time': 61.0,
1909 'title': '<Untitled Chapter 1>',
1910 }, {
1911 'start_time': 61.0,
1912 'end_time': 134.0,
1913 'title': 'Intro',
1914 }, {
1915 'start_time': 1290.0,
1916 'end_time': 1379.0,
1917 'title': 'Outro',
1918 }],
1919 },
1920 'params': {
1921 'getcomments': True,
1922 },
1923 }, {
1924 # user generated content comment extraction
1925 'url': 'https://www.bilibili.tv/en/video/2045730385',
1926 'info_dict': {
1927 'id': '2045730385',
1928 'ext': 'mp4',
1929 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1930 'timestamp': 1667891924,
1931 'upload_date': '20221108',
1932 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
1933 'comment_count': int,
1934 'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
1935 },
1936 'params': {
1937 'getcomments': True,
1938 },
1939 }, {
1940 # episode id without intro and outro
1941 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1942 'info_dict': {
1943 'id': '11246489',
1944 'ext': 'mp4',
1945 'title': 'E1 - Operation \'Strix\' <Owl>',
1946 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1947 'timestamp': 1649516400,
1948 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1949 'episode': 'Episode 1',
1950 'episode_number': 1,
1951 'upload_date': '20220409',
1952 },
1953 }, {
1954 'url': 'https://www.biliintl.com/en/play/34613/341736',
1955 'only_matching': True,
1956 }, {
1957 # User-generated content (as opposed to a series licensed from a studio)
1958 'url': 'https://bilibili.tv/en/video/2019955076',
1959 'only_matching': True,
1960 }, {
1961 # No language in URL
1962 'url': 'https://www.bilibili.tv/video/2019955076',
1963 'only_matching': True,
1964 }, {
1965 # Uppercase language in URL
1966 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1967 'only_matching': True,
1968 }]
1969
1970 @staticmethod
1971 def _make_url(video_id, series_id=None):
1972 if series_id:
1973 return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1974 return f'https://www.bilibili.tv/en/video/{video_id}'
1975
1976 def _extract_video_metadata(self, url, video_id, season_id):
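# Metadata may be smuggled in by BiliIntlSeriesIE to avoid re-downloading the
# episode page; otherwise it is scraped from the webpage or the episode list.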
1977 url, smuggled_data = unsmuggle_url(url, {})
1978 if smuggled_data.get('title'):
1979 return smuggled_data
1980
1981 webpage = self._download_webpage(url, video_id)
1982 # Bstation layout
1983 initial_data = (
1984 self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
1985 or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
1986 video_data = traverse_obj(
1987 initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
1988
1989 if season_id and not video_data:
1990 # Non-Bstation layout, read through episode list
1991 season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
1992 video_data = traverse_obj(season_json, (
1993 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id,
1994 ), expected_type=dict, get_all=False)
1995
1996 # XXX: webpage metadata may not be accurate; it is only used so extraction does not crash when video_data is not found
1997 return merge_dicts(
1998 self._parse_video_metadata(video_data), {
1999 'title': get_element_by_class(
2000 'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
2001 'description': get_element_by_class(
2002 'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
2003 }, self._search_json_ld(webpage, video_id, default={}))
2004
2005 def _get_comments_reply(self, root_id, next_id=0, display_id=None):
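# Recursively pages through the replies of a single comment (20 per request)
# until the API reports cursor.is_end.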
2006 comment_api_raw_data = self._download_json(
2007 'https://api.bilibili.tv/reply/web/detail', display_id,
2008 note=f'Downloading reply comment of {root_id} - {next_id}',
2009 query={
2010 'platform': 'web',
2011 'ps': 20, # replies per page (default: 3)
2012 'root': root_id,
2013 'next': next_id,
2014 })
2015
2016 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2017 yield {
2018 'author': traverse_obj(replies, ('member', 'name')),
2019 'author_id': traverse_obj(replies, ('member', 'mid')),
2020 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2021 'text': traverse_obj(replies, ('content', 'message')),
2022 'id': replies.get('rpid'),
2023 'like_count': int_or_none(replies.get('like_count')),
2024 'parent': replies.get('parent'),
2025 'timestamp': unified_timestamp(replies.get('ctime_text')),
2026 }
2027
2028 if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2029 yield from self._get_comments_reply(
2030 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
2031
2032 def _get_comments(self, video_id, ep_id):
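# Pages through top-level comments (sorted by 'best'); replies with a
# non-zero count are expanded via _get_comments_reply().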
2033 for i in itertools.count(0):
2034 comment_api_raw_data = self._download_json(
2035 'https://api.bilibili.tv/reply/web/root', video_id,
2036 note=f'Downloading comment page {i + 1}',
2037 query={
2038 'platform': 'web',
2039 'pn': i, # page number
2040 'ps': 20, # comments per page (default: 20)
2041 'oid': video_id,
2042 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
2043 'sort_type': 1, # 1: best, 2: recent
2044 })
2045
2046 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
2047 yield {
2048 'author': traverse_obj(replies, ('member', 'name')),
2049 'author_id': traverse_obj(replies, ('member', 'mid')),
2050 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
2051 'text': traverse_obj(replies, ('content', 'message')),
2052 'id': replies.get('rpid'),
2053 'like_count': int_or_none(replies.get('like_count')),
2054 'timestamp': unified_timestamp(replies.get('ctime_text')),
2055 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
2056 }
2057 if replies.get('count'):
2058 yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
2059
2060 if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
2061 break
2062
2063 def _real_extract(self, url):
2064 season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
2065 video_id = ep_id or aid
2066 chapters = None
2067
2068 if ep_id:
2069 intro_ending_json = self._call_api(
2070 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2071 video_id, fatal=False) or {}
2072 if intro_ending_json.get('skip'):
2073 # FIXME: the start and end times seem to be off by a few seconds even though they match ogv.*.js
2074 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
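# Timestamps are reported in milliseconds and scaled to seconds here.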
2075 chapters = [{
2076 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
2077 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
2078 'title': 'Intro',
2079 }, {
2080 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
2081 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
2082 'title': 'Outro',
2083 }]
2084
2085 return {
2086 'id': video_id,
2087 **self._extract_video_metadata(url, video_id, season_id),
2088 'formats': self._get_formats(ep_id=ep_id, aid=aid),
2089 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
2090 'chapters': chapters,
2091 '__post_extractor': self.extract_comments(video_id, ep_id),
2092 'http_headers': self._HEADERS,
2093 }
2094
2095
2096 class BiliIntlSeriesIE(BiliIntlBaseIE):
2097 IE_NAME = 'biliIntl:series'
2098 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
2099 _TESTS = [{
2100 'url': 'https://www.bilibili.tv/en/play/34613',
2101 'playlist_mincount': 15,
2102 'info_dict': {
2103 'id': '34613',
2104 'title': 'TONIKAWA: Over the Moon For You',
2105 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2106 'categories': ['Slice of life', 'Comedy', 'Romance'],
2107 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2108 'view_count': int,
2109 },
2110 'params': {
2111 'skip_download': True,
2112 },
2113 }, {
2114 'url': 'https://www.bilibili.tv/en/media/1048837',
2115 'info_dict': {
2116 'id': '1048837',
2117 'title': 'SPY×FAMILY',
2118 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2119 'categories': ['Adventure', 'Action', 'Comedy'],
2120 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
2121 'view_count': int,
2122 },
2123 'playlist_mincount': 25,
2124 }, {
2125 'url': 'https://www.biliintl.com/en/play/34613',
2126 'only_matching': True,
2127 }, {
2128 'url': 'https://www.biliintl.com/EN/play/34613',
2129 'only_matching': True,
2130 }]
2131
2132 def _entries(self, series_id):
2133 series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
2134 for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
2135 episode_id = str(episode['episode_id'])
2136 yield self.url_result(smuggle_url(
2137 BiliIntlIE._make_url(episode_id, series_id),
2138 self._parse_video_metadata(episode),
2139 ), BiliIntlIE, episode_id)
2140
2141 def _real_extract(self, url):
2142 series_id = self._match_id(url)
2143 series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
2144 return self.playlist_result(
2145 self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
2146 categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
2147 thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2148
2149
2150 class BiliLiveIE(InfoExtractor):
2151 _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2152
2153 _TESTS = [{
2154 'url': 'https://live.bilibili.com/196',
2155 'info_dict': {
2156 'id': '33989',
2157 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
2158 'ext': 'flv',
2159 'title': '太空狼人杀联动,不被爆杀就算赢',
2160 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
2161 'timestamp': 1650802769,
2162 },
2163 'skip': 'not live',
2164 }, {
2165 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2166 'only_matching': True,
2167 }, {
2168 'url': 'https://live.bilibili.com/blanc/196',
2169 'only_matching': True,
2170 }]
2171
2172 _FORMATS = {
2173 80: {'format_id': 'low', 'format_note': '流畅'},
2174 150: {'format_id': 'high_res', 'format_note': '高清'},
2175 250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
2176 400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
2177 10000: {'format_id': 'source', 'format_note': '原画'},
2178 20000: {'format_id': '4K', 'format_note': '4K'},
2179 30000: {'format_id': 'dolby', 'format_note': '杜比'},
2180 }
2181
2182 _quality = staticmethod(qualities(list(_FORMATS)))
2183
2184 def _call_api(self, path, room_id, query):
2185 api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
2186 if api_result.get('code') != 0:
2187 raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
2188 return api_result.get('data') or {}
2189
2190 def _parse_formats(self, qn, fmt):
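# Each quality (qn) may be served by several CDN hosts; the final stream URL
# is assembled as host + base_url + extra.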
2191 for codec in fmt.get('codec') or []:
2192 if codec.get('current_qn') != qn:
2193 continue
2194 for url_info in codec['url_info']:
2195 yield {
2196 'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2197 'ext': fmt.get('format_name'),
2198 'vcodec': codec.get('codec_name'),
2199 'quality': self._quality(qn),
2200 **self._FORMATS[qn],
2201 }
2202
2203 def _real_extract(self, url):
2204 room_id = self._match_id(url)
2205 room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
2206 if room_data.get('live_status') == 0:
2207 raise ExtractorError('Streamer is not live', expected=True)
2208
2209 formats = []
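# getRoomPlayInfo is queried once per quality level; every format/codec entry
# in each response is collected.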
2210 for qn in self._FORMATS:
2211 stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
2212 'room_id': room_id,
2213 'qn': qn,
2214 'codec': '0,1',
2215 'format': '0,2',
2216 'mask': '0',
2217 'no_playurl': '0',
2218 'platform': 'web',
2219 'protocol': '0,1',
2220 })
2221 for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2222 formats.extend(self._parse_formats(qn, fmt))
2223
2224 return {
2225 'id': room_id,
2226 'title': room_data.get('title'),
2227 'description': room_data.get('description'),
2228 'thumbnail': room_data.get('user_cover'),
2229 'timestamp': stream_data.get('live_time'),
2230 'formats': formats,
2231 'is_live': True,
2232 'http_headers': {
2233 'Referer': url,
2234 },
2235 }