]> jfr.im git - yt-dlp.git/blob - yt_dlp/extractor/bilibili.py
[ie/bilibili] Support courses and interactive videos (#8343)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
1 import base64
2 import functools
3 import hashlib
4 import itertools
5 import json
6 import math
7 import re
8 import time
9 import urllib.parse
10
11 from .common import InfoExtractor, SearchInfoExtractor
12 from ..dependencies import Cryptodome
13 from ..networking.exceptions import HTTPError
14 from ..utils import (
15 ExtractorError,
16 GeoRestrictedError,
17 InAdvancePagedList,
18 OnDemandPagedList,
19 bool_or_none,
20 clean_html,
21 filter_dict,
22 float_or_none,
23 format_field,
24 get_element_by_class,
25 int_or_none,
26 join_nonempty,
27 make_archive_id,
28 merge_dicts,
29 mimetype2ext,
30 parse_count,
31 parse_qs,
32 qualities,
33 smuggle_url,
34 srt_subtitles_timecode,
35 str_or_none,
36 traverse_obj,
37 try_call,
38 unified_timestamp,
39 unsmuggle_url,
40 url_or_none,
41 urlencode_postdata,
42 variadic,
43 )
44
45
class BilibiliBaseIE(InfoExtractor):
    """Shared helpers for the Bilibili extractor family: DASH format extraction,
    subtitles, chapters, paginated comments, season episode listing, and the
    edge-graph traversal used by interactive ("stein gate") videos."""

    # Matches the numeric format id embedded in DASH segment URLs, e.g. "-30280.m4s?"
    _FORMAT_ID_RE = re.compile(r'-(\d+)\.m4s\?')

    def extract_formats(self, play_info):
        """Build yt-dlp format dicts from a Bilibili "playurl" API response."""
        # Map quality id -> human-readable name (e.g. 80 -> "1080P 高清")
        format_names = {
            r['quality']: traverse_obj(r, 'new_description', 'display_desc')
            for r in traverse_obj(play_info, ('support_formats', lambda _, v: v['quality']))
        }

        # Regular and Dolby audio tracks; FLAC (if any) lives under a separate key
        audios = traverse_obj(play_info, ('dash', (None, 'dolby'), 'audio', ..., {dict}))
        flac_audio = traverse_obj(play_info, ('dash', 'flac', 'audio'))
        if flac_audio:
            audios.append(flac_audio)
        formats = [{
            'url': traverse_obj(audio, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(audio, 'mimeType', 'mime_type')),
            'acodec': traverse_obj(audio, ('codecs', {str.lower})),
            'vcodec': 'none',
            'tbr': float_or_none(audio.get('bandwidth'), scale=1000),
            'filesize': int_or_none(audio.get('size')),
            'format_id': str_or_none(audio.get('id')),
        } for audio in audios]

        formats.extend({
            'url': traverse_obj(video, 'baseUrl', 'base_url', 'url'),
            'ext': mimetype2ext(traverse_obj(video, 'mimeType', 'mime_type')),
            'fps': float_or_none(traverse_obj(video, 'frameRate', 'frame_rate')),
            'width': int_or_none(video.get('width')),
            'height': int_or_none(video.get('height')),
            'vcodec': video.get('codecs'),
            # Video streams are video-only whenever separate audio tracks exist
            'acodec': 'none' if audios else None,
            'dynamic_range': {126: 'DV', 125: 'HDR10'}.get(int_or_none(video.get('id'))),
            'tbr': float_or_none(video.get('bandwidth'), scale=1000),
            'filesize': int_or_none(video.get('size')),
            'quality': int_or_none(video.get('id')),
            # Prefer the id embedded in the segment URL; fall back to the quality id
            'format_id': traverse_obj(
                video, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}, 1),
                ('id', {str_or_none}), get_all=False),
            'format': format_names.get(video.get('id')),
        } for video in traverse_obj(play_info, ('dash', 'video', ...)))

        # Qualities advertised by the API but absent from the DASH data are
        # typically login/premium-gated; inform the user instead of staying silent
        missing_formats = format_names.keys() - set(traverse_obj(formats, (..., 'quality')))
        if missing_formats:
            self.to_screen(f'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
                           f'you have to login or become premium member to download them. {self._login_hint()}')

        return formats

    def _download_playinfo(self, video_id, cid):
        """Fetch the "playurl" JSON for one video part (cid)."""
        # fnval=4048 requests the full DASH feature set — TODO confirm flag meaning
        return self._download_json(
            'https://api.bilibili.com/x/player/playurl', video_id,
            query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
            note=f'Downloading video formats for cid {cid}')['data']

    def json2srt(self, json_data):
        """Convert Bilibili's JSON subtitle payload into SRT text."""
        srt_data = ''
        for idx, line in enumerate(json_data.get('body') or []):
            srt_data += (f'{idx + 1}\n'
                         f'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
                         f'{line["content"]}\n\n')
        return srt_data

    def _get_subtitles(self, video_id, cid, aid=None):
        """Return subtitles for a part: always the danmaku XML, plus any CC tracks."""
        subtitles = {
            'danmaku': [{
                'ext': 'xml',
                'url': f'https://comment.bilibili.com/{cid}.xml',
            }]
        }

        subtitle_info = traverse_obj(self._download_json(
            'https://api.bilibili.com/x/player/v2', video_id,
            query={'aid': aid, 'cid': cid} if aid else {'bvid': video_id, 'cid': cid},
            note=f'Extracting subtitle info {cid}'), ('data', 'subtitle'))
        subs_list = traverse_obj(subtitle_info, ('subtitles', lambda _, v: v['subtitle_url'] and v['lan']))
        if not subs_list and traverse_obj(subtitle_info, 'allow_submit'):
            # CC subtitles may exist but be hidden from anonymous sessions
            if not self._get_cookies('https://api.bilibili.com').get('SESSDATA'):  # no login session cookie
                self.report_warning(f'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once=True)
        for s in subs_list:
            subtitles.setdefault(s['lan'], []).append({
                'ext': 'srt',
                'data': self.json2srt(self._download_json(s['subtitle_url'], video_id))
            })
        return subtitles

    def _get_chapters(self, aid, cid):
        """Return view-point chapters for a part, or None when unavailable."""
        # Short-circuits to a falsy value when aid/cid is missing, skipping the request
        chapters = aid and cid and self._download_json(
            'https://api.bilibili.com/x/player/v2', aid, query={'aid': aid, 'cid': cid},
            note='Extracting chapters', fatal=False)
        return traverse_obj(chapters, ('data', 'view_points', ..., {
            'title': 'content',
            'start_time': 'from',
            'end_time': 'to',
        })) or None

    def _get_comments(self, aid):
        """Yield all comments (and nested replies) page by page until exhausted."""
        for idx in itertools.count(1):
            replies = traverse_obj(
                self._download_json(
                    f'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
                    aid, note=f'Extracting comments from page {idx}', fatal=False),
                ('data', 'replies'))
            if not replies:
                return
            for children in map(self._get_all_children, replies):
                yield from children

    def _get_all_children(self, reply):
        """Recursively yield a comment followed by all of its descendants."""
        yield {
            'author': traverse_obj(reply, ('member', 'uname')),
            'author_id': traverse_obj(reply, ('member', 'mid')),
            'id': reply.get('rpid'),
            'text': traverse_obj(reply, ('content', 'message')),
            'timestamp': reply.get('ctime'),
            'parent': reply.get('parent') or 'root',
        }
        for children in map(self._get_all_children, traverse_obj(reply, ('replies', ...))):
            yield from children

    def _get_episodes_from_season(self, ss_id, url):
        """Yield url_result entries for every main-section episode of a season."""
        season_info = self._download_json(
            'https://api.bilibili.com/pgc/web/season/section', ss_id,
            note='Downloading season info', query={'season_id': ss_id},
            headers={'Referer': url, **self.geo_verification_headers()})

        for entry in traverse_obj(season_info, (
                'result', 'main_section', 'episodes',
                lambda _, v: url_or_none(v['share_url']) and v['id'])):
            yield self.url_result(entry['share_url'], BiliBiliBangumiIE, str_or_none(entry.get('id')))

    def _get_divisions(self, video_id, graph_version, edges, edge_id, cid_edges=None):
        """Depth-first walk of an interactive video's choice graph.

        Mutates ``edges`` (edge_id -> metadata) and returns ``cid_edges``,
        a mapping of cid -> {edge_id: edge metadata} for deduplication.
        """
        cid_edges = cid_edges or {}
        division_data = self._download_json(
            'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id,
            query={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id},
            note=f'Extracting divisions from edge {edge_id}')
        edges.setdefault(edge_id, {}).update(
            traverse_obj(division_data, ('data', 'story_list', lambda _, v: v['edge_id'] == edge_id, {
                'title': ('title', {str}),
                'cid': ('cid', {int_or_none}),
            }), get_all=False))

        edges[edge_id].update(traverse_obj(division_data, ('data', {
            'title': ('title', {str}),
            'choices': ('edges', 'questions', ..., 'choices', ..., {
                'edge_id': ('id', {int_or_none}),
                'cid': ('cid', {int_or_none}),
                'text': ('option', {str}),
            }),
        })))
        # use dict to combine edges that use the same video section (same cid)
        cid_edges.setdefault(edges[edge_id]['cid'], {})[edge_id] = edges[edge_id]
        for choice in traverse_obj(edges, (edge_id, 'choices', ...)):
            # Only recurse into unseen edges to terminate on cyclic graphs
            if choice['edge_id'] not in edges:
                edges[choice['edge_id']] = {'cid': choice['cid']}
                self._get_divisions(video_id, graph_version, edges, choice['edge_id'], cid_edges=cid_edges)
        return cid_edges

    def _get_interactive_entries(self, video_id, cid, metainfo):
        """Yield one playlist entry per distinct video section of an interactive video."""
        graph_version = traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/wbi/v2', video_id,
                'Extracting graph version', query={'bvid': video_id, 'cid': cid}),
            ('data', 'interaction', 'graph_version', {int_or_none}))
        # Edge id 1 is the graph's entry point seeded with the root cid
        cid_edges = self._get_divisions(video_id, graph_version, {1: {'cid': cid}}, 1)
        for cid, edges in cid_edges.items():
            play_info = self._download_playinfo(video_id, cid)
            yield {
                **metainfo,
                'id': f'{video_id}_{cid}',
                'title': f'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
                'formats': self.extract_formats(play_info),
                # Embed the raw edge graph so the choice structure is preserved
                'description': f'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'subtitles': self.extract_subtitles(video_id, cid),
            }
222
223
class BiliBiliIE(BilibiliBaseIE):
    """Extractor for regular bilibili.com videos (BV/av ids), including
    anthologies (multi-part videos), festival pages and interactive videos."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/video/BV13x41117TL',
        'info_dict': {
            'id': 'BV13x41117TL',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'ext': 'mp4',
            'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'upload_date': '20170301',
            'timestamp': 1488353834,
            'like_count': int,
            'view_count': int,
        },
    }, {
        'note': 'old av URL version',
        'url': 'http://www.bilibili.com/video/av1074402/',
        'info_dict': {
            'thumbnail': r're:^https?://.*\.(jpg|jpeg)$',
            'ext': 'mp4',
            'uploader': '菊子桑',
            'uploader_id': '156160',
            'id': 'BV11x411K7CN',
            'title': '【金坷垃】金泡沫',
            'duration': 308.36,
            'upload_date': '20140420',
            'timestamp': 1397983878,
            'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
            'like_count': int,
            'comment_count': int,
            'view_count': int,
            'tags': list,
        },
        'params': {'skip_download': True},
    }, {
        'note': 'Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797',
        'info_dict': {
            'id': 'BV1bK411W797',
            'title': '物语中的人物是如何吐槽自己的OP的'
        },
        'playlist_count': 18,
        'playlist': [{
            'info_dict': {
                'id': 'BV1bK411W797_p1',
                'ext': 'mp4',
                'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
                'tags': 'count:10',
                'timestamp': 1589601697,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'uploader': '打牌还是打桩',
                'uploader_id': '150259984',
                'like_count': int,
                'comment_count': int,
                'upload_date': '20200516',
                'view_count': int,
                'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
                'duration': 90.314,
            }
        }]
    }, {
        'note': 'Specific page of Anthology',
        'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
        'info_dict': {
            'id': 'BV1bK411W797_p1',
            'ext': 'mp4',
            'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
            'tags': 'count:10',
            'timestamp': 1589601697,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'uploader': '打牌还是打桩',
            'uploader_id': '150259984',
            'like_count': int,
            'comment_count': int,
            'upload_date': '20200516',
            'view_count': int,
            'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
            'duration': 90.314,
        }
    }, {
        'note': 'video has subtitles',
        'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
        'info_dict': {
            'id': 'BV12N4y1M7rh',
            'ext': 'mp4',
            'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
            'tags': list,
            'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
            'duration': 313.557,
            'upload_date': '20220709',
            'uploader': '小夫太渴',
            'timestamp': 1657347907,
            'uploader_id': '1326814124',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'subtitles': 'count:2'
        },
        'params': {'listsubtitles': True},
    }, {
        'url': 'https://www.bilibili.com/video/av8903802/',
        'info_dict': {
            'id': 'BV13x41117TL',
            'ext': 'mp4',
            'title': '阿滴英文|英文歌分享#6 "Closer',
            'upload_date': '20170301',
            'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
            'timestamp': 1488353834,
            'uploader_id': '65880958',
            'uploader': '阿滴英文',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 554.117,
            'tags': list,
            'comment_count': int,
            'view_count': int,
            'like_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'note': 'video has chapter',
        'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
        'info_dict': {
            'id': 'BV1vL411G7N7',
            'ext': 'mp4',
            'title': '如何为你的B站视频添加进度条分段',
            'timestamp': 1634554558,
            'upload_date': '20211018',
            'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
            'tags': list,
            'uploader': '爱喝咖啡的当麻',
            'duration': 669.482,
            'uploader_id': '1680903',
            'chapters': 'count:6',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'video redirects to festival page',
        'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
        'info_dict': {
            'id': 'BV1wP4y1P72h',
            'ext': 'mp4',
            'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
            'timestamp': 1643947497,
            'upload_date': '20220204',
            'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
            'uploader': '叨叨冯聊音乐',
            'duration': 246.719,
            'uploader_id': '528182630',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'newer festival video',
        'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
        'info_dict': {
            'id': 'BV1ay4y1d77f',
            'ext': 'mp4',
            'title': '【崩坏3新春剧场】为特别的你送上祝福!',
            'timestamp': 1674273600,
            'upload_date': '20230121',
            'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
            'uploader': '果蝇轰',
            'duration': 1111.722,
            'uploader_id': '8469526',
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'params': {'skip_download': True},
    }, {
        'note': 'interactive/split-path video',
        'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
        'info_dict': {
            'id': 'BV1af4y1H7ga',
            'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
            'timestamp': 1630500414,
            'upload_date': '20210901',
            'description': 'md5:01113e39ab06e28042d74ac356a08786',
            'tags': list,
            'uploader': '钉宫妮妮Ninico',
            'duration': 1503,
            'uploader_id': '8881297',
            'comment_count': int,
            'view_count': int,
            'like_count': int,
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
        'playlist_count': 33,
        'playlist': [{
            'info_dict': {
                'id': 'BV1af4y1H7ga_400950101',
                'ext': 'mp4',
                'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
                'timestamp': 1630500414,
                'upload_date': '20210901',
                'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
                'tags': list,
                'uploader': '钉宫妮妮Ninico',
                'duration': 11.605,
                'uploader_id': '8881297',
                'comment_count': int,
                'view_count': int,
                'like_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            },
        }],
    }, {
        'note': '301 redirect to bangumi link',
        'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
        'info_dict': {
            'id': '288525',
            'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
            'ext': 'mp4',
            'series': '我和我的祖国',
            'series_id': '4780',
            'season': '幕后纪实',
            'season_id': '28609',
            'season_number': 1,
            'episode': '钱学森弹道和乘波体飞行器是什么?',
            'episode_id': '288525',
            'episode_number': 105,
            'duration': 1183.957,
            'timestamp': 1571648124,
            'upload_date': '20191021',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
        'info_dict': {
            'id': 'BV1jL41167ZG',
            'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
            'ext': 'mp4',
        },
        'skip': 'supporter-only video',
    }, {
        'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
        'info_dict': {
            'id': 'BV1Ks411f7aQ',
            'title': '【BD1080P】狼与香辛料I【华盟】',
            'ext': 'mp4',
        },
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
        'info_dict': {
            'id': 'BV1GJ411x7h7',
            'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
            'ext': 'mp4',
        },
        'skip': 'geo-restricted',
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        webpage, urlh = self._download_webpage_handle(url, video_id)
        # Some videos 301-redirect to bangumi (or other) pages — hand those off
        if not self._match_valid_url(urlh.url):
            return self.url_result(urlh.url)

        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', video_id)

        # Festival pages carry their metadata under 'videoInfo' instead of 'videoData'
        is_festival = 'videoData' not in initial_state
        if is_festival:
            video_data = initial_state['videoInfo']
        else:
            play_info_obj = self._search_json(
                r'window\.__playinfo__\s*=', webpage, 'play info', video_id, fatal=False)
            if not play_info_obj:
                # Distinguish login-required (-403) from deleted/geo-blocked (-404)
                if traverse_obj(initial_state, ('error', 'trueCode')) == -403:
                    self.raise_login_required()
                if traverse_obj(initial_state, ('error', 'trueCode')) == -404:
                    raise ExtractorError(
                        'This video may be deleted or geo-restricted. '
                        'You might want to try a VPN or a proxy server (with --proxy)', expected=True)
            play_info = traverse_obj(play_info_obj, ('data', {dict}))
            if not play_info:
                # Code 87007 marks supporter-only (充电专属) videos; surface the
                # page's own toast message to explain which tier is required
                if traverse_obj(play_info_obj, 'code') == 87007:
                    toast = get_element_by_class('tips-toast', webpage) or ''
                    msg = clean_html(
                        f'{get_element_by_class("belongs-to", toast) or ""},'
                        + (get_element_by_class('level', toast) or ''))
                    raise ExtractorError(
                        f'This is a supporter-only video: {msg}. {self._login_hint()}', expected=True)
                raise ExtractorError('Failed to extract play info')
            video_data = initial_state['videoData']

        video_id, title = video_data['bvid'], video_data.get('title')

        # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
        page_list_json = not is_festival and traverse_obj(
            self._download_json(
                'https://api.bilibili.com/x/player/pagelist', video_id,
                fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
                note='Extracting videos in anthology'),
            'data', expected_type=list) or []
        is_anthology = len(page_list_json) > 1

        # 'p' query parameter selects a specific part of an anthology
        part_id = int_or_none(parse_qs(url).get('p', [None])[-1])
        if is_anthology and not part_id and self._yes_playlist(video_id, video_id):
            return self.playlist_from_matches(
                page_list_json, video_id, title, ie=BiliBiliIE,
                getter=lambda entry: f'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')

        if is_anthology:
            part_id = part_id or 1
            title += f' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'

        aid = video_data.get('aid')
        # Legacy av-id based archive id (e.g. "1074402_part1") for --download-archive continuity
        old_video_id = format_field(aid, None, f'%s_part{part_id or 1}')

        cid = traverse_obj(video_data, ('pages', part_id - 1, 'cid')) if part_id else video_data.get('cid')

        festival_info = {}
        if is_festival:
            # Festival pages embed no __playinfo__, so fetch formats explicitly
            play_info = self._download_playinfo(video_id, cid)

            festival_info = traverse_obj(initial_state, {
                'uploader': ('videoInfo', 'upName'),
                'uploader_id': ('videoInfo', 'upMid', {str_or_none}),
                'like_count': ('videoStatus', 'like', {int_or_none}),
                'thumbnail': ('sectionEpisodes', lambda _, v: v['bvid'] == video_id, 'cover'),
            }, get_all=False)

        metainfo = {
            **traverse_obj(initial_state, {
                'uploader': ('upData', 'name'),
                'uploader_id': ('upData', 'mid', {str_or_none}),
                'like_count': ('videoData', 'stat', 'like', {int_or_none}),
                'tags': ('tags', ..., 'tag_name'),
                'thumbnail': ('videoData', 'pic', {url_or_none}),
            }),
            **festival_info,
            **traverse_obj(video_data, {
                'description': 'desc',
                'timestamp': ('pubdate', {int_or_none}),
                'view_count': (('viewCount', ('stat', 'view')), {int_or_none}),
                'comment_count': ('stat', 'reply', {int_or_none}),
            }, get_all=False),
            'id': f'{video_id}{format_field(part_id, None, "_p%d")}',
            '_old_archive_ids': [make_archive_id(self, old_video_id)] if old_video_id else None,
            'title': title,
            'http_headers': {'Referer': url},
        }

        # Interactive ("stein gate") videos expand into one entry per graph section
        is_interactive = traverse_obj(video_data, ('rights', 'is_stein_gate'))
        if is_interactive:
            return self.playlist_result(
                self._get_interactive_entries(video_id, cid, metainfo), **metainfo, **{
                    'duration': traverse_obj(initial_state, ('videoData', 'duration', {int_or_none})),
                    '__post_extractor': self.extract_comments(aid),
                })
        else:
            return {
                **metainfo,
                'duration': float_or_none(play_info.get('timelength'), scale=1000),
                'chapters': self._get_chapters(aid, cid),
                'subtitles': self.extract_subtitles(video_id, cid),
                'formats': self.extract_formats(play_info),
                '__post_extractor': self.extract_comments(aid),
            }
599
600
class BiliBiliBangumiIE(BilibiliBaseIE):
    """Extractor for bangumi (series/anime) episode pages (ep ids)."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
        'info_dict': {
            'id': '21495',
            'ext': 'mp4',
            'series': '悠久之翼',
            'series_id': '774',
            'season': '第二季',
            'season_id': '1182',
            'season_number': 2,
            'episode': 'forever/ef',
            'episode_id': '21495',
            'episode_number': 12,
            'title': '12 forever/ef',
            'duration': 1420.791,
            'timestamp': 1320412200,
            'upload_date': '20111104',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
        },
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ep267851',
        'info_dict': {
            'id': '267851',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '残酷',
            'episode_id': '267851',
            'episode_number': 1,
            'title': '1 残酷',
            'duration': 1425.256,
            'timestamp': 1554566400,
            'upload_date': '20190406',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
        'skip': 'Geo-restricted',
    }, {
        'note': 'a making-of which falls outside main section',
        'url': 'https://www.bilibili.com/bangumi/play/ep345120',
        'info_dict': {
            'id': '345120',
            'ext': 'mp4',
            'series': '鬼灭之刃',
            'series_id': '4358',
            'season': '立志篇',
            'season_id': '26801',
            'season_number': 1,
            'episode': '炭治郎篇',
            'episode_id': '345120',
            'episode_number': 27,
            'title': '#1 炭治郎篇',
            'duration': 1922.129,
            'timestamp': 1602853860,
            'upload_date': '20201016',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
        },
    }]

    def _real_extract(self, url):
        episode_id = self._match_id(url)
        webpage = self._download_webpage(url, episode_id)

        # Page-level markers for geo restriction / premium preview
        if '您所在的地区无法观看本片' in webpage:
            raise GeoRestrictedError('This video is restricted')
        elif '正在观看预览,大会员免费看全片' in webpage:
            self.raise_login_required('This video is for premium members only')

        headers = {'Referer': url, **self.geo_verification_headers()}
        play_info = self._download_json(
            'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
            'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
            headers=headers)
        # -10403 indicates a premium-only episode
        premium_only = play_info.get('code') == -10403
        play_info = traverse_obj(play_info, ('result', 'video_info', {dict})) or {}

        formats = self.extract_formats(play_info)
        if not formats and (premium_only or '成为大会员抢先看' in webpage or '开通大会员观看' in webpage):
            self.raise_login_required('This video is for premium members only')

        bangumi_info = self._download_json(
            'https://api.bilibili.com/pgc/view/web/season', episode_id, 'Get episode details',
            query={'ep_id': episode_id}, headers=headers)['result']

        # Locate this episode (main section or extra sections) to get its
        # 1-based position as a fallback episode number
        episode_number, episode_info = next((
            (idx, ep) for idx, ep in enumerate(traverse_obj(
                bangumi_info, (('episodes', ('section', ..., 'episodes')), ..., {dict})), 1)
            if str_or_none(ep.get('id')) == episode_id), (1, {}))

        season_id = bangumi_info.get('season_id')
        # Fix: the previous `season_id and next(...)` form raised TypeError on
        # tuple-unpacking when season_id was falsy; guard explicitly instead
        season_number, season_title = next((
            (idx + 1, e.get('season_title')) for idx, e in enumerate(
                traverse_obj(bangumi_info, ('seasons', ...)))
            if e.get('season_id') == season_id
        ), (None, None)) if season_id else (None, None)

        aid = episode_info.get('aid')

        return {
            'id': episode_id,
            'formats': formats,
            **traverse_obj(bangumi_info, {
                'series': ('series', 'series_title', {str}),
                'series_id': ('series', 'series_id', {str_or_none}),
                'thumbnail': ('square_cover', {url_or_none}),
            }),
            **traverse_obj(episode_info, {
                'episode': ('long_title', {str}),
                # Numeric 'title' is the canonical episode number; otherwise
                # fall back to the enumeration position computed above
                'episode_number': ('title', {int_or_none}, {lambda x: x or episode_number}),
                'timestamp': ('pub_time', {int_or_none}),
                'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)},
            }),
            'episode_id': episode_id,
            'season': str_or_none(season_title),
            'season_id': str_or_none(season_id),
            'season_number': season_number,
            'duration': float_or_none(play_info.get('timelength'), scale=1000),
            'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': headers,
        }
727
728
class BiliBiliBangumiMediaIE(BilibiliBaseIE):
    """Extractor for bangumi media (md) overview pages, expanded to all episodes."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/media/md24097891',
        'info_dict': {
            'id': '24097891',
            'title': 'CAROLE & TUESDAY',
            'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.bilibili.com/bangumi/media/md1565/',
        'info_dict': {
            'id': '1565',
            'title': '攻壳机动队 S.A.C. 2nd GIG',
            'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
        },
        'playlist_count': 26,
        'playlist': [{
            'info_dict': {
                'id': '68540',
                'ext': 'mp4',
                'series': '攻壳机动队',
                'series_id': '1077',
                'season': '第二季',
                'season_id': '1565',
                'season_number': 2,
                'episode': '再启动 REEMBODY',
                'episode_id': '68540',
                'episode_number': 1,
                'title': '1 再启动 REEMBODY',
                'duration': 1525.777,
                'timestamp': 1425074413,
                'upload_date': '20150227',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        media_id = self._match_id(url)
        webpage = self._download_webpage(url, media_id)
        initial_state = self._search_json(
            r'window\.__INITIAL_STATE__\s*=', webpage, 'initial_state', media_id)

        # The media page maps onto exactly one season id
        media_info = initial_state['mediaInfo']
        playlist_meta = traverse_obj(media_info, {
            'title': ('title', {str}),
            'description': ('evaluate', {str}),
        })
        return self.playlist_result(
            self._get_episodes_from_season(media_info['season_id'], url),
            media_id, **playlist_meta)
782
783
class BiliBiliBangumiSeasonIE(BilibiliBaseIE):
    """Extractor for bangumi season (ss) pages, expanded to all episodes."""

    _VALID_URL = r'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/bangumi/play/ss26801',
        'info_dict': {
            'id': '26801',
            'title': '鬼灭之刃',
            'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
        },
        'playlist_mincount': 26
    }, {
        'url': 'https://www.bilibili.com/bangumi/play/ss2251',
        'info_dict': {
            'id': '2251',
            'title': '玲音',
            'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
        },
        'playlist_count': 13,
        'playlist': [{
            'info_dict': {
                'id': '50188',
                'ext': 'mp4',
                'series': '玲音',
                'series_id': '1526',
                'season': 'TV',
                'season_id': '2251',
                'season_number': 1,
                'episode': 'WEIRD',
                'episode_id': '50188',
                'episode_number': 1,
                'title': '1 WEIRD',
                'duration': 1436.992,
                'timestamp': 1343185080,
                'upload_date': '20120725',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$'
            },
        }],
    }]

    def _real_extract(self, url):
        ss_id = self._match_id(url)
        webpage = self._download_webpage(url, ss_id)

        # Season title/description come from the page's JSON-LD payload
        ld_json = self._search_json(
            r'<script[^>]+type="application/ld\+json"[^>]*>', webpage, 'info', ss_id)
        metainfo = traverse_obj(ld_json, ('itemListElement', ..., {
            'title': ('name', {str}),
            'description': ('description', {str}),
        }), get_all=False)

        return self.playlist_result(self._get_episodes_from_season(ss_id, url), ss_id, **metainfo)
834
835
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    """Shared logic for Bilibili paid-course ("cheese") extractors."""

    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for one course episode from the season payload."""
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        aid, cid = episode_info['aid'], episode_info['cid']

        # ep_status == -1 marks an announced-but-unreleased episode
        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Attribute results to BilibiliCheeseIE even when reached via the season extractor
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                # Title combines the episode index and name, e.g. "1 - 课程先导片"
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Fetch course season info, keyed by either 'ep_id' or 'season_id'."""
        return self._download_json(
            f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id,
            headers=self._HEADERS, note='Downloading season info')['data']
884
885
class BilibiliCheeseIE(BilibiliCheeseBaseIE):
    """Extractor for a single Bilibili course ("cheese") episode page."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ep229832',
        'info_dict': {
            'id': '229832',
            'ext': 'mp4',
            'title': '1 - 课程先导片',
            'alt_title': '视频课 · 3分41秒',
            'uploader': '马督工',
            'uploader_id': '316568752',
            'episode': '课程先导片',
            'episode_id': '229832',
            'episode_number': 1,
            'duration': 221,
            'timestamp': 1695549606,
            'upload_date': '20230924',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'view_count': int,
        }
    }]

    def _real_extract(self, url):
        ep_id = self._match_id(url)
        # The season endpoint accepts an episode id directly
        season_info = self._download_season_info('ep_id', ep_id)
        return self._extract_episode(season_info, ep_id)
911
912
class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE):
    """Extractor for a full Bilibili course ("cheese") season (ss) page."""

    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist': [{
            'info_dict': {
                'id': '229832',
                'ext': 'mp4',
                'title': '1 - 课程先导片',
                'alt_title': '视频课 · 3分41秒',
                'uploader': '马督工',
                'uploader_id': '316568752',
                'episode': '课程先导片',
                'episode_id': '229832',
                'episode_number': 1,
                'duration': 221,
                'timestamp': 1695549606,
                'upload_date': '20230924',
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                'view_count': int,
            }
        }],
        'params': {'playlist_items': '1'},
    }, {
        'url': 'https://www.bilibili.com/cheese/play/ss5918',
        'info_dict': {
            'id': '5918',
            'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
            'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
        },
        'playlist_mincount': 5,
        'skip': 'paid video in list',
    }]

    def _get_cheese_entries(self, season_info):
        # Only episodes flagged viewable (free preview or purchased) are yielded
        viewable_ids = traverse_obj(
            season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
        for ep_id in viewable_ids:
            yield self._extract_episode(season_info, ep_id)

    def _real_extract(self, url):
        season_id = self._match_id(url)
        season_info = self._download_season_info('season_id', season_id)

        playlist_meta = traverse_obj(season_info, {
            'title': ('title', {str}),
            'description': ('subtitle', {str}),
        })
        return self.playlist_result(
            self._get_cheese_entries(season_info), season_id, **playlist_meta)
966
967
class BilibiliSpaceBaseIE(InfoExtractor):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Build (metadata, lazy paged list) from the three page callbacks."""
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def resolve_page(page_idx):
            # Reuse the already-downloaded first page instead of refetching it
            return get_entries(initial_page if page_idx == 0 else fetch_page(page_idx))

        paged_list = InAdvancePagedList(
            resolve_page, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
978
979
class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE):
    """Extracts all videos of a user space (channel) as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/3985676/video',
        'info_dict': {
            'id': '3985676',
        },
        'playlist_mincount': 178,
    }, {
        'url': 'https://space.bilibili.com/313580179/video',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 92,
    }]

    def _extract_signature(self, playlist_id):
        """Derive the WBI "mixin" signing key used to sign space API queries.

        Key material comes from the two image URLs on the nav endpoint
        (hardcoded fallbacks are used when the request fails); it is then
        shuffled through a fixed permutation table and truncated to 32 chars.
        """
        session_data = self._download_json('https://api.bilibili.com/x/web-interface/nav', playlist_id, fatal=False)

        # Key material is the filename (without extension) of each image URL
        key_from_url = lambda x: x[x.rfind('/') + 1:].split('.')[0]
        img_key = traverse_obj(
            session_data, ('data', 'wbi_img', 'img_url', {key_from_url})) or '34478ba821254d9d93542680e3b86100'
        sub_key = traverse_obj(
            session_data, ('data', 'wbi_img', 'sub_url', {key_from_url})) or '7e16a90d190a4355a78fd00b32a38de6'

        session_key = img_key + sub_key

        signature_values = []
        # Fixed shuffle table; the order must not be altered or signing breaks
        for position in (
            46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
            12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
            57, 62, 11, 36, 20, 34, 44, 52
        ):
            # try_call swallows IndexError for positions beyond the key length
            char_at_position = try_call(lambda: session_key[position])
            if char_at_position:
                signature_values.append(char_at_position)

        return ''.join(signature_values)[:32]

    def _real_extract(self, url):
        playlist_id, is_video_url = self._match_valid_url(url).group('id', 'video')
        if not is_video_url:
            self.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
                           'To download audios, add a "/audio" to the URL')

        signature = self._extract_signature(playlist_id)

        def fetch_page(page_idx):
            # NOTE: keys are inserted in sorted order so that urlencode()
            # produces the canonical string expected by the WBI signature
            query = {
                'keyword': '',
                'mid': playlist_id,
                'order': 'pubdate',
                'order_avoided': 'true',
                'platform': 'web',
                'pn': page_idx + 1,
                'ps': 30,
                'tid': 0,
                'web_location': 1550101,
                'wts': int(time.time()),
            }
            # w_rid = md5(urlencoded_query + mixin_key), appended after signing
            query['w_rid'] = hashlib.md5(f'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()

            try:
                response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
            except ExtractorError as e:
                # 412 is the server-side rate-limit/anti-bot response
                if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                    raise ExtractorError(
                        'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                raise
            if response['code'] == -401:
                raise ExtractorError(
                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
            return response['data']

        def get_metadata(page_data):
            page_size = page_data['page']['ps']
            entry_count = page_data['page']['count']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
            }

        def get_entries(page_data):
            for entry in traverse_obj(page_data, ('list', 'vlist')) or []:
                yield self.url_result(f'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE, entry['bvid'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
1069
1070
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Extracts all audios uploaded by a user as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        uploader_id = self._match_id(url)

        def fetch_page(page_idx):
            page_query = {'uid': uploader_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', uploader_id,
                note=f'Downloading page {page_idx}', query=page_query)['data']

        def get_metadata(page_data):
            return {'page_count': page_data['pageCount'], 'page_size': page_data['pageSize']}

        def get_entries(page_data):
            for song in page_data.get('data', []):
                yield self.url_result(
                    f'https://www.bilibili.com/audio/au{song["id"]}', BilibiliAudioIE, song['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, uploader_id)
1102
1103
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        # Collect BV ids at the given traversal path and hand each to BiliBiliIE
        traversal_path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, traversal_path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        # The uploader name is scraped from the space page's <title> element
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        # Paging keys are consumed by the base class and not part of the result
        for paging_key in ('page_count', 'page_size'):
            metadata.pop(paging_key, None)
        return metadata, page_list
1118
1119
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Extracts a user's "collection" (season archive) as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'description': '',
            'uploader': '老戴在此',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            page_query = {'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}', query=page_query)['data']

        def get_metadata(page_data):
            page_info = page_data['page']
            return {
                'page_count': math.ceil(page_info['total'] / page_info['page_size']),
                'page_size': page_info['page_size'],
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                })
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1168
1169
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Extracts a user's "series" channel as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'
        # Series-level metadata comes from a dedicated endpoint
        playlist_meta = traverse_obj(self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
        ), {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            page_query = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
            return self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}', query=page_query)['data']

        def get_metadata(page_data):
            page_info = page_data['page']
            return {
                'page_count': math.ceil(page_info['total'] / page_info['size']),
                'page_size': page_info['size'],
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
1222
1223
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Extracts a (public or owned) favorites list as a playlist."""
    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'title': '【V2】(旧)',
            'description': '',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The ids endpoint returns every entry at once (no paging needed)
        ids_json = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries')
        entries = self._get_entries(ids_json, 'data')

        playlist_meta = traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        }))
        return self.playlist_result(entries, fid, **playlist_meta)
1272
1273
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Extracts the logged-in user's "watch later" list as a playlist."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        # Use the logged-in user's id as playlist id when available
        user_cookie = self._get_cookies(url).get('DedeUserID')
        list_id = user_cookie.value if user_cookie is not None else 'watchlater'
        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')
        return self.playlist_result(
            self._get_entries(watchlater_info, ('data', 'list')), id=list_id, title='稍后再看')
1291
1292
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extracts /list/ and /medialist/play/ playlists (series, favlists, watchlater)."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'title': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'title': '【V2】(旧)',
            'uploader': '晓月春日',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Page through the medialist API, yielding an entry per video."""
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            # Cursor-based paging: the next request resumes after the last item
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)
        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
        if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {
            'ps': 20,
            'with_current': False,
            **traverse_obj(initial_state, {
                'type': ('playlist', 'type', {int_or_none}),
                'biz_id': ('playlist', 'id', {int_or_none}),
                'tid': ('tid', {int_or_none}),
                # 'sortFiled' [sic] is the key name the site itself uses
                'sort_field': ('sortFiled', {int_or_none}),
                # bool -> 'True'/'False' -> 'true'/'false' as expected by the API
                'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
            })
        }
        metadata = {
            # playlist type (e.g. favlist vs series) is part of the display id
            'id': f'{query["type"]}_{query["biz_id"]}',
            **traverse_obj(initial_state, ('mediaListInfo', {
                'title': ('title', {str}),
                'uploader': ('upper', 'name', {str}),
                'uploader_id': ('upper', 'mid', {str_or_none}),
                'timestamp': ('ctime', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
            })),
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
1392
1393
class BilibiliCategoryIE(InfoExtractor):
    """Extracts a Bilibili category/subcategory browse page as a playlist."""
    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad'
        },
        'playlist_mincount': 45,
        'params': {
            'playlistend': 45
        }
    }]

    # map of categories : subcategories : RIDs (Bilibili region ids);
    # hoisted to a class constant so it is not rebuilt on every call
    _RID_MAP = {
        'kichiku': {
            'mad': 26,
            'manual_vocaloid': 126,
            'guide': 22,
            'theatre': 216,
            'course': 127
        },
    }

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Download one listing page and yield a url_result per video.

        Raises ExtractorError when the page has no video list.
        """
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note=f'Extracting results from page {page_num} of {num_pages}')

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError(f'Failed to retrieve video list for page {page_num}')

        for video in video_list:
            yield self.url_result(
                f'https://www.bilibili.com/video/{video["bvid"]}', 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        """Return a lazily-fetched page list for the given (sub)category.

        Raises ExtractorError for unsupported categories/subcategories or
        when the total page count cannot be determined.
        """
        if category not in self._RID_MAP:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(self._RID_MAP.keys())}')
        if subcategory not in self._RID_MAP[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(self._RID_MAP[category].keys())}')
        rid_value = self._RID_MAP[category][subcategory]

        api_url = f'https://api.bilibili.com/x/web-interface/newlist?rid={rid_value}&type=1&ps=20&jsonp=jsonp'
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        # Path is /v/<category>/<subcategory>
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = f'{category}: {subcategory}'

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
1460
1461
class BiliBiliSearchIE(SearchInfoExtractor):
    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'

    def _search_results(self, query):
        """Yield search results page by page until the API returns none."""
        for page_num in itertools.count(1):
            search_query = {
                'Search_key': query,
                'keyword': query,
                'page': page_num,
                'context': '',
                'duration': 0,
                'tids_2': '',
                '__refresh__': 'true',
                'search_type': 'video',
                'tids': 0,
                'highlight': 1,
            }
            videos = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}',
                query=search_query)['data'].get('result')
            if not videos:
                break
            yield from (
                self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
                for video in videos)
1487
1488
class BilibiliAudioBaseIE(InfoExtractor):
    def _call_api(self, path, sid, query=None):
        """Call the audio web API and return its 'data' payload."""
        # Fall back to a plain sid query when none (or an empty one) is given
        query = query or {'sid': sid}
        response = self._download_json(
            f'https://www.bilibili.com/audio/music-service-c/web/{path}', sid, query=query)
        return response['data']
1496
1497
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extracts a single audio track (au<id>) from Bilibili's music service."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': {
                'origin': [{
                    'ext': 'lrc',
                }],
            },
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
            # CDN requires the page as referer
            'http_headers': {'Referer': url},
        }]

        song = self._call_api('song/info', au_id)
        statistic = song.get('statistic') or {}

        # The lyric, when present, is exposed as an LRC subtitle track
        lyric_url = song.get('lyric')
        subtitles = {'origin': [{'url': lyric_url}]} if lyric_url else None

        return {
            'id': au_id,
            'title': song['title'],
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
1566
1567
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extracts an audio album/menu (am<id>) as a playlist of audio tracks."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        # Skip songs without a usable id
        song_ids = filter(None, (str_or_none(song.get('id')) for song in songs))
        entries = [
            self.url_result(
                f'https://www.bilibili.com/audio/au{sid}', BilibiliAudioIE.ie_key(), sid)
            for sid in song_ids]

        if not entries:
            return self.playlist_result(entries, am_id)

        album_data = self._call_api('menu/info', am_id) or {}
        album_title = album_data.get('title')
        if album_title:
            # Propagate the album title to every entry
            for entry in entries:
                entry['album'] = album_title
            return self.playlist_result(
                entries, am_id, album_title, album_data.get('intro'))

        return self.playlist_result(entries, am_id)
1605
1606
class BiliBiliPlayerIE(InfoExtractor):
    """Resolves embedded player URLs (player.bilibili.com) to the video page."""
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Delegate to BiliBiliIE via the legacy av-number URL
        return self.url_result(
            f'http://www.bilibili.tv/video/av{video_id}/',
            ie=BiliBiliIE.ie_key(), video_id=video_id)
1619
1620
class BiliIntlBaseIE(InfoExtractor):
    """Shared helpers for bilibili.tv / biliintl.com (international) extractors."""
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'

    def _call_api(self, endpoint, *args, **kwargs):
        """Call the intl gateway API and return its 'data' payload.

        Extra args/kwargs are forwarded to _download_json; 'errnote' and
        'fatal' are additionally inspected here to shape API-level error
        handling. Raises login/geo errors for the corresponding API codes.
        """
        json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if json.get('code'):
            # Non-zero code signals an API-level error
            if json['code'] in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif json['code'] == 10004001:
                self.raise_geo_restricted()
            else:
                if json.get('message') and str(json['code']) != json['message']:
                    errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
                else:
                    errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return json.get('data')

    def json2srt(self, json):
        """Convert Bilibili's JSON subtitle body into SRT text.

        Cues with a falsy content, start or end time are dropped.
        """
        data = '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(traverse_obj(json, (
                'body', lambda _, l: l['content'] and l['from'] and l['to']))))
        return data

    def _get_subtitles(self, *, ep_id=None, aid=None):
        """Download and convert all available subtitle tracks for an episode/video."""
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        for sub in sub_json.get('subtitles') or []:
            sub_url = sub.get('url')
            if not sub_url:
                continue
            sub_data = self._download_json(
                sub_url, ep_id or aid, errnote='Unable to download subtitles', fatal=False,
                # Parenthesized so the conditional only selects the suffix;
                # previously the whole note collapsed to '' when 'lang' was missing
                note='Downloading subtitles%s' % (f' for {sub["lang"]}' if sub.get('lang') else ''))
            if not sub_data:
                continue
            subtitles.setdefault(sub.get('lang_key', 'en'), []).append({
                'ext': 'srt',
                'data': self.json2srt(sub_data)
            })
        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Return the split video+audio formats for an episode or video."""
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Map common fields; episode number is parsed from 'E<n> - ...' titles."""
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'thumbnail': video_data.get('cover'),
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in to bilibili.tv; the password is RSA-encrypted with a server key."""
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.RSA.importKey(key_data['key'])
        # The server-supplied hash is prepended to the password before encryption
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true'
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')
1747
1748
1749 class BiliIntlIE(BiliIntlBaseIE):
1750 _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1751 _TESTS = [{
1752 # Bstation page
1753 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1754 'info_dict': {
1755 'id': '341736',
1756 'ext': 'mp4',
1757 'title': 'E2 - The First Night',
1758 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1759 'episode_number': 2,
1760 'upload_date': '20201009',
1761 'episode': 'Episode 2',
1762 'timestamp': 1602259500,
1763 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1764 'chapters': [{
1765 'start_time': 0,
1766 'end_time': 76.242,
1767 'title': '<Untitled Chapter 1>'
1768 }, {
1769 'start_time': 76.242,
1770 'end_time': 161.161,
1771 'title': 'Intro'
1772 }, {
1773 'start_time': 1325.742,
1774 'end_time': 1403.903,
1775 'title': 'Outro'
1776 }],
1777 }
1778 }, {
1779 # Non-Bstation page
1780 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1781 'info_dict': {
1782 'id': '11005006',
1783 'ext': 'mp4',
1784 'title': 'E3 - Who?',
1785 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1786 'episode_number': 3,
1787 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1788 'episode': 'Episode 3',
1789 'upload_date': '20211219',
1790 'timestamp': 1639928700,
1791 'chapters': [{
1792 'start_time': 0,
1793 'end_time': 88.0,
1794 'title': '<Untitled Chapter 1>'
1795 }, {
1796 'start_time': 88.0,
1797 'end_time': 156.0,
1798 'title': 'Intro'
1799 }, {
1800 'start_time': 1173.0,
1801 'end_time': 1259.535,
1802 'title': 'Outro'
1803 }],
1804 }
1805 }, {
1806 # Subtitle with empty content
1807 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1808 'info_dict': {
1809 'id': '10131790',
1810 'ext': 'mp4',
1811 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1812 'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1813 'episode_number': 140,
1814 },
1815 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1816 }, {
1817 'url': 'https://www.bilibili.tv/en/video/2041863208',
1818 'info_dict': {
1819 'id': '2041863208',
1820 'ext': 'mp4',
1821 'timestamp': 1670874843,
1822 'description': 'Scheduled for April 2023.\nStudio: ufotable',
1823 'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
1824 'upload_date': '20221212',
1825 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
1826 },
1827 }, {
1828 # episode comment extraction
1829 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1830 'info_dict': {
1831 'id': '340317',
1832 'ext': 'mp4',
1833 'timestamp': 1604057820,
1834 'upload_date': '20201030',
1835 'episode_number': 5,
1836 'title': 'E5 - My Own Steel',
1837 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1838 'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1839 'episode': 'Episode 5',
1840 'comment_count': int,
1841 'chapters': [{
1842 'start_time': 0,
1843 'end_time': 61.0,
1844 'title': '<Untitled Chapter 1>'
1845 }, {
1846 'start_time': 61.0,
1847 'end_time': 134.0,
1848 'title': 'Intro'
1849 }, {
1850 'start_time': 1290.0,
1851 'end_time': 1379.0,
1852 'title': 'Outro'
1853 }],
1854 },
1855 'params': {
1856 'getcomments': True
1857 }
1858 }, {
1859 # user generated content comment extraction
1860 'url': 'https://www.bilibili.tv/en/video/2045730385',
1861 'info_dict': {
1862 'id': '2045730385',
1863 'ext': 'mp4',
1864 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1865 'timestamp': 1667891924,
1866 'upload_date': '20221108',
1867 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
1868 'comment_count': int,
1869 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
1870 },
1871 'params': {
1872 'getcomments': True
1873 }
1874 }, {
1875 # episode id without intro and outro
1876 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1877 'info_dict': {
1878 'id': '11246489',
1879 'ext': 'mp4',
1880 'title': 'E1 - Operation \'Strix\' <Owl>',
1881 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1882 'timestamp': 1649516400,
1883 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1884 'episode': 'Episode 1',
1885 'episode_number': 1,
1886 'upload_date': '20220409',
1887 },
1888 }, {
1889 'url': 'https://www.biliintl.com/en/play/34613/341736',
1890 'only_matching': True,
1891 }, {
1892 # User-generated content (as opposed to a series licensed from a studio)
1893 'url': 'https://bilibili.tv/en/video/2019955076',
1894 'only_matching': True,
1895 }, {
1896 # No language in URL
1897 'url': 'https://www.bilibili.tv/video/2019955076',
1898 'only_matching': True,
1899 }, {
1900 # Uppercase language in URL
1901 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1902 'only_matching': True,
1903 }]
1904
1905 def _make_url(video_id, series_id=None):
1906 if series_id:
1907 return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1908 return f'https://www.bilibili.tv/en/video/{video_id}'
1909
    def _extract_video_metadata(self, url, video_id, season_id):
        """Collect metadata for an episode or video.

        Resolution order: metadata smuggled into the URL by the series extractor,
        then webpage JSON (Bstation layout), then the season episode-list API
        (non-Bstation layout); webpage JSON-LD and og: tags are merged in last
        as a fallback.
        """
        url, smuggled_data = unsmuggle_url(url, {})
        # Metadata smuggled by BiliIntlSeriesIE wins outright — saves a webpage download
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        # OGV (licensed series), UGC (user uploads) and legacy 'ugc' layouts each
        # nest the video data differently; first matching path wins
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            # Pick the episode entry whose id matches the requested video
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False)

        # XXX: webpage metadata may be inaccurate; it is only merged in so extraction
        # does not crash when video_data could not be found
        return merge_dicts(
            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id, fatal=False), {
                'title': self._html_search_meta('og:title', webpage),
                'description': self._html_search_meta('og:description', webpage)
            })
1936
1937 def _get_comments_reply(self, root_id, next_id=0, display_id=None):
1938 comment_api_raw_data = self._download_json(
1939 'https://api.bilibili.tv/reply/web/detail', display_id,
1940 note=f'Downloading reply comment of {root_id} - {next_id}',
1941 query={
1942 'platform': 'web',
1943 'ps': 20, # comment's reply per page (default: 3)
1944 'root': root_id,
1945 'next': next_id,
1946 })
1947
1948 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
1949 yield {
1950 'author': traverse_obj(replies, ('member', 'name')),
1951 'author_id': traverse_obj(replies, ('member', 'mid')),
1952 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
1953 'text': traverse_obj(replies, ('content', 'message')),
1954 'id': replies.get('rpid'),
1955 'like_count': int_or_none(replies.get('like_count')),
1956 'parent': replies.get('parent'),
1957 'timestamp': unified_timestamp(replies.get('ctime_text'))
1958 }
1959
1960 if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
1961 yield from self._get_comments_reply(
1962 root_id, comment_api_raw_data['data']['cursor']['next'], display_id)
1963
1964 def _get_comments(self, video_id, ep_id):
1965 for i in itertools.count(0):
1966 comment_api_raw_data = self._download_json(
1967 'https://api.bilibili.tv/reply/web/root', video_id,
1968 note=f'Downloading comment page {i + 1}',
1969 query={
1970 'platform': 'web',
1971 'pn': i, # page number
1972 'ps': 20, # comment per page (default: 20)
1973 'oid': video_id,
1974 'type': 3 if ep_id else 1, # 1: user generated content, 3: series content
1975 'sort_type': 1, # 1: best, 2: recent
1976 })
1977
1978 for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
1979 yield {
1980 'author': traverse_obj(replies, ('member', 'name')),
1981 'author_id': traverse_obj(replies, ('member', 'mid')),
1982 'author_thumbnail': traverse_obj(replies, ('member', 'face')),
1983 'text': traverse_obj(replies, ('content', 'message')),
1984 'id': replies.get('rpid'),
1985 'like_count': int_or_none(replies.get('like_count')),
1986 'timestamp': unified_timestamp(replies.get('ctime_text')),
1987 'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
1988 }
1989 if replies.get('count'):
1990 yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)
1991
1992 if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
1993 break
1994
1995 def _real_extract(self, url):
1996 season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
1997 video_id = ep_id or aid
1998 chapters = None
1999
2000 if ep_id:
2001 intro_ending_json = self._call_api(
2002 f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2003 video_id, fatal=False) or {}
2004 if intro_ending_json.get('skip'):
2005 # FIXME: start time and end time seems a bit off a few second even it corrext based on ogv.*.js
2006 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2007 chapters = [{
2008 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
2009 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
2010 'title': 'Intro'
2011 }, {
2012 'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
2013 'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
2014 'title': 'Outro'
2015 }]
2016
2017 return {
2018 'id': video_id,
2019 **self._extract_video_metadata(url, video_id, season_id),
2020 'formats': self._get_formats(ep_id=ep_id, aid=aid),
2021 'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
2022 'chapters': chapters,
2023 '__post_extractor': self.extract_comments(video_id, ep_id)
2024 }
2025
2026
class BiliIntlSeriesIE(BiliIntlBaseIE):
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield a url_result for every episode in every section of the season."""
        season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            ep_id = str(episode['episode_id'])
            # Smuggle the already-fetched metadata so the episode extractor can
            # skip its own webpage download
            episode_url = smuggle_url(
                BiliIntlIE._make_url(ep_id, series_id), self._parse_video_metadata(episode))
            yield self.url_result(episode_url, BiliIntlIE, ep_id)

    def _real_extract(self, url):
        series_id = self._match_id(url)
        season = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
        return self.playlist_result(
            self._entries(series_id), series_id, season.get('title'), season.get('description'),
            categories=traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(season.get('horizontal_cover')),
            view_count=parse_count(season.get('view')))
2079
2080
class BiliLiveIE(InfoExtractor):
    _VALID_URL = r'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'

    _TESTS = [{
        'url': 'https://live.bilibili.com/196',
        'info_dict': {
            'id': '33989',
            'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
            'ext': 'flv',
            'title': "太空狼人杀联动,不被爆杀就算赢",
            'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
            'timestamp': 1650802769,
        },
        'skip': 'not live'
    }, {
        'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
        'only_matching': True
    }, {
        'url': 'https://live.bilibili.com/blanc/196',
        'only_matching': True
    }]

    # qn (quality number) -> yt-dlp format metadata
    _FORMATS = {
        80: {'format_id': 'low', 'format_note': '流畅'},
        150: {'format_id': 'high_res', 'format_note': '高清'},
        250: {'format_id': 'ultra_high_res', 'format_note': '超清'},
        400: {'format_id': 'blue_ray', 'format_note': '蓝光'},
        10000: {'format_id': 'source', 'format_note': '原画'},
        20000: {'format_id': '4K', 'format_note': '4K'},
        30000: {'format_id': 'dolby', 'format_note': '杜比'},
    }

    # Preference order follows insertion order of _FORMATS (lowest to highest)
    _quality = staticmethod(qualities(list(_FORMATS)))

    def _call_api(self, path, room_id, query):
        """Call a live.bilibili.com API endpoint and return its `data` payload.

        Raises ExtractorError when the API reports failure via a non-zero `code`.
        """
        api_result = self._download_json(f'https://api.live.bilibili.com/{path}', room_id, query=query)
        if api_result.get('code') != 0:
            raise ExtractorError(api_result.get('message') or 'Unable to download JSON metadata')
        return api_result.get('data') or {}

    def _parse_formats(self, qn, fmt):
        """Yield format dicts from `fmt`'s codec entries that match quality `qn`."""
        for codec in fmt.get('codec') or []:
            # The API echoes streams for other qualities too; keep only the requested one
            if codec.get('current_qn') != qn:
                continue
            for url_info in codec['url_info']:
                yield {
                    'url': f'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
                    'ext': fmt.get('format_name'),
                    'vcodec': codec.get('codec_name'),
                    'quality': self._quality(qn),
                    **self._FORMATS[qn],
                }

    def _real_extract(self, url):
        room_id = self._match_id(url)
        room_data = self._call_api('room/v1/Room/get_info', room_id, {'id': room_id})
        if room_data.get('live_status') == 0:
            raise ExtractorError('Streamer is not live', expected=True)

        formats = []
        stream_data = {}  # defensively bound; also read after the loop for `live_time`
        # Request play info once per known quality level, since the API only
        # returns streams for the qn that was asked for
        for qn in self._FORMATS:  # iterate the dict directly; `.keys()` was redundant
            stream_data = self._call_api('xlive/web-room/v2/index/getRoomPlayInfo', room_id, {
                'room_id': room_id,
                'qn': qn,
                'codec': '0,1',
                'format': '0,2',
                'mask': '0',
                'no_playurl': '0',
                'platform': 'web',
                'protocol': '0,1',
            })
            for fmt in traverse_obj(stream_data, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
                formats.extend(self._parse_formats(qn, fmt))

        return {
            'id': room_id,
            'title': room_data.get('title'),
            'description': room_data.get('description'),
            'thumbnail': room_data.get('user_cover'),
            'timestamp': stream_data.get('live_time'),
            'formats': formats,
            'is_live': True,
            'http_headers': {
                'Referer': url,
            },
        }