12 from .common
import InfoExtractor
, SearchInfoExtractor
13 from ..dependencies
import Cryptodome
14 from ..networking
.exceptions
import HTTPError
36 srt_subtitles_timecode
,
48 class BilibiliBaseIE(InfoExtractor
):
49 _FORMAT_ID_RE
= re
.compile(r
'-(\d+)\.m4s\?')
51 def extract_formats(self
, play_info
):
53 r
['quality']: traverse_obj(r
, 'new_description', 'display_desc')
54 for r
in traverse_obj(play_info
, ('support_formats', lambda _
, v
: v
['quality']))
57 audios
= traverse_obj(play_info
, ('dash', (None, 'dolby'), 'audio', ..., {dict}
))
58 flac_audio
= traverse_obj(play_info
, ('dash', 'flac', 'audio'))
60 audios
.append(flac_audio
)
62 'url': traverse_obj(audio
, 'baseUrl', 'base_url', 'url'),
63 'ext': mimetype2ext(traverse_obj(audio
, 'mimeType', 'mime_type')),
64 'acodec': traverse_obj(audio
, ('codecs', {str.lower}
)),
66 'tbr': float_or_none(audio
.get('bandwidth'), scale
=1000),
67 'filesize': int_or_none(audio
.get('size')),
68 'format_id': str_or_none(audio
.get('id')),
69 } for audio
in audios
]
72 'url': traverse_obj(video
, 'baseUrl', 'base_url', 'url'),
73 'ext': mimetype2ext(traverse_obj(video
, 'mimeType', 'mime_type')),
74 'fps': float_or_none(traverse_obj(video
, 'frameRate', 'frame_rate')),
75 'width': int_or_none(video
.get('width')),
76 'height': int_or_none(video
.get('height')),
77 'vcodec': video
.get('codecs'),
78 'acodec': 'none' if audios
else None,
79 'dynamic_range': {126: 'DV', 125: 'HDR10'}
.get(int_or_none(video
.get('id'))),
80 'tbr': float_or_none(video
.get('bandwidth'), scale
=1000),
81 'filesize': int_or_none(video
.get('size')),
82 'quality': int_or_none(video
.get('id')),
83 'format_id': traverse_obj(
84 video
, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}
, 1),
85 ('id', {str_or_none}
), get_all
=False),
86 'format': format_names
.get(video
.get('id')),
87 } for video
in traverse_obj(play_info
, ('dash', 'video', ...)))
89 missing_formats
= format_names
.keys() - set(traverse_obj(formats
, (..., 'quality')))
91 self
.to_screen(f
'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
92 f
'you have to login or become premium member to download them. {self._login_hint()}')
96 def _download_playinfo(self
, video_id
, cid
, headers
=None):
97 return self
._download
_json
(
98 'https://api.bilibili.com/x/player/playurl', video_id
,
99 query
={'bvid': video_id, 'cid': cid, 'fnval': 4048}
,
100 note
=f
'Downloading video formats for cid {cid}', headers
=headers
)['data']
102 def json2srt(self
, json_data
):
104 for idx
, line
in enumerate(json_data
.get('body') or []):
105 srt_data
+= (f
'{idx + 1}\n'
106 f
'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
107 f
'{line["content"]}\n\n')
110 def _get_subtitles(self
, video_id
, cid
, aid
=None):
114 'url': f
'https://comment.bilibili.com/{cid}.xml',
118 subtitle_info
= traverse_obj(self
._download
_json
(
119 'https://api.bilibili.com/x/player/v2', video_id
,
120 query
={'aid': aid, 'cid': cid}
if aid
else {'bvid': video_id, 'cid': cid}
,
121 note
=f
'Extracting subtitle info {cid}'), ('data', 'subtitle'))
122 subs_list
= traverse_obj(subtitle_info
, ('subtitles', lambda _
, v
: v
['subtitle_url'] and v
['lan']))
123 if not subs_list
and traverse_obj(subtitle_info
, 'allow_submit'):
124 if not self
._get
_cookies
('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
125 self
.report_warning(f
'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once
=True)
127 subtitles
.setdefault(s
['lan'], []).append({
129 'data': self
.json2srt(self
._download
_json
(s
['subtitle_url'], video_id
)),
133 def _get_chapters(self
, aid
, cid
):
134 chapters
= aid
and cid
and self
._download
_json
(
135 'https://api.bilibili.com/x/player/v2', aid
, query
={'aid': aid, 'cid': cid}
,
136 note
='Extracting chapters', fatal
=False)
137 return traverse_obj(chapters
, ('data', 'view_points', ..., {
139 'start_time': 'from',
143 def _get_comments(self
, aid
):
144 for idx
in itertools
.count(1):
145 replies
= traverse_obj(
147 f
'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
148 aid
, note
=f
'Extracting comments from page {idx}', fatal
=False),
152 for children
in map(self
._get
_all
_children
, replies
):
155 def _get_all_children(self
, reply
):
157 'author': traverse_obj(reply
, ('member', 'uname')),
158 'author_id': traverse_obj(reply
, ('member', 'mid')),
159 'id': reply
.get('rpid'),
160 'text': traverse_obj(reply
, ('content', 'message')),
161 'timestamp': reply
.get('ctime'),
162 'parent': reply
.get('parent') or 'root',
164 for children
in map(self
._get
_all
_children
, traverse_obj(reply
, ('replies', ...))):
167 def _get_episodes_from_season(self
, ss_id
, url
):
168 season_info
= self
._download
_json
(
169 'https://api.bilibili.com/pgc/web/season/section', ss_id
,
170 note
='Downloading season info', query
={'season_id': ss_id}
,
171 headers
={'Referer': url, **self.geo_verification_headers()}
)
173 for entry
in traverse_obj(season_info
, (
174 'result', 'main_section', 'episodes',
175 lambda _
, v
: url_or_none(v
['share_url']) and v
['id'])):
176 yield self
.url_result(entry
['share_url'], BiliBiliBangumiIE
, str_or_none(entry
.get('id')))
178 def _get_divisions(self
, video_id
, graph_version
, edges
, edge_id
, cid_edges
=None):
179 cid_edges
= cid_edges
or {}
180 division_data
= self
._download
_json
(
181 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id
,
182 query
={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id}
,
183 note
=f
'Extracting divisions from edge {edge_id}')
184 edges
.setdefault(edge_id
, {}).update(
185 traverse_obj(division_data
, ('data', 'story_list', lambda _
, v
: v
['edge_id'] == edge_id
, {
186 'title': ('title', {str}
),
187 'cid': ('cid', {int_or_none}
),
190 edges
[edge_id
].update(traverse_obj(division_data
, ('data', {
191 'title': ('title', {str}
),
192 'choices': ('edges', 'questions', ..., 'choices', ..., {
193 'edge_id': ('id', {int_or_none}
),
194 'cid': ('cid', {int_or_none}
),
195 'text': ('option', {str}
),
198 # use dict to combine edges that use the same video section (same cid)
199 cid_edges
.setdefault(edges
[edge_id
]['cid'], {})[edge_id
] = edges
[edge_id
]
200 for choice
in traverse_obj(edges
, (edge_id
, 'choices', ...)):
201 if choice
['edge_id'] not in edges
:
202 edges
[choice
['edge_id']] = {'cid': choice['cid']}
203 self
._get
_divisions
(video_id
, graph_version
, edges
, choice
['edge_id'], cid_edges
=cid_edges
)
206 def _get_interactive_entries(self
, video_id
, cid
, metainfo
):
207 graph_version
= traverse_obj(
209 'https://api.bilibili.com/x/player/wbi/v2', video_id
,
210 'Extracting graph version', query
={'bvid': video_id, 'cid': cid}
),
211 ('data', 'interaction', 'graph_version', {int_or_none}
))
212 cid_edges
= self
._get
_divisions
(video_id
, graph_version
, {1: {'cid': cid}
}, 1)
213 for cid
, edges
in cid_edges
.items():
214 play_info
= self
._download
_playinfo
(video_id
, cid
)
217 'id': f
'{video_id}_{cid}',
218 'title': f
'{metainfo.get("title")} - {next(iter(edges.values())).get("title")}',
219 'formats': self
.extract_formats(play_info
),
220 'description': f
'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
221 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
222 'subtitles': self
.extract_subtitles(video_id
, cid
),
226 class BiliBiliIE(BilibiliBaseIE
):
227 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
230 'url': 'https://www.bilibili.com/video/BV13x41117TL',
232 'id': 'BV13x41117TL',
233 'title': '阿滴英文|英文歌分享#6 "Closer',
235 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
236 'uploader_id': '65880958',
238 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
241 'comment_count': int,
242 'upload_date': '20170301',
243 'timestamp': 1488353834,
248 'note': 'old av URL version',
249 'url': 'http://www.bilibili.com/video/av1074402/',
251 'thumbnail': r
're:^https?://.*\.(jpg|jpeg)$',
254 'uploader_id': '156160',
255 'id': 'BV11x411K7CN',
258 'upload_date': '20140420',
259 'timestamp': 1397983878,
260 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
262 'comment_count': int,
266 'params': {'skip_download': True}
,
269 'url': 'https://www.bilibili.com/video/BV1bK411W797',
271 'id': 'BV1bK411W797',
272 'title': '物语中的人物是如何吐槽自己的OP的',
274 'playlist_count': 18,
277 'id': 'BV1bK411W797_p1',
279 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
281 'timestamp': 1589601697,
282 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
283 'uploader': '打牌还是打桩',
284 'uploader_id': '150259984',
286 'comment_count': int,
287 'upload_date': '20200516',
289 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
294 'note': 'Specific page of Anthology',
295 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
297 'id': 'BV1bK411W797_p1',
299 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
301 'timestamp': 1589601697,
302 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
303 'uploader': '打牌还是打桩',
304 'uploader_id': '150259984',
306 'comment_count': int,
307 'upload_date': '20200516',
309 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
313 'note': 'video has subtitles',
314 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
316 'id': 'BV12N4y1M7rh',
318 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
320 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
322 'upload_date': '20220709',
324 'timestamp': 1657347907,
325 'uploader_id': '1326814124',
326 'comment_count': int,
329 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
330 'subtitles': 'count:2',
332 'params': {'listsubtitles': True}
,
334 'url': 'https://www.bilibili.com/video/av8903802/',
336 'id': 'BV13x41117TL',
338 'title': '阿滴英文|英文歌分享#6 "Closer',
339 'upload_date': '20170301',
340 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
341 'timestamp': 1488353834,
342 'uploader_id': '65880958',
344 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
347 'comment_count': int,
352 'skip_download': True,
355 'note': 'video has chapter',
356 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
358 'id': 'BV1vL411G7N7',
360 'title': '如何为你的B站视频添加进度条分段',
361 'timestamp': 1634554558,
362 'upload_date': '20211018',
363 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
365 'uploader': '爱喝咖啡的当麻',
367 'uploader_id': '1680903',
368 'chapters': 'count:6',
369 'comment_count': int,
372 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
374 'params': {'skip_download': True}
,
376 'note': 'video redirects to festival page',
377 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
379 'id': 'BV1wP4y1P72h',
381 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
382 'timestamp': 1643947497,
383 'upload_date': '20220204',
384 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
385 'uploader': '叨叨冯聊音乐',
387 'uploader_id': '528182630',
390 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
392 'params': {'skip_download': True}
,
394 'note': 'newer festival video',
395 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
397 'id': 'BV1ay4y1d77f',
399 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
400 'timestamp': 1674273600,
401 'upload_date': '20230121',
402 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
404 'duration': 1111.722,
405 'uploader_id': '8469526',
408 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
410 'params': {'skip_download': True}
,
412 'note': 'interactive/split-path video',
413 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
415 'id': 'BV1af4y1H7ga',
416 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
417 'timestamp': 1630500414,
418 'upload_date': '20210901',
419 'description': 'md5:01113e39ab06e28042d74ac356a08786',
421 'uploader': '钉宫妮妮Ninico',
423 'uploader_id': '8881297',
424 'comment_count': int,
427 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
429 'playlist_count': 33,
432 'id': 'BV1af4y1H7ga_400950101',
434 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
435 'timestamp': 1630500414,
436 'upload_date': '20210901',
437 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
439 'uploader': '钉宫妮妮Ninico',
441 'uploader_id': '8881297',
442 'comment_count': int,
445 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
449 'note': '301 redirect to bangumi link',
450 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
453 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
458 'season_id': '28609',
460 'episode': '钱学森弹道和乘波体飞行器是什么?',
461 'episode_id': '288525',
462 'episode_number': 105,
463 'duration': 1183.957,
464 'timestamp': 1571648124,
465 'upload_date': '20191021',
466 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
469 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
471 'id': 'BV1jL41167ZG',
472 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
475 'skip': 'supporter-only video',
477 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
479 'id': 'BV1Ks411f7aQ',
480 'title': '【BD1080P】狼与香辛料I【华盟】',
483 'skip': 'login required',
485 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
487 'id': 'BV1GJ411x7h7',
488 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
491 'skip': 'geo-restricted',
494 def _real_extract(self
, url
):
495 video_id
= self
._match
_id
(url
)
496 headers
= self
.geo_verification_headers()
497 webpage
, urlh
= self
._download
_webpage
_handle
(url
, video_id
, headers
=headers
)
498 if not self
._match
_valid
_url
(urlh
.url
):
499 return self
.url_result(urlh
.url
)
501 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)
503 is_festival
= 'videoData' not in initial_state
505 video_data
= initial_state
['videoInfo']
507 play_info_obj
= self
._search
_json
(
508 r
'window\.__playinfo__\s*=', webpage
, 'play info', video_id
, fatal
=False)
509 if not play_info_obj
:
510 if traverse_obj(initial_state
, ('error', 'trueCode')) == -403:
511 self
.raise_login_required()
512 if traverse_obj(initial_state
, ('error', 'trueCode')) == -404:
513 raise ExtractorError(
514 'This video may be deleted or geo-restricted. '
515 'You might want to try a VPN or a proxy server (with --proxy)', expected
=True)
516 play_info
= traverse_obj(play_info_obj
, ('data', {dict}
))
518 if traverse_obj(play_info_obj
, 'code') == 87007:
519 toast
= get_element_by_class('tips-toast', webpage
) or ''
521 f
'{get_element_by_class("belongs-to", toast) or ""},'
522 + (get_element_by_class('level', toast
) or ''))
523 raise ExtractorError(
524 f
'This is a supporter-only video: {msg}. {self._login_hint()}', expected
=True)
525 raise ExtractorError('Failed to extract play info')
526 video_data
= initial_state
['videoData']
528 video_id
, title
= video_data
['bvid'], video_data
.get('title')
530 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
531 page_list_json
= not is_festival
and traverse_obj(
533 'https://api.bilibili.com/x/player/pagelist', video_id
,
534 fatal
=False, query
={'bvid': video_id, 'jsonp': 'jsonp'}
,
535 note
='Extracting videos in anthology', headers
=headers
),
536 'data', expected_type
=list) or []
537 is_anthology
= len(page_list_json
) > 1
539 part_id
= int_or_none(parse_qs(url
).get('p', [None])[-1])
540 if is_anthology
and not part_id
and self
._yes
_playlist
(video_id
, video_id
):
541 return self
.playlist_from_matches(
542 page_list_json
, video_id
, title
, ie
=BiliBiliIE
,
543 getter
=lambda entry
: f
'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
546 part_id
= part_id
or 1
547 title
+= f
' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
549 aid
= video_data
.get('aid')
550 old_video_id
= format_field(aid
, None, f
'%s_part{part_id or 1}')
552 cid
= traverse_obj(video_data
, ('pages', part_id
- 1, 'cid')) if part_id
else video_data
.get('cid')
556 play_info
= self
._download
_playinfo
(video_id
, cid
, headers
=headers
)
558 festival_info
= traverse_obj(initial_state
, {
559 'uploader': ('videoInfo', 'upName'),
560 'uploader_id': ('videoInfo', 'upMid', {str_or_none}
),
561 'like_count': ('videoStatus', 'like', {int_or_none}
),
562 'thumbnail': ('sectionEpisodes', lambda _
, v
: v
['bvid'] == video_id
, 'cover'),
566 **traverse_obj(initial_state
, {
567 'uploader': ('upData', 'name'),
568 'uploader_id': ('upData', 'mid', {str_or_none}
),
569 'like_count': ('videoData', 'stat', 'like', {int_or_none}
),
570 'tags': ('tags', ..., 'tag_name'),
571 'thumbnail': ('videoData', 'pic', {url_or_none}
),
574 **traverse_obj(video_data
, {
575 'description': 'desc',
576 'timestamp': ('pubdate', {int_or_none}
),
577 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}
),
578 'comment_count': ('stat', 'reply', {int_or_none}
),
580 'id': f
'{video_id}{format_field(part_id, None, "_p%d")}',
581 '_old_archive_ids': [make_archive_id(self
, old_video_id
)] if old_video_id
else None,
583 'http_headers': {'Referer': url}
,
586 is_interactive
= traverse_obj(video_data
, ('rights', 'is_stein_gate'))
588 return self
.playlist_result(
589 self
._get
_interactive
_entries
(video_id
, cid
, metainfo
), **metainfo
,
590 duration
=traverse_obj(initial_state
, ('videoData', 'duration', {int_or_none}
)),
591 __post_extractor
=self
.extract_comments(aid
))
595 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
596 'chapters': self
._get
_chapters
(aid
, cid
),
597 'subtitles': self
.extract_subtitles(video_id
, cid
),
598 'formats': self
.extract_formats(play_info
),
599 '__post_extractor': self
.extract_comments(aid
),
603 class BiliBiliBangumiIE(BilibiliBaseIE
):
604 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
607 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
616 'episode': 'forever/ef',
617 'episode_id': '21495',
618 'episode_number': 12,
619 'title': '12 forever/ef',
620 'duration': 1420.791,
621 'timestamp': 1320412200,
622 'upload_date': '20111104',
623 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
626 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
633 'season_id': '26801',
636 'episode_id': '267851',
639 'duration': 1425.256,
640 'timestamp': 1554566400,
641 'upload_date': '20190406',
642 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
644 'skip': 'Geo-restricted',
646 'note': 'a making-of which falls outside main section',
647 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
654 'season_id': '26801',
657 'episode_id': '345120',
658 'episode_number': 27,
660 'duration': 1922.129,
661 'timestamp': 1602853860,
662 'upload_date': '20201016',
663 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
667 def _real_extract(self
, url
):
668 episode_id
= self
._match
_id
(url
)
669 headers
= self
.geo_verification_headers()
670 webpage
= self
._download
_webpage
(url
, episode_id
, headers
=headers
)
672 if '您所在的地区无法观看本片' in webpage
:
673 raise GeoRestrictedError('This video is restricted')
674 elif '正在观看预览,大会员免费看全片' in webpage
:
675 self
.raise_login_required('This video is for premium members only')
677 headers
['Referer'] = url
678 play_info
= self
._download
_json
(
679 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id
,
680 'Extracting episode', query
={'fnval': '4048', 'ep_id': episode_id}
,
682 premium_only
= play_info
.get('code') == -10403
683 play_info
= traverse_obj(play_info
, ('result', 'video_info', {dict}
)) or {}
685 formats
= self
.extract_formats(play_info
)
686 if not formats
and (premium_only
or '成为大会员抢先看' in webpage
or '开通大会员观看' in webpage
):
687 self
.raise_login_required('This video is for premium members only')
689 bangumi_info
= self
._download
_json
(
690 'https://api.bilibili.com/pgc/view/web/season', episode_id
, 'Get episode details',
691 query
={'ep_id': episode_id}
, headers
=headers
)['result']
693 episode_number
, episode_info
= next((
694 (idx
, ep
) for idx
, ep
in enumerate(traverse_obj(
695 bangumi_info
, (('episodes', ('section', ..., 'episodes')), ..., {dict}
)), 1)
696 if str_or_none(ep
.get('id')) == episode_id
), (1, {}))
698 season_id
= bangumi_info
.get('season_id')
699 season_number
, season_title
= season_id
and next((
700 (idx
+ 1, e
.get('season_title')) for idx
, e
in enumerate(
701 traverse_obj(bangumi_info
, ('seasons', ...)))
702 if e
.get('season_id') == season_id
705 aid
= episode_info
.get('aid')
710 **traverse_obj(bangumi_info
, {
711 'series': ('series', 'series_title', {str}
),
712 'series_id': ('series', 'series_id', {str_or_none}
),
713 'thumbnail': ('square_cover', {url_or_none}
),
715 **traverse_obj(episode_info
, {
716 'episode': ('long_title', {str}
),
717 'episode_number': ('title', {int_or_none}
, {lambda x: x or episode_number}
),
718 'timestamp': ('pub_time', {int_or_none}
),
719 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)}
,
721 'episode_id': episode_id
,
722 'season': str_or_none(season_title
),
723 'season_id': str_or_none(season_id
),
724 'season_number': season_number
,
725 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
726 'subtitles': self
.extract_subtitles(episode_id
, episode_info
.get('cid'), aid
=aid
),
727 '__post_extractor': self
.extract_comments(aid
),
728 'http_headers': {'Referer': url}
,
732 class BiliBiliBangumiMediaIE(BilibiliBaseIE
):
733 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
735 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
738 'title': 'CAROLE & TUESDAY',
739 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
741 'playlist_mincount': 25,
743 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
746 'title': '攻壳机动队 S.A.C. 2nd GIG',
747 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
749 'playlist_count': 26,
759 'episode': '再启动 REEMBODY',
760 'episode_id': '68540',
762 'title': '1 再启动 REEMBODY',
763 'duration': 1525.777,
764 'timestamp': 1425074413,
765 'upload_date': '20150227',
766 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
771 def _real_extract(self
, url
):
772 media_id
= self
._match
_id
(url
)
773 webpage
= self
._download
_webpage
(url
, media_id
)
775 initial_state
= self
._search
_json
(
776 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial_state', media_id
)
777 ss_id
= initial_state
['mediaInfo']['season_id']
779 return self
.playlist_result(
780 self
._get
_episodes
_from
_season
(ss_id
, url
), media_id
,
781 **traverse_obj(initial_state
, ('mediaInfo', {
782 'title': ('title', {str}
),
783 'description': ('evaluate', {str}
),
787 class BiliBiliBangumiSeasonIE(BilibiliBaseIE
):
788 _VALID_URL
= r
'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
790 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
794 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
796 'playlist_mincount': 26,
798 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
802 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
804 'playlist_count': 13,
815 'episode_id': '50188',
818 'duration': 1436.992,
819 'timestamp': 1343185080,
820 'upload_date': '20120725',
821 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
826 def _real_extract(self
, url
):
827 ss_id
= self
._match
_id
(url
)
828 webpage
= self
._download
_webpage
(url
, ss_id
)
829 metainfo
= traverse_obj(
830 self
._search
_json
(r
'<script[^>]+type="application/ld\+json"[^>]*>', webpage
, 'info', ss_id
),
831 ('itemListElement', ..., {
832 'title': ('name', {str}
),
833 'description': ('description', {str}
),
836 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), ss_id
, **metainfo
)
839 class BilibiliCheeseBaseIE(BilibiliBaseIE
):
840 _HEADERS
= {'Referer': 'https://www.bilibili.com/'}
842 def _extract_episode(self
, season_info
, ep_id
):
843 episode_info
= traverse_obj(season_info
, (
844 'episodes', lambda _
, v
: v
['id'] == int(ep_id
)), get_all
=False)
845 aid
, cid
= episode_info
['aid'], episode_info
['cid']
847 if traverse_obj(episode_info
, 'ep_status') == -1:
848 raise ExtractorError('This course episode is not yet available.', expected
=True)
849 if not traverse_obj(episode_info
, 'playable'):
850 self
.raise_login_required('You need to purchase the course to download this episode')
852 play_info
= self
._download
_json
(
853 'https://api.bilibili.com/pugv/player/web/playurl', ep_id
,
854 query
={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1}
,
855 headers
=self
._HEADERS
, note
='Downloading playinfo')['data']
858 'id': str_or_none(ep_id
),
859 'episode_id': str_or_none(ep_id
),
860 'formats': self
.extract_formats(play_info
),
861 'extractor_key': BilibiliCheeseIE
.ie_key(),
862 'extractor': BilibiliCheeseIE
.IE_NAME
,
863 'webpage_url': f
'https://www.bilibili.com/cheese/play/ep{ep_id}',
864 **traverse_obj(episode_info
, {
865 'episode': ('title', {str}
),
866 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)}
,
867 'alt_title': ('subtitle', {str}
),
868 'duration': ('duration', {int_or_none}
),
869 'episode_number': ('index', {int_or_none}
),
870 'thumbnail': ('cover', {url_or_none}
),
871 'timestamp': ('release_date', {int_or_none}
),
872 'view_count': ('play', {int_or_none}
),
874 **traverse_obj(season_info
, {
875 'uploader': ('up_info', 'uname', {str}
),
876 'uploader_id': ('up_info', 'mid', {str_or_none}
),
878 'subtitles': self
.extract_subtitles(ep_id
, cid
, aid
=aid
),
879 '__post_extractor': self
.extract_comments(aid
),
880 'http_headers': self
._HEADERS
,
883 def _download_season_info(self
, query_key
, video_id
):
884 return self
._download
_json
(
885 f
'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id
,
886 headers
=self
._HEADERS
, note
='Downloading season info')['data']
889 class BilibiliCheeseIE(BilibiliCheeseBaseIE
):
890 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
892 'url': 'https://www.bilibili.com/cheese/play/ep229832',
896 'title': '1 - 课程先导片',
897 'alt_title': '视频课 · 3分41秒',
899 'uploader_id': '316568752',
901 'episode_id': '229832',
904 'timestamp': 1695549606,
905 'upload_date': '20230924',
906 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
911 def _real_extract(self
, url
):
912 ep_id
= self
._match
_id
(url
)
913 return self
._extract
_episode
(self
._download
_season
_info
('ep_id', ep_id
), ep_id
)
916 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE
):
917 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
919 'url': 'https://www.bilibili.com/cheese/play/ss5918',
922 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
923 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
929 'title': '1 - 课程先导片',
930 'alt_title': '视频课 · 3分41秒',
932 'uploader_id': '316568752',
934 'episode_id': '229832',
937 'timestamp': 1695549606,
938 'upload_date': '20230924',
939 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
943 'params': {'playlist_items': '1'}
,
945 'url': 'https://www.bilibili.com/cheese/play/ss5918',
948 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
949 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
951 'playlist_mincount': 5,
952 'skip': 'paid video in list',
955 def _get_cheese_entries(self
, season_info
):
956 for ep_id
in traverse_obj(season_info
, ('episodes', lambda _
, v
: v
['episode_can_view'], 'id')):
957 yield self
._extract
_episode
(season_info
, ep_id
)
959 def _real_extract(self
, url
):
960 season_id
= self
._match
_id
(url
)
961 season_info
= self
._download
_season
_info
('season_id', season_id
)
963 return self
.playlist_result(
964 self
._get
_cheese
_entries
(season_info
), season_id
,
965 **traverse_obj(season_info
, {
966 'title': ('title', {str}
),
967 'description': ('subtitle', {str}
),
class BilibiliSpaceBaseIE(InfoExtractor):
    """Shared pagination scaffolding for space.bilibili.com listing extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Build (metadata, lazily paged entry list) from the supplied callbacks.

        fetch_page(idx) downloads page *idx*; get_metadata(page) derives
        'page_count'/'page_size'; get_entries(page) yields the page's entries.
        """
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def load_page(page_idx):
            # Reuse the already-downloaded first page rather than fetching it twice.
            page = fetch_page(page_idx) if page_idx else initial_page
            return get_entries(page)

        entries = InAdvancePagedList(load_page, metadata['page_count'], metadata['page_size'])
        return metadata, entries
983 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE
):
984 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
986 'url': 'https://space.bilibili.com/3985676/video',
990 'playlist_mincount': 178,
992 'url': 'https://space.bilibili.com/313580179/video',
996 'playlist_mincount': 92,
999 def _extract_signature(self
, playlist_id
):
1000 session_data
= self
._download
_json
('https://api.bilibili.com/x/web-interface/nav', playlist_id
, fatal
=False)
1002 key_from_url
= lambda x
: x
[x
.rfind('/') + 1:].split('.')[0]
1003 img_key
= traverse_obj(
1004 session_data
, ('data', 'wbi_img', 'img_url', {key_from_url}
)) or '34478ba821254d9d93542680e3b86100'
1005 sub_key
= traverse_obj(
1006 session_data
, ('data', 'wbi_img', 'sub_url', {key_from_url}
)) or '7e16a90d190a4355a78fd00b32a38de6'
1008 session_key
= img_key
+ sub_key
1010 signature_values
= []
1012 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
1013 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
1014 57, 62, 11, 36, 20, 34, 44, 52,
1016 char_at_position
= try_call(lambda: session_key
[position
])
1017 if char_at_position
:
1018 signature_values
.append(char_at_position
)
1020 return ''.join(signature_values
)[:32]
1022 def _real_extract(self
, url
):
1023 playlist_id
, is_video_url
= self
._match
_valid
_url
(url
).group('id', 'video')
1024 if not is_video_url
:
1025 self
.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1026 'To download audios, add a "/audio" to the URL')
1028 signature
= self
._extract
_signature
(playlist_id
)
1030 def fetch_page(page_idx
):
1035 'order_avoided': 'true',
1040 'web_location': 1550101,
1041 'wts': int(time
.time()),
1043 query
['w_rid'] = hashlib
.md5(f
'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
1046 response
= self
._download
_json
('https://api.bilibili.com/x/space/wbi/arc/search',
1047 playlist_id
, note
=f
'Downloading page {page_idx}', query
=query
,
1048 headers
={'referer': url}
)
1049 except ExtractorError
as e
:
1050 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 412:
1051 raise ExtractorError(
1052 'Request is blocked by server (412), please add cookies, wait and try later.', expected
=True)
1054 if response
['code'] in (-352, -401):
1055 raise ExtractorError(
1056 f
'Request is blocked by server ({-response["code"]}), '
1057 'please add cookies, wait and try later.', expected
=True)
1058 return response
['data']
1060 def get_metadata(page_data
):
1061 page_size
= page_data
['page']['ps']
1062 entry_count
= page_data
['page']['count']
1064 'page_count': math
.ceil(entry_count
/ page_size
),
1065 'page_size': page_size
,
1068 def get_entries(page_data
):
1069 for entry
in traverse_obj(page_data
, ('list', 'vlist')) or []:
1070 yield self
.url_result(f
'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE
, entry
['bvid'])
1072 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
1073 return self
.playlist_result(paged_list
, playlist_id
)
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Paged playlist of all audio uploads in a user's space."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # The API is 1-indexed and serves 30 songs per page
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for entry in page_data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Shared helpers for list-style space extractors (collections, series, favlists)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        # Yield a BiliBiliIE url_result for each BV id found under bvid_keys
        bvid_path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, bvid_path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        # Best-effort: scrape the uploader name out of the space page <title>
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        # Paging internals are not part of the public playlist metadata, so drop them
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
1125 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE
):
1126 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
1128 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
1130 'id': '2142762_57445',
1131 'title': '【完结】《底特律 变人》全结局流程解说',
1134 'uploader_id': '2142762',
1137 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
1139 'playlist_mincount': 31,
1142 def _real_extract(self
, url
):
1143 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
1144 playlist_id
= f
'{mid}_{sid}'
1146 def fetch_page(page_idx
):
1147 return self
._download
_json
(
1148 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
1149 playlist_id
, note
=f
'Downloading page {page_idx}',
1150 query
={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
)['data']
1152 def get_metadata(page_data
):
1153 page_size
= page_data
['page']['page_size']
1154 entry_count
= page_data
['page']['total']
1156 'page_count': math
.ceil(entry_count
/ page_size
),
1157 'page_size': page_size
,
1158 'uploader': self
._get
_uploader
(mid
, playlist_id
),
1159 **traverse_obj(page_data
, {
1160 'title': ('meta', 'name', {str}
),
1161 'description': ('meta', 'description', {str}
),
1162 'uploader_id': ('meta', 'mid', {str_or_none}
),
1163 'timestamp': ('meta', 'ptime', {int_or_none}
),
1164 'thumbnail': ('meta', 'cover', {url_or_none}
),
1168 def get_entries(page_data
):
1169 return self
._get
_entries
(page_data
, 'archives')
1171 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
1172 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
1175 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE
):
1176 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
1178 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
1180 'id': '1958703906_547718',
1182 'description': '直播回放',
1183 'uploader': '靡烟miya',
1184 'uploader_id': '1958703906',
1185 'timestamp': 1637985853,
1186 'upload_date': '20211127',
1187 'modified_timestamp': int,
1188 'modified_date': str,
1190 'playlist_mincount': 513,
1193 def _real_extract(self
, url
):
1194 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
1195 playlist_id
= f
'{mid}_{sid}'
1196 playlist_meta
= traverse_obj(self
._download
_json
(
1197 f
'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id
, fatal
=False,
1199 'title': ('data', 'meta', 'name', {str}
),
1200 'description': ('data', 'meta', 'description', {str}
),
1201 'uploader_id': ('data', 'meta', 'mid', {str_or_none}
),
1202 'timestamp': ('data', 'meta', 'ctime', {int_or_none}
),
1203 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}
),
1206 def fetch_page(page_idx
):
1207 return self
._download
_json
(
1208 'https://api.bilibili.com/x/series/archives',
1209 playlist_id
, note
=f
'Downloading page {page_idx}',
1210 query
={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
)['data']
1212 def get_metadata(page_data
):
1213 page_size
= page_data
['page']['size']
1214 entry_count
= page_data
['page']['total']
1216 'page_count': math
.ceil(entry_count
/ page_size
),
1217 'page_size': page_size
,
1218 'uploader': self
._get
_uploader
(mid
, playlist_id
),
1222 def get_entries(page_data
):
1223 return self
._get
_entries
(page_data
, 'archives')
1225 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
1226 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
1229 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE
):
1230 _VALID_URL
= r
'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
1232 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
1238 'uploader_id': '84912',
1239 'timestamp': 1604905176,
1240 'upload_date': '20201109',
1241 'modified_timestamp': int,
1242 'modified_date': str,
1243 'thumbnail': r
're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1247 'playlist_mincount': 22,
1249 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
1250 'only_matching': True,
1253 def _real_extract(self
, url
):
1254 fid
= self
._match
_id
(url
)
1256 list_info
= self
._download
_json
(
1257 f
'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
1258 fid
, note
='Downloading favlist metadata')
1259 if list_info
['code'] == -403:
1260 self
.raise_login_required(msg
='This is a private favorites list. You need to log in as its owner')
1262 entries
= self
._get
_entries
(self
._download
_json
(
1263 f
'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
1264 fid
, note
='Download favlist entries'), 'data')
1266 return self
.playlist_result(entries
, fid
, **traverse_obj(list_info
, ('data', 'info', {
1267 'title': ('title', {str}
),
1268 'description': ('intro', {str}
),
1269 'uploader': ('upper', 'name', {str}
),
1270 'uploader_id': ('upper', 'mid', {str_or_none}
),
1271 'timestamp': ('ctime', {int_or_none}
),
1272 'modified_timestamp': ('mtime', {int_or_none}
),
1273 'thumbnail': ('cover', {url_or_none}
),
1274 'view_count': ('cnt_info', 'play', {int_or_none}
),
1275 'like_count': ('cnt_info', 'thumb_up', {int_or_none}
),
1279 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE
):
1280 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
1282 'url': 'https://www.bilibili.com/watchlater/#/list',
1283 'info_dict': {'id': 'watchlater'}
,
1284 'playlist_mincount': 0,
1285 'skip': 'login required',
1288 def _real_extract(self
, url
):
1289 list_id
= getattr(self
._get
_cookies
(url
).get('DedeUserID'), 'value', 'watchlater')
1290 watchlater_info
= self
._download
_json
(
1291 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id
)
1292 if watchlater_info
['code'] == -101:
1293 self
.raise_login_required(msg
='You need to login to access your watchlater list')
1294 entries
= self
._get
_entries
(watchlater_info
, ('data', 'list'))
1295 return self
.playlist_result(entries
, id=list_id
, title
='稍后再看')
1298 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE
):
1299 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
1301 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
1305 'uploader': '靡烟miya',
1306 'uploader_id': '1958703906',
1307 'timestamp': 1637985853,
1308 'upload_date': '20211127',
1310 'playlist_mincount': 513,
1312 'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
1314 'id': 'BV1DU4y1r7tz',
1316 'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
1317 'upload_date': '20220820',
1319 'timestamp': 1661016330,
1320 'uploader_id': '1958703906',
1321 'uploader': '靡烟miya',
1322 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
1323 'duration': 9552.903,
1325 'comment_count': int,
1328 '_old_archive_ids': ['bilibili 687146339_part1'],
1330 'params': {'noplaylist': True}
,
1332 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
1336 'playlist_mincount': 513,
1337 'skip': 'redirect url',
1339 'url': 'https://www.bilibili.com/list/ml1103407912',
1341 'id': '3_1103407912',
1344 'uploader_id': '84912',
1345 'timestamp': 1604905176,
1346 'upload_date': '20201109',
1347 'thumbnail': r
're:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg',
1349 'playlist_mincount': 22,
1351 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
1353 'id': '3_1103407912',
1355 'playlist_mincount': 22,
1356 'skip': 'redirect url',
1358 'url': 'https://www.bilibili.com/list/watchlater',
1359 'info_dict': {'id': 'watchlater'}
,
1360 'playlist_mincount': 0,
1361 'skip': 'login required',
1363 'url': 'https://www.bilibili.com/medialist/play/watchlater',
1364 'info_dict': {'id': 'watchlater'}
,
1365 'playlist_mincount': 0,
1366 'skip': 'login required',
1369 def _extract_medialist(self
, query
, list_id
):
1370 for page_num
in itertools
.count(1):
1371 page_data
= self
._download
_json
(
1372 'https://api.bilibili.com/x/v2/medialist/resource/list',
1373 list_id
, query
=query
, note
=f
'getting playlist {query["biz_id"]} page {page_num}',
1375 yield from self
._get
_entries
(page_data
, 'media_list', ending_key
='bv_id')
1376 query
['oid'] = traverse_obj(page_data
, ('media_list', -1, 'id'))
1377 if not page_data
.get('has_more', False):
1380 def _real_extract(self
, url
):
1381 list_id
= self
._match
_id
(url
)
1383 bvid
= traverse_obj(parse_qs(url
), ('bvid', 0))
1384 if not self
._yes
_playlist
(list_id
, bvid
):
1385 return self
.url_result(f
'https://www.bilibili.com/video/{bvid}', BiliBiliIE
)
1387 webpage
= self
._download
_webpage
(url
, list_id
)
1388 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', list_id
)
1389 if traverse_obj(initial_state
, ('error', 'code', {int_or_none}
)) != 200:
1390 error_code
= traverse_obj(initial_state
, ('error', 'trueCode', {int_or_none}
))
1391 error_message
= traverse_obj(initial_state
, ('error', 'message', {str_or_none}
))
1392 if error_code
== -400 and list_id
== 'watchlater':
1393 self
.raise_login_required('You need to login to access your watchlater playlist')
1394 elif error_code
== -403:
1395 self
.raise_login_required('This is a private playlist. You need to login as its owner')
1396 elif error_code
== 11010:
1397 raise ExtractorError('Playlist is no longer available', expected
=True)
1398 raise ExtractorError(f
'Could not access playlist: {error_code} {error_message}')
1402 'with_current': False,
1403 **traverse_obj(initial_state
, {
1404 'type': ('playlist', 'type', {int_or_none}
),
1405 'biz_id': ('playlist', 'id', {int_or_none}
),
1406 'tid': ('tid', {int_or_none}
),
1407 'sort_field': ('sortFiled', {int_or_none}
),
1408 'desc': ('desc', {bool_or_none}
, {str_or_none}
, {str.lower}
),
1412 'id': f
'{query["type"]}_{query["biz_id"]}',
1413 **traverse_obj(initial_state
, ('mediaListInfo', {
1414 'title': ('title', {str}
),
1415 'uploader': ('upper', 'name', {str}
),
1416 'uploader_id': ('upper', 'mid', {str_or_none}
),
1417 'timestamp': ('ctime', {int_or_none}
),
1418 'thumbnail': ('cover', {url_or_none}
),
1421 return self
.playlist_result(self
._extract
_medialist
(query
, list_id
), **metadata
)
1424 class BilibiliCategoryIE(InfoExtractor
):
1425 IE_NAME
= 'Bilibili category extractor'
1426 _MAX_RESULTS
= 1000000
1427 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
1429 'url': 'https://www.bilibili.com/v/kichiku/mad',
1431 'id': 'kichiku: mad',
1432 'title': 'kichiku: mad',
1434 'playlist_mincount': 45,
1440 def _fetch_page(self
, api_url
, num_pages
, query
, page_num
):
1441 parsed_json
= self
._download
_json
(
1442 api_url
, query
, query
={'Search_key': query, 'pn': page_num}
,
1443 note
=f
'Extracting results from page {page_num} of {num_pages}')
1445 video_list
= traverse_obj(parsed_json
, ('data', 'archives'), expected_type
=list)
1447 raise ExtractorError(f
'Failed to retrieve video list for page {page_num}')
1449 for video
in video_list
:
1450 yield self
.url_result(
1451 'https://www.bilibili.com/video/{}'.format(video
['bvid']), 'BiliBili', video
['bvid'])
1453 def _entries(self
, category
, subcategory
, query
):
1454 # map of categories : subcategories : RIDs
1458 'manual_vocaloid': 126,
1465 if category
not in rid_map
:
1466 raise ExtractorError(
1467 f
'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1468 if subcategory
not in rid_map
[category
]:
1469 raise ExtractorError(
1470 f
'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1471 rid_value
= rid_map
[category
][subcategory
]
1473 api_url
= 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1474 page_json
= self
._download
_json
(api_url
, query
, query
={'Search_key': query, 'pn': '1'}
)
1475 page_data
= traverse_obj(page_json
, ('data', 'page'), expected_type
=dict)
1476 count
, size
= int_or_none(page_data
.get('count')), int_or_none(page_data
.get('size'))
1477 if count
is None or not size
:
1478 raise ExtractorError('Failed to calculate either page count or size')
1480 num_pages
= math
.ceil(count
/ size
)
1482 return OnDemandPagedList(functools
.partial(
1483 self
._fetch
_page
, api_url
, num_pages
, query
), size
)
1485 def _real_extract(self
, url
):
1486 category
, subcategory
= urllib
.parse
.urlparse(url
).path
.split('/')[2:4]
1487 query
= f
'{category}: {subcategory}'
1489 return self
.playlist_result(self
._entries
(category
, subcategory
, query
), query
, query
)
1492 class BiliBiliSearchIE(SearchInfoExtractor
):
1493 IE_DESC
= 'Bilibili video search'
1494 _MAX_RESULTS
= 100000
1495 _SEARCH_KEY
= 'bilisearch'
1497 'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1498 'playlist_count': 3,
1500 'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1501 'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
1505 'id': 'BV1n44y1Q7sc',
1507 'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
1508 'timestamp': 1669889987,
1509 'upload_date': '20221201',
1510 'description': 'md5:43343c0973defff527b5a4b403b4abf9',
1512 'uploader': '靡烟miya',
1513 'duration': 123.156,
1514 'uploader_id': '1958703906',
1515 'comment_count': int,
1518 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
1519 '_old_archive_ids': ['bilibili 988222410_part1'],
1524 def _search_results(self
, query
):
1525 if not self
._get
_cookies
('https://api.bilibili.com').get('buvid3'):
1526 self
._set
_cookie
('.bilibili.com', 'buvid3', f
'{uuid.uuid4()}infoc')
1527 for page_num
in itertools
.count(1):
1528 videos
= self
._download
_json
(
1529 'https://api.bilibili.com/x/web-interface/search/type', query
,
1530 note
=f
'Extracting results from page {page_num}', query
={
1531 'Search_key': query
,
1537 '__refresh__': 'true',
1538 'search_type': 'video',
1541 })['data'].get('result')
1544 for video
in videos
:
1545 yield self
.url_result(video
['arcurl'], 'BiliBili', str(video
['aid']))
class BilibiliAudioBaseIE(InfoExtractor):
    """Shared API helper for the bilibili audio extractors."""

    def _call_api(self, path, sid, query=None):
        # Default to querying by song/menu id when no explicit query is given
        if not query:
            query = {'sid': sid}
        return self._download_json(
            'https://www.bilibili.com/audio/music-service-c/web/' + path,
            sid, query=query)['data']
1557 class BilibiliAudioIE(BilibiliAudioBaseIE
):
1558 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1560 'url': 'https://www.bilibili.com/audio/au1003142',
1561 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1565 'title': '【tsukimi】YELLOW / 神山羊',
1566 'artist': 'tsukimi',
1567 'comment_count': int,
1568 'description': 'YELLOW的mp3版!',
1575 'thumbnail': r
're:^https?://.+\.jpg',
1576 'timestamp': 1564836614,
1577 'upload_date': '20190803',
1578 'uploader': 'tsukimi-つきみぐー',
1583 def _real_extract(self
, url
):
1584 au_id
= self
._match
_id
(url
)
1586 play_data
= self
._call
_api
('url', au_id
)
1588 'url': play_data
['cdns'][0],
1589 'filesize': int_or_none(play_data
.get('size')),
1593 for a_format
in formats
:
1594 a_format
.setdefault('http_headers', {}).update({
1598 song
= self
._call
_api
('song/info', au_id
)
1599 title
= song
['title']
1600 statistic
= song
.get('statistic') or {}
1603 lyric
= song
.get('lyric')
1615 'artist': song
.get('author'),
1616 'comment_count': int_or_none(statistic
.get('comment')),
1617 'description': song
.get('intro'),
1618 'duration': int_or_none(song
.get('duration')),
1619 'subtitles': subtitles
,
1620 'thumbnail': song
.get('cover'),
1621 'timestamp': int_or_none(song
.get('passtime')),
1622 'uploader': song
.get('uname'),
1623 'view_count': int_or_none(statistic
.get('play')),
1627 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE
):
1628 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1630 'url': 'https://www.bilibili.com/audio/am10624',
1633 'title': '每日新曲推荐(每日11:00更新)',
1634 'description': '每天11:00更新,为你推送最新音乐',
1636 'playlist_count': 19,
1639 def _real_extract(self
, url
):
1640 am_id
= self
._match
_id
(url
)
1642 songs
= self
._call
_api
(
1643 'song/of-menu', am_id
, {'sid': am_id, 'pn': 1, 'ps': 100}
)['data']
1647 sid
= str_or_none(song
.get('id'))
1650 entries
.append(self
.url_result(
1651 'https://www.bilibili.com/audio/au' + sid
,
1652 BilibiliAudioIE
.ie_key(), sid
))
1655 album_data
= self
._call
_api
('menu/info', am_id
) or {}
1656 album_title
= album_data
.get('title')
1658 for entry
in entries
:
1659 entry
['album'] = album_title
1660 return self
.playlist_result(
1661 entries
, am_id
, album_title
, album_data
.get('intro'))
1663 return self
.playlist_result(entries
, am_id
)
class BiliBiliPlayerIE(InfoExtractor):
    """Redirect embedded player URLs (player.bilibili.com) to the main video extractor."""
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self.url_result(
            f'http://www.bilibili.tv/video/av{video_id}/',
            ie=BiliBiliIE.ie_key(), video_id=video_id)
1680 class BiliIntlBaseIE(InfoExtractor
):
1681 _API_URL
= 'https://api.bilibili.tv/intl/gateway'
1682 _NETRC_MACHINE
= 'biliintl'
1683 _HEADERS
= {'Referer': 'https://www.bilibili.com/'}
1685 def _call_api(self
, endpoint
, *args
, **kwargs
):
1686 json
= self
._download
_json
(self
._API
_URL
+ endpoint
, *args
, **kwargs
)
1687 if json
.get('code'):
1688 if json
['code'] in (10004004, 10004005, 10023006):
1689 self
.raise_login_required()
1690 elif json
['code'] == 10004001:
1691 self
.raise_geo_restricted()
1693 if json
.get('message') and str(json
['code']) != json
['message']:
1694 errmsg
= f
'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1696 errmsg
= kwargs
.get('errnote', 'Unable to download JSON metadata')
1697 if kwargs
.get('fatal'):
1698 raise ExtractorError(errmsg
)
1700 self
.report_warning(errmsg
)
1701 return json
.get('data')
1703 def json2srt(self
, json
):
1705 f
'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1706 for i
, line
in enumerate(traverse_obj(json
, (
1707 'body', lambda _
, l
: l
['content'] and l
['from'] and l
['to']))))
1709 def _get_subtitles(self
, *, ep_id
=None, aid
=None):
1710 sub_json
= self
._call
_api
(
1711 '/web/v2/subtitle', ep_id
or aid
, fatal
=False,
1712 note
='Downloading subtitles list', errnote
='Unable to download subtitles list',
1715 's_locale': 'en_US',
1716 'episode_id': ep_id
,
1720 fetched_urls
= set()
1721 for sub
in traverse_obj(sub_json
, (('subtitles', 'video_subtitle'), ..., {dict}
)):
1722 for url
in traverse_obj(sub
, ((None, 'ass', 'srt'), 'url', {url_or_none}
)):
1723 if url
in fetched_urls
:
1725 fetched_urls
.add(url
)
1726 sub_ext
= determine_ext(url
)
1727 sub_lang
= sub
.get('lang_key') or 'en'
1729 if sub_ext
== 'ass':
1730 subtitles
.setdefault(sub_lang
, []).append({
1734 elif sub_ext
== 'json':
1735 sub_data
= self
._download
_json
(
1736 url
, ep_id
or aid
, fatal
=False,
1737 note
=f
'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
1738 errnote
='Unable to download subtitles')
1741 subtitles
.setdefault(sub_lang
, []).append({
1743 'data': self
.json2srt(sub_data
),
1746 self
.report_warning('Unexpected subtitle extension', ep_id
or aid
)
1750 def _get_formats(self
, *, ep_id
=None, aid
=None):
1751 video_json
= self
._call
_api
(
1752 '/web/playurl', ep_id
or aid
, note
='Downloading video formats',
1753 errnote
='Unable to download video formats', query
=filter_dict({
1758 video_json
= video_json
['playurl']
1760 for vid
in video_json
.get('video') or []:
1761 video_res
= vid
.get('video_resource') or {}
1762 video_info
= vid
.get('stream_info') or {}
1763 if not video_res
.get('url'):
1766 'url': video_res
['url'],
1768 'format_note': video_info
.get('desc_words'),
1769 'width': video_res
.get('width'),
1770 'height': video_res
.get('height'),
1771 'vbr': video_res
.get('bandwidth'),
1773 'vcodec': video_res
.get('codecs'),
1774 'filesize': video_res
.get('size'),
1776 for aud
in video_json
.get('audio_resource') or []:
1777 if not aud
.get('url'):
1782 'abr': aud
.get('bandwidth'),
1783 'acodec': aud
.get('codecs'),
1785 'filesize': aud
.get('size'),
1790 def _parse_video_metadata(self
, video_data
):
1792 'title': video_data
.get('title_display') or video_data
.get('title'),
1793 'description': video_data
.get('desc'),
1794 'thumbnail': video_data
.get('cover'),
1795 'timestamp': unified_timestamp(video_data
.get('formatted_pub_date')),
1796 'episode_number': int_or_none(self
._search
_regex
(
1797 r
'^E(\d+)(?:$| - )', video_data
.get('title_display') or '', 'episode number', default
=None)),
1800 def _perform_login(self
, username
, password
):
1801 if not Cryptodome
.RSA
:
1802 raise ExtractorError('pycryptodomex not found. Please install', expected
=True)
1804 key_data
= self
._download
_json
(
1805 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1806 note
='Downloading login key', errnote
='Unable to download login key')['data']
1808 public_key
= Cryptodome
.RSA
.importKey(key_data
['key'])
1809 password_hash
= Cryptodome
.PKCS1_v1_5
.new(public_key
).encrypt((key_data
['hash'] + password
).encode())
1810 login_post
= self
._download
_json
(
1811 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data
=urlencode_postdata({
1812 'username': username
,
1813 'password': base64
.b64encode(password_hash
).decode('ascii'),
1815 's_locale': 'en_US',
1816 'isTrusted': 'true',
1817 }), note
='Logging in', errnote
='Unable to log in')
1818 if login_post
.get('code'):
1819 if login_post
.get('message'):
1820 raise ExtractorError(f
'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected
=True)
1822 raise ExtractorError('Unable to log in')
1825 class BiliIntlIE(BiliIntlBaseIE
):
1826 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1829 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1833 'title': 'E2 - The First Night',
1834 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1835 'episode_number': 2,
1836 'upload_date': '20201009',
1837 'episode': 'Episode 2',
1838 'timestamp': 1602259500,
1839 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1843 'title': '<Untitled Chapter 1>',
1845 'start_time': 76.242,
1846 'end_time': 161.161,
1849 'start_time': 1325.742,
1850 'end_time': 1403.903,
1856 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1860 'title': 'E3 - Who?',
1861 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1862 'episode_number': 3,
1863 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1864 'episode': 'Episode 3',
1865 'upload_date': '20211219',
1866 'timestamp': 1639928700,
1870 'title': '<Untitled Chapter 1>',
1876 'start_time': 1173.0,
1877 'end_time': 1259.535,
1882 # Subtitle with empty content
1883 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1887 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1888 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1889 'episode_number': 140,
1891 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.',
1893 # episode comment extraction
1894 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1898 'timestamp': 1604057820,
1899 'upload_date': '20201030',
1900 'episode_number': 5,
1901 'title': 'E5 - My Own Steel',
1902 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1903 'thumbnail': r
're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1904 'episode': 'Episode 5',
1905 'comment_count': int,
1909 'title': '<Untitled Chapter 1>',
1915 'start_time': 1290.0,
1921 'getcomments': True,
1924 # user generated content comment extraction
1925 'url': 'https://www.bilibili.tv/en/video/2045730385',
1929 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1930 'timestamp': 1667891924,
1931 'upload_date': '20221108',
1932 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
1933 'comment_count': int,
1934 'thumbnail': r
're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
1937 'getcomments': True,
1940 # episode id without intro and outro
1941 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1945 'title': 'E1 - Operation \'Strix\' <Owl>',
1946 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1947 'timestamp': 1649516400,
1948 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1949 'episode': 'Episode 1',
1950 'episode_number': 1,
1951 'upload_date': '20220409',
1954 'url': 'https://www.biliintl.com/en/play/34613/341736',
1955 'only_matching': True,
1957 # User-generated content (as opposed to a series licensed from a studio)
1958 'url': 'https://bilibili.tv/en/video/2019955076',
1959 'only_matching': True,
1961 # No language in URL
1962 'url': 'https://www.bilibili.tv/video/2019955076',
1963 'only_matching': True,
1965 # Uppercase language in URL
1966 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1967 'only_matching': True,
1971 def _make_url(video_id
, series_id
=None):
1973 return f
'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1974 return f
'https://www.bilibili.tv/en/video/{video_id}'
1976 def _extract_video_metadata(self
, url
, video_id
, season_id
):
1977 url
, smuggled_data
= unsmuggle_url(url
, {})
1978 if smuggled_data
.get('title'):
1979 return smuggled_data
1981 webpage
= self
._download
_webpage
(url
, video_id
)
1984 self
._search
_json
(r
'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage
, 'preload state', video_id
, default
={})
1985 or self
._search
_nuxt
_data
(webpage
, video_id
, '__initialState', fatal
=False, traverse
=None))
1986 video_data
= traverse_obj(
1987 initial_data
, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type
=dict) or {}
1989 if season_id
and not video_data
:
1990 # Non-Bstation layout, read through episode list
1991 season_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id
)
1992 video_data
= traverse_obj(season_json
, (
1993 'sections', ..., 'episodes', lambda _
, v
: str(v
['episode_id']) == video_id
,
1994 ), expected_type
=dict, get_all
=False)
1996 # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
1998 self
._parse
_video
_metadata
(video_data
), {
1999 'title': get_element_by_class(
2000 'bstar-meta__title', webpage
) or self
._html
_search
_meta
('og:title', webpage
),
2001 'description': get_element_by_class(
2002 'bstar-meta__desc', webpage
) or self
._html
_search
_meta
('og:description', webpage
),
2003 }, self
._search
_json
_ld
(webpage
, video_id
, default
={}))
2005 def _get_comments_reply(self
, root_id
, next_id
=0, display_id
=None):
2006 comment_api_raw_data
= self
._download
_json
(
2007 'https://api.bilibili.tv/reply/web/detail', display_id
,
2008 note
=f
'Downloading reply comment of {root_id} - {next_id}',
2011 'ps': 20, # comment's reply per page (default: 3)
2016 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
2018 'author': traverse_obj(replies
, ('member', 'name')),
2019 'author_id': traverse_obj(replies
, ('member', 'mid')),
2020 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
2021 'text': traverse_obj(replies
, ('content', 'message')),
2022 'id': replies
.get('rpid'),
2023 'like_count': int_or_none(replies
.get('like_count')),
2024 'parent': replies
.get('parent'),
2025 'timestamp': unified_timestamp(replies
.get('ctime_text')),
2028 if not traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
2029 yield from self
._get
_comments
_reply
(
2030 root_id
, comment_api_raw_data
['data']['cursor']['next'], display_id
)
2032 def _get_comments(self
, video_id
, ep_id
):
2033 for i
in itertools
.count(0):
2034 comment_api_raw_data
= self
._download
_json
(
2035 'https://api.bilibili.tv/reply/web/root', video_id
,
2036 note
=f
'Downloading comment page {i + 1}',
2039 'pn': i
, # page number
2040 'ps': 20, # comment per page (default: 20)
2042 'type': 3 if ep_id
else 1, # 1: user generated content, 3: series content
2043 'sort_type': 1, # 1: best, 2: recent
2046 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
2048 'author': traverse_obj(replies
, ('member', 'name')),
2049 'author_id': traverse_obj(replies
, ('member', 'mid')),
2050 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
2051 'text': traverse_obj(replies
, ('content', 'message')),
2052 'id': replies
.get('rpid'),
2053 'like_count': int_or_none(replies
.get('like_count')),
2054 'timestamp': unified_timestamp(replies
.get('ctime_text')),
2055 'author_is_uploader': bool(traverse_obj(replies
, ('member', 'type'))),
2057 if replies
.get('count'):
2058 yield from self
._get
_comments
_reply
(replies
.get('rpid'), display_id
=video_id
)
2060 if traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
2063 def _real_extract(self
, url
):
2064 season_id
, ep_id
, aid
= self
._match
_valid
_url
(url
).group('season_id', 'ep_id', 'aid')
2065 video_id
= ep_id
or aid
2069 intro_ending_json
= self
._call
_api
(
2070 f
'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2071 video_id
, fatal
=False) or {}
2072 if intro_ending_json
.get('skip'):
2073 # FIXME: start time and end time seems a bit off a few second even it corrext based on ogv.*.js
2074 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2076 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_start_time')), 1000),
2077 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_end_time')), 1000),
2080 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_start_time')), 1000),
2081 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_end_time')), 1000),
2087 **self
._extract
_video
_metadata
(url
, video_id
, season_id
),
2088 'formats': self
._get
_formats
(ep_id
=ep_id
, aid
=aid
),
2089 'subtitles': self
.extract_subtitles(ep_id
=ep_id
, aid
=aid
),
2090 'chapters': chapters
,
2091 '__post_extractor': self
.extract_comments(video_id
, ep_id
),
2092 'http_headers': self
._HEADERS
,
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Playlist extractor for bilibili.tv / biliintl.com series (season) pages.

    Resolves a season id to one BiliIntlIE entry per episode, with season
    title/description/categories attached to the playlist.
    """
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        """Yield url_result entries for every episode in every section of the season."""
        series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            episode_id = str(episode['episode_id'])
            # Smuggle the episode metadata we already have into the URL so
            # BiliIntlIE can consume it
            yield self.url_result(smuggle_url(
                BiliIntlIE._make_url(episode_id, series_id),
                self._parse_video_metadata(episode),
            ), BiliIntlIE, episode_id)

    def _real_extract(self, url):
        series_id = self._match_id(url)
        series_info = self._call_api(
            f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
        return self.playlist_result(
            self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
            categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2150 class BiliLiveIE(InfoExtractor
):
2151 _VALID_URL
= r
'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2154 'url': 'https://live.bilibili.com/196',
2157 'description': '周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)',
2159 'title': '太空狼人杀联动,不被爆杀就算赢',
2160 'thumbnail': 'https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg',
2161 'timestamp': 1650802769,
2165 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2166 'only_matching': True,
2168 'url': 'https://live.bilibili.com/blanc/196',
2169 'only_matching': True,
2173 80: {'format_id': 'low', 'format_note': '流畅'}
,
2174 150: {'format_id': 'high_res', 'format_note': '高清'}
,
2175 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}
,
2176 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}
,
2177 10000: {'format_id': 'source', 'format_note': '原画'}
,
2178 20000: {'format_id': '4K', 'format_note': '4K'}
,
2179 30000: {'format_id': 'dolby', 'format_note': '杜比'}
,
2182 _quality
= staticmethod(qualities(list(_FORMATS
)))
2184 def _call_api(self
, path
, room_id
, query
):
2185 api_result
= self
._download
_json
(f
'https://api.live.bilibili.com/{path}', room_id
, query
=query
)
2186 if api_result
.get('code') != 0:
2187 raise ExtractorError(api_result
.get('message') or 'Unable to download JSON metadata')
2188 return api_result
.get('data') or {}
2190 def _parse_formats(self
, qn
, fmt
):
2191 for codec
in fmt
.get('codec') or []:
2192 if codec
.get('current_qn') != qn
:
2194 for url_info
in codec
['url_info']:
2196 'url': f
'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2197 'ext': fmt
.get('format_name'),
2198 'vcodec': codec
.get('codec_name'),
2199 'quality': self
._quality
(qn
),
2200 **self
._FORMATS
[qn
],
2203 def _real_extract(self
, url
):
2204 room_id
= self
._match
_id
(url
)
2205 room_data
= self
._call
_api
('room/v1/Room/get_info', room_id
, {'id': room_id}
)
2206 if room_data
.get('live_status') == 0:
2207 raise ExtractorError('Streamer is not live', expected
=True)
2210 for qn
in self
._FORMATS
:
2211 stream_data
= self
._call
_api
('xlive/web-room/v2/index/getRoomPlayInfo', room_id
, {
2221 for fmt
in traverse_obj(stream_data
, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2222 formats
.extend(self
._parse
_formats
(qn
, fmt
))
2226 'title': room_data
.get('title'),
2227 'description': room_data
.get('description'),
2228 'thumbnail': room_data
.get('user_cover'),
2229 'timestamp': stream_data
.get('live_time'),