12 from .common
import InfoExtractor
, SearchInfoExtractor
13 from ..dependencies
import Cryptodome
14 from ..networking
.exceptions
import HTTPError
36 srt_subtitles_timecode
,
48 class BilibiliBaseIE(InfoExtractor
):
49 _FORMAT_ID_RE
= re
.compile(r
'-(\d+)\.m4s\?')
51 def extract_formats(self
, play_info
):
53 r
['quality']: traverse_obj(r
, 'new_description', 'display_desc')
54 for r
in traverse_obj(play_info
, ('support_formats', lambda _
, v
: v
['quality']))
57 audios
= traverse_obj(play_info
, ('dash', (None, 'dolby'), 'audio', ..., {dict}
))
58 flac_audio
= traverse_obj(play_info
, ('dash', 'flac', 'audio'))
60 audios
.append(flac_audio
)
62 'url': traverse_obj(audio
, 'baseUrl', 'base_url', 'url'),
63 'ext': mimetype2ext(traverse_obj(audio
, 'mimeType', 'mime_type')),
64 'acodec': traverse_obj(audio
, ('codecs', {str.lower}
)),
66 'tbr': float_or_none(audio
.get('bandwidth'), scale
=1000),
67 'filesize': int_or_none(audio
.get('size')),
68 'format_id': str_or_none(audio
.get('id')),
69 } for audio
in audios
]
72 'url': traverse_obj(video
, 'baseUrl', 'base_url', 'url'),
73 'ext': mimetype2ext(traverse_obj(video
, 'mimeType', 'mime_type')),
74 'fps': float_or_none(traverse_obj(video
, 'frameRate', 'frame_rate')),
75 'width': int_or_none(video
.get('width')),
76 'height': int_or_none(video
.get('height')),
77 'vcodec': video
.get('codecs'),
78 'acodec': 'none' if audios
else None,
79 'dynamic_range': {126: 'DV', 125: 'HDR10'}
.get(int_or_none(video
.get('id'))),
80 'tbr': float_or_none(video
.get('bandwidth'), scale
=1000),
81 'filesize': int_or_none(video
.get('size')),
82 'quality': int_or_none(video
.get('id')),
83 'format_id': traverse_obj(
84 video
, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}
, 1),
85 ('id', {str_or_none}
), get_all
=False),
86 'format': format_names
.get(video
.get('id')),
87 } for video
in traverse_obj(play_info
, ('dash', 'video', ...)))
89 missing_formats
= format_names
.keys() - set(traverse_obj(formats
, (..., 'quality')))
91 self
.to_screen(f
'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
92 f
'you have to login or become premium member to download them. {self._login_hint()}')
96 def _download_playinfo(self
, video_id
, cid
, headers
=None):
97 return self
._download
_json
(
98 'https://api.bilibili.com/x/player/playurl', video_id
,
99 query
={'bvid': video_id, 'cid': cid, 'fnval': 4048}
,
100 note
=f
'Downloading video formats for cid {cid}', headers
=headers
)['data']
102 def json2srt(self
, json_data
):
104 for idx
, line
in enumerate(json_data
.get('body') or []):
105 srt_data
+= (f
'{idx + 1}\n'
106 f
'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
107 f
'{line["content"]}\n\n')
110 def _get_subtitles(self
, video_id
, cid
, aid
=None):
114 'url': f
'https://comment.bilibili.com/{cid}.xml',
118 subtitle_info
= traverse_obj(self
._download
_json
(
119 'https://api.bilibili.com/x/player/v2', video_id
,
120 query
={'aid': aid, 'cid': cid}
if aid
else {'bvid': video_id, 'cid': cid}
,
121 note
=f
'Extracting subtitle info {cid}'), ('data', 'subtitle'))
122 subs_list
= traverse_obj(subtitle_info
, ('subtitles', lambda _
, v
: v
['subtitle_url'] and v
['lan']))
123 if not subs_list
and traverse_obj(subtitle_info
, 'allow_submit'):
124 if not self
._get
_cookies
('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
125 self
.report_warning(f
'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once
=True)
127 subtitles
.setdefault(s
['lan'], []).append({
129 'data': self
.json2srt(self
._download
_json
(s
['subtitle_url'], video_id
))
133 def _get_chapters(self
, aid
, cid
):
134 chapters
= aid
and cid
and self
._download
_json
(
135 'https://api.bilibili.com/x/player/v2', aid
, query
={'aid': aid, 'cid': cid}
,
136 note
='Extracting chapters', fatal
=False)
137 return traverse_obj(chapters
, ('data', 'view_points', ..., {
139 'start_time': 'from',
143 def _get_comments(self
, aid
):
144 for idx
in itertools
.count(1):
145 replies
= traverse_obj(
147 f
'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
148 aid
, note
=f
'Extracting comments from page {idx}', fatal
=False),
152 for children
in map(self
._get
_all
_children
, replies
):
155 def _get_all_children(self
, reply
):
157 'author': traverse_obj(reply
, ('member', 'uname')),
158 'author_id': traverse_obj(reply
, ('member', 'mid')),
159 'id': reply
.get('rpid'),
160 'text': traverse_obj(reply
, ('content', 'message')),
161 'timestamp': reply
.get('ctime'),
162 'parent': reply
.get('parent') or 'root',
164 for children
in map(self
._get
_all
_children
, traverse_obj(reply
, ('replies', ...))):
167 def _get_episodes_from_season(self
, ss_id
, url
):
168 season_info
= self
._download
_json
(
169 'https://api.bilibili.com/pgc/web/season/section', ss_id
,
170 note
='Downloading season info', query
={'season_id': ss_id}
,
171 headers
={'Referer': url, **self.geo_verification_headers()}
)
173 for entry
in traverse_obj(season_info
, (
174 'result', 'main_section', 'episodes',
175 lambda _
, v
: url_or_none(v
['share_url']) and v
['id'])):
176 yield self
.url_result(entry
['share_url'], BiliBiliBangumiIE
, str_or_none(entry
.get('id')))
178 def _get_divisions(self
, video_id
, graph_version
, edges
, edge_id
, cid_edges
=None):
179 cid_edges
= cid_edges
or {}
180 division_data
= self
._download
_json
(
181 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id
,
182 query
={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id}
,
183 note
=f
'Extracting divisions from edge {edge_id}')
184 edges
.setdefault(edge_id
, {}).update(
185 traverse_obj(division_data
, ('data', 'story_list', lambda _
, v
: v
['edge_id'] == edge_id
, {
186 'title': ('title', {str}
),
187 'cid': ('cid', {int_or_none}
),
190 edges
[edge_id
].update(traverse_obj(division_data
, ('data', {
191 'title': ('title', {str}
),
192 'choices': ('edges', 'questions', ..., 'choices', ..., {
193 'edge_id': ('id', {int_or_none}
),
194 'cid': ('cid', {int_or_none}
),
195 'text': ('option', {str}
),
198 # use dict to combine edges that use the same video section (same cid)
199 cid_edges
.setdefault(edges
[edge_id
]['cid'], {})[edge_id
] = edges
[edge_id
]
200 for choice
in traverse_obj(edges
, (edge_id
, 'choices', ...)):
201 if choice
['edge_id'] not in edges
:
202 edges
[choice
['edge_id']] = {'cid': choice['cid']}
203 self
._get
_divisions
(video_id
, graph_version
, edges
, choice
['edge_id'], cid_edges
=cid_edges
)
206 def _get_interactive_entries(self
, video_id
, cid
, metainfo
):
207 graph_version
= traverse_obj(
209 'https://api.bilibili.com/x/player/wbi/v2', video_id
,
210 'Extracting graph version', query
={'bvid': video_id, 'cid': cid}
),
211 ('data', 'interaction', 'graph_version', {int_or_none}
))
212 cid_edges
= self
._get
_divisions
(video_id
, graph_version
, {1: {'cid': cid}
}, 1)
213 for cid
, edges
in cid_edges
.items():
214 play_info
= self
._download
_playinfo
(video_id
, cid
)
217 'id': f
'{video_id}_{cid}',
218 'title': f
'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
219 'formats': self
.extract_formats(play_info
),
220 'description': f
'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
221 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
222 'subtitles': self
.extract_subtitles(video_id
, cid
),
226 class BiliBiliIE(BilibiliBaseIE
):
227 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
230 'url': 'https://www.bilibili.com/video/BV13x41117TL',
232 'id': 'BV13x41117TL',
233 'title': '阿滴英文|英文歌分享#6 "Closer',
235 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
236 'uploader_id': '65880958',
238 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
241 'comment_count': int,
242 'upload_date': '20170301',
243 'timestamp': 1488353834,
248 'note': 'old av URL version',
249 'url': 'http://www.bilibili.com/video/av1074402/',
251 'thumbnail': r
're:^https?://.*\.(jpg|jpeg)$',
254 'uploader_id': '156160',
255 'id': 'BV11x411K7CN',
258 'upload_date': '20140420',
259 'timestamp': 1397983878,
260 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
262 'comment_count': int,
266 'params': {'skip_download': True}
,
269 'url': 'https://www.bilibili.com/video/BV1bK411W797',
271 'id': 'BV1bK411W797',
272 'title': '物语中的人物是如何吐槽自己的OP的'
274 'playlist_count': 18,
277 'id': 'BV1bK411W797_p1',
279 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
281 'timestamp': 1589601697,
282 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
283 'uploader': '打牌还是打桩',
284 'uploader_id': '150259984',
286 'comment_count': int,
287 'upload_date': '20200516',
289 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
294 'note': 'Specific page of Anthology',
295 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
297 'id': 'BV1bK411W797_p1',
299 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
301 'timestamp': 1589601697,
302 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
303 'uploader': '打牌还是打桩',
304 'uploader_id': '150259984',
306 'comment_count': int,
307 'upload_date': '20200516',
309 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
313 'note': 'video has subtitles',
314 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
316 'id': 'BV12N4y1M7rh',
318 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
320 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
322 'upload_date': '20220709',
324 'timestamp': 1657347907,
325 'uploader_id': '1326814124',
326 'comment_count': int,
329 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
330 'subtitles': 'count:2'
332 'params': {'listsubtitles': True}
,
334 'url': 'https://www.bilibili.com/video/av8903802/',
336 'id': 'BV13x41117TL',
338 'title': '阿滴英文|英文歌分享#6 "Closer',
339 'upload_date': '20170301',
340 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
341 'timestamp': 1488353834,
342 'uploader_id': '65880958',
344 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
347 'comment_count': int,
352 'skip_download': True,
355 'note': 'video has chapter',
356 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
358 'id': 'BV1vL411G7N7',
360 'title': '如何为你的B站视频添加进度条分段',
361 'timestamp': 1634554558,
362 'upload_date': '20211018',
363 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
365 'uploader': '爱喝咖啡的当麻',
367 'uploader_id': '1680903',
368 'chapters': 'count:6',
369 'comment_count': int,
372 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
374 'params': {'skip_download': True}
,
376 'note': 'video redirects to festival page',
377 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
379 'id': 'BV1wP4y1P72h',
381 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
382 'timestamp': 1643947497,
383 'upload_date': '20220204',
384 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
385 'uploader': '叨叨冯聊音乐',
387 'uploader_id': '528182630',
390 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
392 'params': {'skip_download': True}
,
394 'note': 'newer festival video',
395 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
397 'id': 'BV1ay4y1d77f',
399 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
400 'timestamp': 1674273600,
401 'upload_date': '20230121',
402 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
404 'duration': 1111.722,
405 'uploader_id': '8469526',
408 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
410 'params': {'skip_download': True}
,
412 'note': 'interactive/split-path video',
413 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
415 'id': 'BV1af4y1H7ga',
416 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
417 'timestamp': 1630500414,
418 'upload_date': '20210901',
419 'description': 'md5:01113e39ab06e28042d74ac356a08786',
421 'uploader': '钉宫妮妮Ninico',
423 'uploader_id': '8881297',
424 'comment_count': int,
427 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
429 'playlist_count': 33,
432 'id': 'BV1af4y1H7ga_400950101',
434 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
435 'timestamp': 1630500414,
436 'upload_date': '20210901',
437 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
439 'uploader': '钉宫妮妮Ninico',
441 'uploader_id': '8881297',
442 'comment_count': int,
445 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
449 'note': '301 redirect to bangumi link',
450 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
453 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
458 'season_id': '28609',
460 'episode': '钱学森弹道和乘波体飞行器是什么?',
461 'episode_id': '288525',
462 'episode_number': 105,
463 'duration': 1183.957,
464 'timestamp': 1571648124,
465 'upload_date': '20191021',
466 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
469 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
471 'id': 'BV1jL41167ZG',
472 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
475 'skip': 'supporter-only video',
477 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
479 'id': 'BV1Ks411f7aQ',
480 'title': '【BD1080P】狼与香辛料I【华盟】',
483 'skip': 'login required',
485 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
487 'id': 'BV1GJ411x7h7',
488 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
491 'skip': 'geo-restricted',
494 def _real_extract(self
, url
):
495 video_id
= self
._match
_id
(url
)
496 headers
= self
.geo_verification_headers()
497 webpage
, urlh
= self
._download
_webpage
_handle
(url
, video_id
, headers
=headers
)
498 if not self
._match
_valid
_url
(urlh
.url
):
499 return self
.url_result(urlh
.url
)
501 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)
503 is_festival
= 'videoData' not in initial_state
505 video_data
= initial_state
['videoInfo']
507 play_info_obj
= self
._search
_json
(
508 r
'window\.__playinfo__\s*=', webpage
, 'play info', video_id
, fatal
=False)
509 if not play_info_obj
:
510 if traverse_obj(initial_state
, ('error', 'trueCode')) == -403:
511 self
.raise_login_required()
512 if traverse_obj(initial_state
, ('error', 'trueCode')) == -404:
513 raise ExtractorError(
514 'This video may be deleted or geo-restricted. '
515 'You might want to try a VPN or a proxy server (with --proxy)', expected
=True)
516 play_info
= traverse_obj(play_info_obj
, ('data', {dict}
))
518 if traverse_obj(play_info_obj
, 'code') == 87007:
519 toast
= get_element_by_class('tips-toast', webpage
) or ''
521 f
'{get_element_by_class("belongs-to", toast) or ""},'
522 + (get_element_by_class('level', toast
) or ''))
523 raise ExtractorError(
524 f
'This is a supporter-only video: {msg}. {self._login_hint()}', expected
=True)
525 raise ExtractorError('Failed to extract play info')
526 video_data
= initial_state
['videoData']
528 video_id
, title
= video_data
['bvid'], video_data
.get('title')
530 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
531 page_list_json
= not is_festival
and traverse_obj(
533 'https://api.bilibili.com/x/player/pagelist', video_id
,
534 fatal
=False, query
={'bvid': video_id, 'jsonp': 'jsonp'}
,
535 note
='Extracting videos in anthology', headers
=headers
),
536 'data', expected_type
=list) or []
537 is_anthology
= len(page_list_json
) > 1
539 part_id
= int_or_none(parse_qs(url
).get('p', [None])[-1])
540 if is_anthology
and not part_id
and self
._yes
_playlist
(video_id
, video_id
):
541 return self
.playlist_from_matches(
542 page_list_json
, video_id
, title
, ie
=BiliBiliIE
,
543 getter
=lambda entry
: f
'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
546 part_id
= part_id
or 1
547 title
+= f
' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
549 aid
= video_data
.get('aid')
550 old_video_id
= format_field(aid
, None, f
'%s_part{part_id or 1}')
552 cid
= traverse_obj(video_data
, ('pages', part_id
- 1, 'cid')) if part_id
else video_data
.get('cid')
556 play_info
= self
._download
_playinfo
(video_id
, cid
, headers
=headers
)
558 festival_info
= traverse_obj(initial_state
, {
559 'uploader': ('videoInfo', 'upName'),
560 'uploader_id': ('videoInfo', 'upMid', {str_or_none}
),
561 'like_count': ('videoStatus', 'like', {int_or_none}
),
562 'thumbnail': ('sectionEpisodes', lambda _
, v
: v
['bvid'] == video_id
, 'cover'),
566 **traverse_obj(initial_state
, {
567 'uploader': ('upData', 'name'),
568 'uploader_id': ('upData', 'mid', {str_or_none}
),
569 'like_count': ('videoData', 'stat', 'like', {int_or_none}
),
570 'tags': ('tags', ..., 'tag_name'),
571 'thumbnail': ('videoData', 'pic', {url_or_none}
),
574 **traverse_obj(video_data
, {
575 'description': 'desc',
576 'timestamp': ('pubdate', {int_or_none}
),
577 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}
),
578 'comment_count': ('stat', 'reply', {int_or_none}
),
580 'id': f
'{video_id}{format_field(part_id, None, "_p%d")}',
581 '_old_archive_ids': [make_archive_id(self
, old_video_id
)] if old_video_id
else None,
583 'http_headers': {'Referer': url}
,
586 is_interactive
= traverse_obj(video_data
, ('rights', 'is_stein_gate'))
588 return self
.playlist_result(
589 self
._get
_interactive
_entries
(video_id
, cid
, metainfo
), **metainfo
, **{
590 'duration': traverse_obj(initial_state
, ('videoData', 'duration', {int_or_none}
)),
591 '__post_extractor': self
.extract_comments(aid
),
596 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
597 'chapters': self
._get
_chapters
(aid
, cid
),
598 'subtitles': self
.extract_subtitles(video_id
, cid
),
599 'formats': self
.extract_formats(play_info
),
600 '__post_extractor': self
.extract_comments(aid
),
604 class BiliBiliBangumiIE(BilibiliBaseIE
):
605 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
608 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
617 'episode': 'forever/ef',
618 'episode_id': '21495',
619 'episode_number': 12,
620 'title': '12 forever/ef',
621 'duration': 1420.791,
622 'timestamp': 1320412200,
623 'upload_date': '20111104',
624 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
627 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
634 'season_id': '26801',
637 'episode_id': '267851',
640 'duration': 1425.256,
641 'timestamp': 1554566400,
642 'upload_date': '20190406',
643 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
645 'skip': 'Geo-restricted',
647 'note': 'a making-of which falls outside main section',
648 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
655 'season_id': '26801',
658 'episode_id': '345120',
659 'episode_number': 27,
661 'duration': 1922.129,
662 'timestamp': 1602853860,
663 'upload_date': '20201016',
664 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
668 def _real_extract(self
, url
):
669 episode_id
= self
._match
_id
(url
)
670 headers
= self
.geo_verification_headers()
671 webpage
= self
._download
_webpage
(url
, episode_id
, headers
=headers
)
673 if '您所在的地区无法观看本片' in webpage
:
674 raise GeoRestrictedError('This video is restricted')
675 elif '正在观看预览,大会员免费看全片' in webpage
:
676 self
.raise_login_required('This video is for premium members only')
678 headers
['Referer'] = url
679 play_info
= self
._download
_json
(
680 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id
,
681 'Extracting episode', query
={'fnval': '4048', 'ep_id': episode_id}
,
683 premium_only
= play_info
.get('code') == -10403
684 play_info
= traverse_obj(play_info
, ('result', 'video_info', {dict}
)) or {}
686 formats
= self
.extract_formats(play_info
)
687 if not formats
and (premium_only
or '成为大会员抢先看' in webpage
or '开通大会员观看' in webpage
):
688 self
.raise_login_required('This video is for premium members only')
690 bangumi_info
= self
._download
_json
(
691 'https://api.bilibili.com/pgc/view/web/season', episode_id
, 'Get episode details',
692 query
={'ep_id': episode_id}
, headers
=headers
)['result']
694 episode_number
, episode_info
= next((
695 (idx
, ep
) for idx
, ep
in enumerate(traverse_obj(
696 bangumi_info
, (('episodes', ('section', ..., 'episodes')), ..., {dict}
)), 1)
697 if str_or_none(ep
.get('id')) == episode_id
), (1, {}))
699 season_id
= bangumi_info
.get('season_id')
700 season_number
, season_title
= season_id
and next((
701 (idx
+ 1, e
.get('season_title')) for idx
, e
in enumerate(
702 traverse_obj(bangumi_info
, ('seasons', ...)))
703 if e
.get('season_id') == season_id
706 aid
= episode_info
.get('aid')
711 **traverse_obj(bangumi_info
, {
712 'series': ('series', 'series_title', {str}
),
713 'series_id': ('series', 'series_id', {str_or_none}
),
714 'thumbnail': ('square_cover', {url_or_none}
),
716 **traverse_obj(episode_info
, {
717 'episode': ('long_title', {str}
),
718 'episode_number': ('title', {int_or_none}
, {lambda x: x or episode_number}
),
719 'timestamp': ('pub_time', {int_or_none}
),
720 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)}
,
722 'episode_id': episode_id
,
723 'season': str_or_none(season_title
),
724 'season_id': str_or_none(season_id
),
725 'season_number': season_number
,
726 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
727 'subtitles': self
.extract_subtitles(episode_id
, episode_info
.get('cid'), aid
=aid
),
728 '__post_extractor': self
.extract_comments(aid
),
729 'http_headers': {'Referer': url}
,
733 class BiliBiliBangumiMediaIE(BilibiliBaseIE
):
734 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
736 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
739 'title': 'CAROLE & TUESDAY',
740 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
742 'playlist_mincount': 25,
744 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
747 'title': '攻壳机动队 S.A.C. 2nd GIG',
748 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
750 'playlist_count': 26,
760 'episode': '再启动 REEMBODY',
761 'episode_id': '68540',
763 'title': '1 再启动 REEMBODY',
764 'duration': 1525.777,
765 'timestamp': 1425074413,
766 'upload_date': '20150227',
767 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
772 def _real_extract(self
, url
):
773 media_id
= self
._match
_id
(url
)
774 webpage
= self
._download
_webpage
(url
, media_id
)
776 initial_state
= self
._search
_json
(
777 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial_state', media_id
)
778 ss_id
= initial_state
['mediaInfo']['season_id']
780 return self
.playlist_result(
781 self
._get
_episodes
_from
_season
(ss_id
, url
), media_id
,
782 **traverse_obj(initial_state
, ('mediaInfo', {
783 'title': ('title', {str}
),
784 'description': ('evaluate', {str}
),
788 class BiliBiliBangumiSeasonIE(BilibiliBaseIE
):
789 _VALID_URL
= r
'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
791 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
795 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
797 'playlist_mincount': 26
799 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
803 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
805 'playlist_count': 13,
816 'episode_id': '50188',
819 'duration': 1436.992,
820 'timestamp': 1343185080,
821 'upload_date': '20120725',
822 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
827 def _real_extract(self
, url
):
828 ss_id
= self
._match
_id
(url
)
829 webpage
= self
._download
_webpage
(url
, ss_id
)
830 metainfo
= traverse_obj(
831 self
._search
_json
(r
'<script[^>]+type="application/ld\+json"[^>]*>', webpage
, 'info', ss_id
),
832 ('itemListElement', ..., {
833 'title': ('name', {str}
),
834 'description': ('description', {str}
),
837 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), ss_id
, **metainfo
)
class BilibiliCheeseBaseIE(BilibiliBaseIE):
    # Referer header required by the pugv (paid course) APIs
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _extract_episode(self, season_info, ep_id):
        """Build the info dict for a single course episode from its season payload.

        Raises ExtractorError for episodes not yet released and asks for login
        when the episode is not playable (not purchased).
        """
        episode_info = traverse_obj(season_info, (
            'episodes', lambda _, v: v['id'] == int(ep_id)), get_all=False)
        aid, cid = episode_info['aid'], episode_info['cid']

        if traverse_obj(episode_info, 'ep_status') == -1:
            raise ExtractorError('This course episode is not yet available.', expected=True)
        if not traverse_obj(episode_info, 'playable'):
            self.raise_login_required('You need to purchase the course to download this episode')

        play_info = self._download_json(
            'https://api.bilibili.com/pugv/player/web/playurl', ep_id,
            query={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1},
            headers=self._HEADERS, note='Downloading playinfo')['data']

        return {
            'id': str_or_none(ep_id),
            'episode_id': str_or_none(ep_id),
            'formats': self.extract_formats(play_info),
            # Attribute entries to BilibiliCheeseIE even when reached via the
            # season extractor
            'extractor_key': BilibiliCheeseIE.ie_key(),
            'extractor': BilibiliCheeseIE.IE_NAME,
            'webpage_url': f'https://www.bilibili.com/cheese/play/ep{ep_id}',
            **traverse_obj(episode_info, {
                'episode': ('title', {str}),
                'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)},
                'alt_title': ('subtitle', {str}),
                'duration': ('duration', {int_or_none}),
                'episode_number': ('index', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
                'timestamp': ('release_date', {int_or_none}),
                'view_count': ('play', {int_or_none}),
            }),
            **traverse_obj(season_info, {
                'uploader': ('up_info', 'uname', {str}),
                'uploader_id': ('up_info', 'mid', {str_or_none}),
            }),
            'subtitles': self.extract_subtitles(ep_id, cid, aid=aid),
            '__post_extractor': self.extract_comments(aid),
            'http_headers': self._HEADERS,
        }

    def _download_season_info(self, query_key, video_id):
        """Download season metadata; query_key is 'ep_id' or 'season_id'.

        Pass the parameter via `query` so it is URL-encoded properly, instead
        of interpolating it directly into the URL string.
        """
        return self._download_json(
            'https://api.bilibili.com/pugv/view/web/season', video_id,
            query={query_key: video_id},
            headers=self._HEADERS, note='Downloading season info')['data']
890 class BilibiliCheeseIE(BilibiliCheeseBaseIE
):
891 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
893 'url': 'https://www.bilibili.com/cheese/play/ep229832',
897 'title': '1 - 课程先导片',
898 'alt_title': '视频课 · 3分41秒',
900 'uploader_id': '316568752',
902 'episode_id': '229832',
905 'timestamp': 1695549606,
906 'upload_date': '20230924',
907 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
912 def _real_extract(self
, url
):
913 ep_id
= self
._match
_id
(url
)
914 return self
._extract
_episode
(self
._download
_season
_info
('ep_id', ep_id
), ep_id
)
917 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE
):
918 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
920 'url': 'https://www.bilibili.com/cheese/play/ss5918',
923 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
924 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
930 'title': '1 - 课程先导片',
931 'alt_title': '视频课 · 3分41秒',
933 'uploader_id': '316568752',
935 'episode_id': '229832',
938 'timestamp': 1695549606,
939 'upload_date': '20230924',
940 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
944 'params': {'playlist_items': '1'}
,
946 'url': 'https://www.bilibili.com/cheese/play/ss5918',
949 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
950 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
952 'playlist_mincount': 5,
953 'skip': 'paid video in list',
956 def _get_cheese_entries(self
, season_info
):
957 for ep_id
in traverse_obj(season_info
, ('episodes', lambda _
, v
: v
['episode_can_view'], 'id')):
958 yield self
._extract
_episode
(season_info
, ep_id
)
960 def _real_extract(self
, url
):
961 season_id
= self
._match
_id
(url
)
962 season_info
= self
._download
_season
_info
('season_id', season_id
)
964 return self
.playlist_result(
965 self
._get
_cheese
_entries
(season_info
), season_id
,
966 **traverse_obj(season_info
, {
967 'title': ('title', {str}
),
968 'description': ('subtitle', {str}
),
class BilibiliSpaceBaseIE(InfoExtractor):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Fetch page 0, derive paging metadata from it, and build a lazy paged list.

        The first page is downloaded eagerly (its payload is needed for the
        metadata) and reused when the paged list later asks for index 0.
        """
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        def resolve_page(idx):
            page = first_page if idx == 0 else fetch_page(idx)
            return get_entries(page)

        paged_list = InAdvancePagedList(
            resolve_page, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
984 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE
):
985 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
987 'url': 'https://space.bilibili.com/3985676/video',
991 'playlist_mincount': 178,
993 'url': 'https://space.bilibili.com/313580179/video',
997 'playlist_mincount': 92,
1000 def _extract_signature(self
, playlist_id
):
1001 session_data
= self
._download
_json
('https://api.bilibili.com/x/web-interface/nav', playlist_id
, fatal
=False)
1003 key_from_url
= lambda x
: x
[x
.rfind('/') + 1:].split('.')[0]
1004 img_key
= traverse_obj(
1005 session_data
, ('data', 'wbi_img', 'img_url', {key_from_url}
)) or '34478ba821254d9d93542680e3b86100'
1006 sub_key
= traverse_obj(
1007 session_data
, ('data', 'wbi_img', 'sub_url', {key_from_url}
)) or '7e16a90d190a4355a78fd00b32a38de6'
1009 session_key
= img_key
+ sub_key
1011 signature_values
= []
1013 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
1014 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
1015 57, 62, 11, 36, 20, 34, 44, 52
1017 char_at_position
= try_call(lambda: session_key
[position
])
1018 if char_at_position
:
1019 signature_values
.append(char_at_position
)
1021 return ''.join(signature_values
)[:32]
1023 def _real_extract(self
, url
):
1024 playlist_id
, is_video_url
= self
._match
_valid
_url
(url
).group('id', 'video')
1025 if not is_video_url
:
1026 self
.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1027 'To download audios, add a "/audio" to the URL')
1029 signature
= self
._extract
_signature
(playlist_id
)
1031 def fetch_page(page_idx
):
1036 'order_avoided': 'true',
1041 'web_location': 1550101,
1042 'wts': int(time
.time()),
1044 query
['w_rid'] = hashlib
.md5(f
'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
1047 response
= self
._download
_json
('https://api.bilibili.com/x/space/wbi/arc/search',
1048 playlist_id
, note
=f
'Downloading page {page_idx}', query
=query
,
1049 headers
={'referer': url}
)
1050 except ExtractorError
as e
:
1051 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 412:
1052 raise ExtractorError(
1053 'Request is blocked by server (412), please add cookies, wait and try later.', expected
=True)
1055 if response
['code'] in (-352, -401):
1056 raise ExtractorError(
1057 f
'Request is blocked by server ({-response["code"]}), '
1058 'please add cookies, wait and try later.', expected
=True)
1059 return response
['data']
1061 def get_metadata(page_data
):
1062 page_size
= page_data
['page']['ps']
1063 entry_count
= page_data
['page']['count']
1065 'page_count': math
.ceil(entry_count
/ page_size
),
1066 'page_size': page_size
,
1069 def get_entries(page_data
):
1070 for entry
in traverse_obj(page_data
, ('list', 'vlist')) or []:
1071 yield self
.url_result(f
'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE
, entry
['bvid'])
1073 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
1074 return self
.playlist_result(paged_list
, playlist_id
)
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Extracts the audio uploads of a Bilibili user space as a paged playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # API pages are 1-based; page_idx from the pager is 0-based
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for entry in page_data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Shared helpers for Bilibili list-style playlists (collections, series, favlists)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield url_results for every BV id found under ``bvid_keys`` in ``page_data``."""
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Best-effort scrape of the uploader name from the space page <title>; may return None."""
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        # Paging bookkeeping is internal; strip it so it never leaks into playlist_result(**metadata)
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Extracts a user's video collection (合集) as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            # NOTE(review): fixture partially reconstructed; verify field values against upstream
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'uploader_id': '2142762',
            'timestamp': int,
            'upload_date': str,
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Extracts a user's video series (系列) as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'title': '直播回放',  # NOTE(review): reconstructed; verify
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'
        # Series-level metadata comes from a separate endpoint; best-effort (fatal=False)
        playlist_meta = traverse_obj(self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False,
        ), {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Extracts a (public or owned) favorites list as a playlist."""
    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            # NOTE(review): fixture partially reconstructed; title/uploader/description dropped
            # by the mangling — restore from upstream before relying on this test
            'id': '1103407912',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
            'view_count': int,
            'like_count': int,
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The /ids endpoint returns every entry at once, unlike the paged /list endpoint
        entries = self._get_entries(self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries'), 'data')

        return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        })))
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Extracts the logged-in user's "watch later" (稍后再看) list."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        # Use the logged-in uid from cookies as the playlist id when available
        list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')
        entries = self._get_entries(watchlater_info, ('data', 'list'))
        return self.playlist_result(entries, id=list_id, title='稍后再看')
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extracts /list/ and /medialist/play/ playlists via the medialist resource API."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        # NOTE(review): fixtures partially reconstructed from upstream; verify values
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'comment_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Yield entries page by page; the API cursors on the last seen media id ('oid')."""
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}',
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)

        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
        if not self._yes_playlist(list_id, bvid):
            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
        if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {
            'ps': 20,
            'with_current': False,
            **traverse_obj(initial_state, {
                'type': ('playlist', 'type', {int_or_none}),
                'biz_id': ('playlist', 'id', {int_or_none}),
                'tid': ('tid', {int_or_none}),
                'sort_field': ('sortFiled', {int_or_none}),
                'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
            }),
        }
        metadata = {
            'id': f'{query["type"]}_{query["biz_id"]}',
            **traverse_obj(initial_state, ('mediaListInfo', {
                'title': ('title', {str}),
                'uploader': ('upper', 'name', {str}),
                'uploader_id': ('upper', 'mid', {str_or_none}),
                'timestamp': ('ctime', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
            })),
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
class BilibiliCategoryIE(InfoExtractor):
    """Extracts a Bilibili category/subcategory listing as a paged playlist."""
    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad',
        },
        'playlist_mincount': 45,
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        """Download one result page and yield url_results for its videos."""
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note='Extracting results from page %s of %s' % (page_num, num_pages))

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if not video_list:
            raise ExtractorError('Failed to retrieve video list for page %d' % page_num)

        for video in video_list:
            yield self.url_result(
                'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        # map of categories : subcategories : RIDs
        # NOTE(review): interior of this map reconstructed from upstream; verify values
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127,
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        # URL path is /v/<category>/<subcategory>
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = '%s: %s' % (category, subcategory)

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor for Bilibili videos (prefix ``bilisearch``)."""
    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    _TESTS = [{
        'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }]

    def _search_results(self, query):
        # The search API rejects requests without a buvid3 cookie; fabricate one if absent
        if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
        for page_num in itertools.count(1):
            videos = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query={
                    'Search_key': query,
                    # NOTE(review): remaining query params reconstructed from upstream; verify
                    'keyword': query,
                    'page': page_num,
                    'context': '',
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                })['data'].get('result')
            if not videos:
                break
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
class BilibiliAudioBaseIE(InfoExtractor):
    """Base class providing the Bilibili audio (music-service) JSON API call."""

    def _call_api(self, path, sid, query=None):
        """Fetch ``path`` from the music-service API; defaults to a ``{'sid': sid}`` query."""
        if not query:
            query = {'sid': sid}
        return self._download_json(
            'https://www.bilibili.com/audio/music-service-c/web/' + path,
            sid, query=query)['data']
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extracts a single Bilibili audio track (``/audio/au<id>``)."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            # NOTE(review): fixture partially reconstructed; verify id/ext/duration
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }

    def _real_extract(self, url):
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
        }]

        # CDN requires a Referer header on media requests
        for a_format in formats:
            a_format.setdefault('http_headers', {}).update({
                'Referer': url,
            })

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        subtitles = None
        lyric = song.get('lyric')
        if lyric:
            subtitles = {
                'origin': [{
                    'url': lyric,
                }],
            }

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extracts a Bilibili audio album/menu (``/audio/am<id>``) as a playlist."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TEST = {
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }

    def _real_extract(self, url):
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        entries = []
        for song in songs:
            sid = str_or_none(song.get('id'))
            if not sid:
                continue
            entries.append(self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid))

        # Attach album metadata when available; fall back to a bare playlist otherwise
        if entries:
            album_data = self._call_api('menu/info', am_id) or {}
            album_title = album_data.get('title')
            if album_title:
                for entry in entries:
                    entry['album'] = album_title
                return self.playlist_result(
                    entries, am_id, album_title, album_data.get('intro'))

        return self.playlist_result(entries, am_id)
class BiliBiliPlayerIE(InfoExtractor):
    """Redirects embedded player URLs (player.bilibili.com) to the main video extractor."""
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TEST = {
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }

    def _real_extract(self, url):
        video_id = self._match_id(url)
        return self.url_result(
            'http://www.bilibili.tv/video/av%s/' % video_id,
            ie=BiliBiliIE.ie_key(), video_id=video_id)
class BiliIntlBaseIE(InfoExtractor):
    """Shared API, subtitle, format and login logic for bilibili.tv (international)."""
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _call_api(self, endpoint, *args, **kwargs):
        """Call an intl-gateway endpoint; maps API error codes to login/geo errors."""
        json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if json.get('code'):
            if json['code'] in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif json['code'] == 10004001:
                self.raise_geo_restricted()

            # Prefer the server-supplied message unless it merely echoes the code
            if json.get('message') and str(json['code']) != json['message']:
                errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
            else:
                errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
            if kwargs.get('fatal'):
                raise ExtractorError(errmsg)
            else:
                self.report_warning(errmsg)
        return json.get('data')

    def json2srt(self, json):
        """Convert the JSON subtitle body to SRT text, skipping entries missing content/timing."""
        data = '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(traverse_obj(json, (
                'body', lambda _, l: l['content'] and l['from'] and l['to']))))
        return data

    def _get_subtitles(self, *, ep_id=None, aid=None):
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        fetched_urls = set()
        for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
            for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
                # The same URL may appear under several keys; download each once
                if url in fetched_urls:
                    continue
                fetched_urls.add(url)
                sub_ext = determine_ext(url)
                sub_lang = sub.get('lang_key') or 'en'

                if sub_ext == 'ass':
                    subtitles.setdefault(sub_lang, []).append({
                        'ext': 'ass',
                        'url': url,
                    })
                elif sub_ext == 'json':
                    sub_data = self._download_json(
                        url, ep_id or aid, fatal=False,
                        note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
                        errnote='Unable to download subtitles')
                    if sub_data:
                        subtitles.setdefault(sub_lang, []).append({
                            'ext': 'srt',
                            'data': self.json2srt(sub_data),
                        })
                else:
                    self.report_warning('Unexpected subtitle extension', ep_id or aid)

        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',  # NOTE(review): query keys here reconstructed; verify
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'description': video_data.get('desc'),
            'thumbnail': video_data.get('cover'),
            'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
            # Titles look like 'E2 - ...'; pull the episode number from the prefix
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        # Password is RSA-encrypted with a server-provided key, salted with key_data['hash']
        public_key = Cryptodome.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',  # NOTE(review): reconstructed form fields; verify
                's_locale': 'en_US',
                'isTrusted': 'true',
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            raise ExtractorError('Unable to log in')
1827 class BiliIntlIE(BiliIntlBaseIE
):
1828 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1831 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1835 'title': 'E2 - The First Night',
1836 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1837 'episode_number': 2,
1838 'upload_date': '20201009',
1839 'episode': 'Episode 2',
1840 'timestamp': 1602259500,
1841 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1845 'title': '<Untitled Chapter 1>'
1847 'start_time': 76.242,
1848 'end_time': 161.161,
1851 'start_time': 1325.742,
1852 'end_time': 1403.903,
1858 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1862 'title': 'E3 - Who?',
1863 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1864 'episode_number': 3,
1865 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1866 'episode': 'Episode 3',
1867 'upload_date': '20211219',
1868 'timestamp': 1639928700,
1872 'title': '<Untitled Chapter 1>'
1878 'start_time': 1173.0,
1879 'end_time': 1259.535,
1884 # Subtitle with empty content
1885 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1889 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1890 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1891 'episode_number': 140,
1893 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1895 # episode comment extraction
1896 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1900 'timestamp': 1604057820,
1901 'upload_date': '20201030',
1902 'episode_number': 5,
1903 'title': 'E5 - My Own Steel',
1904 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1905 'thumbnail': r
're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1906 'episode': 'Episode 5',
1907 'comment_count': int,
1911 'title': '<Untitled Chapter 1>'
1917 'start_time': 1290.0,
1926 # user generated content comment extraction
1927 'url': 'https://www.bilibili.tv/en/video/2045730385',
1931 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1932 'timestamp': 1667891924,
1933 'upload_date': '20221108',
1934 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
1935 'comment_count': int,
1936 'thumbnail': r
're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
1942 # episode id without intro and outro
1943 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1947 'title': 'E1 - Operation \'Strix\' <Owl>',
1948 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1949 'timestamp': 1649516400,
1950 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1951 'episode': 'Episode 1',
1952 'episode_number': 1,
1953 'upload_date': '20220409',
1956 'url': 'https://www.biliintl.com/en/play/34613/341736',
1957 'only_matching': True,
1959 # User-generated content (as opposed to a series licensed from a studio)
1960 'url': 'https://bilibili.tv/en/video/2019955076',
1961 'only_matching': True,
1963 # No language in URL
1964 'url': 'https://www.bilibili.tv/video/2019955076',
1965 'only_matching': True,
1967 # Uppercase language in URL
1968 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1969 'only_matching': True,
1973 def _make_url(video_id
, series_id
=None):
1975 return f
'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1976 return f
'https://www.bilibili.tv/en/video/{video_id}'
1978 def _extract_video_metadata(self
, url
, video_id
, season_id
):
1979 url
, smuggled_data
= unsmuggle_url(url
, {})
1980 if smuggled_data
.get('title'):
1981 return smuggled_data
1983 webpage
= self
._download
_webpage
(url
, video_id
)
1986 self
._search
_json
(r
'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage
, 'preload state', video_id
, default
={})
1987 or self
._search
_nuxt
_data
(webpage
, video_id
, '__initialState', fatal
=False, traverse
=None))
1988 video_data
= traverse_obj(
1989 initial_data
, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type
=dict) or {}
1991 if season_id
and not video_data
:
1992 # Non-Bstation layout, read through episode list
1993 season_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id
)
1994 video_data
= traverse_obj(season_json
, (
1995 'sections', ..., 'episodes', lambda _
, v
: str(v
['episode_id']) == video_id
1996 ), expected_type
=dict, get_all
=False)
1998 # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
2000 self
._parse
_video
_metadata
(video_data
), {
2001 'title': get_element_by_class(
2002 'bstar-meta__title', webpage
) or self
._html
_search
_meta
('og:title', webpage
),
2003 'description': get_element_by_class(
2004 'bstar-meta__desc', webpage
) or self
._html
_search
_meta
('og:description', webpage
),
2005 }, self
._search
_json
_ld
(webpage
, video_id
, default
={}))
2007 def _get_comments_reply(self
, root_id
, next_id
=0, display_id
=None):
2008 comment_api_raw_data
= self
._download
_json
(
2009 'https://api.bilibili.tv/reply/web/detail', display_id
,
2010 note
=f
'Downloading reply comment of {root_id} - {next_id}',
2013 'ps': 20, # comment's reply per page (default: 3)
2018 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
2020 'author': traverse_obj(replies
, ('member', 'name')),
2021 'author_id': traverse_obj(replies
, ('member', 'mid')),
2022 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
2023 'text': traverse_obj(replies
, ('content', 'message')),
2024 'id': replies
.get('rpid'),
2025 'like_count': int_or_none(replies
.get('like_count')),
2026 'parent': replies
.get('parent'),
2027 'timestamp': unified_timestamp(replies
.get('ctime_text'))
2030 if not traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
2031 yield from self
._get
_comments
_reply
(
2032 root_id
, comment_api_raw_data
['data']['cursor']['next'], display_id
)
2034 def _get_comments(self
, video_id
, ep_id
):
2035 for i
in itertools
.count(0):
2036 comment_api_raw_data
= self
._download
_json
(
2037 'https://api.bilibili.tv/reply/web/root', video_id
,
2038 note
=f
'Downloading comment page {i + 1}',
2041 'pn': i
, # page number
2042 'ps': 20, # comment per page (default: 20)
2044 'type': 3 if ep_id
else 1, # 1: user generated content, 3: series content
2045 'sort_type': 1, # 1: best, 2: recent
2048 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
2050 'author': traverse_obj(replies
, ('member', 'name')),
2051 'author_id': traverse_obj(replies
, ('member', 'mid')),
2052 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
2053 'text': traverse_obj(replies
, ('content', 'message')),
2054 'id': replies
.get('rpid'),
2055 'like_count': int_or_none(replies
.get('like_count')),
2056 'timestamp': unified_timestamp(replies
.get('ctime_text')),
2057 'author_is_uploader': bool(traverse_obj(replies
, ('member', 'type'))),
2059 if replies
.get('count'):
2060 yield from self
._get
_comments
_reply
(replies
.get('rpid'), display_id
=video_id
)
2062 if traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
2065 def _real_extract(self
, url
):
2066 season_id
, ep_id
, aid
= self
._match
_valid
_url
(url
).group('season_id', 'ep_id', 'aid')
2067 video_id
= ep_id
or aid
2071 intro_ending_json
= self
._call
_api
(
2072 f
'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2073 video_id
, fatal
=False) or {}
2074 if intro_ending_json
.get('skip'):
2075 # FIXME: start time and end time seems a bit off a few second even it corrext based on ogv.*.js
2076 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2078 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_start_time')), 1000),
2079 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_end_time')), 1000),
2082 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_start_time')), 1000),
2083 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_end_time')), 1000),
2089 **self
._extract
_video
_metadata
(url
, video_id
, season_id
),
2090 'formats': self
._get
_formats
(ep_id
=ep_id
, aid
=aid
),
2091 'subtitles': self
.extract_subtitles(ep_id
=ep_id
, aid
=aid
),
2092 'chapters': chapters
,
2093 '__post_extractor': self
.extract_comments(video_id
, ep_id
),
2094 'http_headers': self
._HEADERS
,
class BiliIntlSeriesIE(BiliIntlBaseIE):
    """Extracts a bilibili.tv series (all episodes) as a playlist."""
    IE_NAME = 'biliIntl:series'
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.tv/en/play/34613',
        'playlist_mincount': 15,
        'info_dict': {
            'id': '34613',
            'title': 'TONIKAWA: Over the Moon For You',
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'categories': ['Slice of life', 'Comedy', 'Romance'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'view_count': int,
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.bilibili.tv/en/media/1048837',
        'info_dict': {
            'id': '1048837',
            'title': 'SPY×FAMILY',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'categories': ['Adventure', 'Action', 'Comedy'],
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
            'view_count': int,
        },
        'playlist_mincount': 25,
    }, {
        'url': 'https://www.biliintl.com/en/play/34613',
        'only_matching': True,
    }, {
        'url': 'https://www.biliintl.com/EN/play/34613',
        'only_matching': True,
    }]

    def _entries(self, series_id):
        series_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
        for episode in traverse_obj(series_json, ('sections', ..., 'episodes', ...), expected_type=dict):
            episode_id = str(episode['episode_id'])
            # Smuggle per-episode metadata so BiliIntlIE can skip re-downloading it
            yield self.url_result(smuggle_url(
                BiliIntlIE._make_url(episode_id, series_id),
                self._parse_video_metadata(episode),
            ), BiliIntlIE, episode_id)

    def _real_extract(self, url):
        series_id = self._match_id(url)
        series_info = self._call_api(f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id).get('season') or {}
        return self.playlist_result(
            self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
            categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
            thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))
2152 class BiliLiveIE(InfoExtractor
):
2153 _VALID_URL
= r
'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2156 'url': 'https://live.bilibili.com/196',
2159 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
2161 'title': "太空狼人杀联动,不被爆杀就算赢",
2162 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
2163 'timestamp': 1650802769,
2167 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2168 'only_matching': True
2170 'url': 'https://live.bilibili.com/blanc/196',
2171 'only_matching': True
2175 80: {'format_id': 'low', 'format_note': '流畅'}
,
2176 150: {'format_id': 'high_res', 'format_note': '高清'}
,
2177 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}
,
2178 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}
,
2179 10000: {'format_id': 'source', 'format_note': '原画'}
,
2180 20000: {'format_id': '4K', 'format_note': '4K'}
,
2181 30000: {'format_id': 'dolby', 'format_note': '杜比'}
,
2184 _quality
= staticmethod(qualities(list(_FORMATS
)))
2186 def _call_api(self
, path
, room_id
, query
):
2187 api_result
= self
._download
_json
(f
'https://api.live.bilibili.com/{path}', room_id
, query
=query
)
2188 if api_result
.get('code') != 0:
2189 raise ExtractorError(api_result
.get('message') or 'Unable to download JSON metadata')
2190 return api_result
.get('data') or {}
2192 def _parse_formats(self
, qn
, fmt
):
2193 for codec
in fmt
.get('codec') or []:
2194 if codec
.get('current_qn') != qn
:
2196 for url_info
in codec
['url_info']:
2198 'url': f
'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2199 'ext': fmt
.get('format_name'),
2200 'vcodec': codec
.get('codec_name'),
2201 'quality': self
._quality
(qn
),
2202 **self
._FORMATS
[qn
],
2205 def _real_extract(self
, url
):
2206 room_id
= self
._match
_id
(url
)
2207 room_data
= self
._call
_api
('room/v1/Room/get_info', room_id
, {'id': room_id}
)
2208 if room_data
.get('live_status') == 0:
2209 raise ExtractorError('Streamer is not live', expected
=True)
2212 for qn
in self
._FORMATS
.keys():
2213 stream_data
= self
._call
_api
('xlive/web-room/v2/index/getRoomPlayInfo', room_id
, {
2223 for fmt
in traverse_obj(stream_data
, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2224 formats
.extend(self
._parse
_formats
(qn
, fmt
))
2228 'title': room_data
.get('title'),
2229 'description': room_data
.get('description'),
2230 'thumbnail': room_data
.get('user_cover'),
2231 'timestamp': stream_data
.get('live_time'),