11 from .common
import InfoExtractor
, SearchInfoExtractor
12 from ..dependencies
import Cryptodome
13 from ..networking
.exceptions
import HTTPError
34 srt_subtitles_timecode
,
46 class BilibiliBaseIE(InfoExtractor
):
47 _FORMAT_ID_RE
= re
.compile(r
'-(\d+)\.m4s\?')
49 def extract_formats(self
, play_info
):
51 r
['quality']: traverse_obj(r
, 'new_description', 'display_desc')
52 for r
in traverse_obj(play_info
, ('support_formats', lambda _
, v
: v
['quality']))
55 audios
= traverse_obj(play_info
, ('dash', (None, 'dolby'), 'audio', ..., {dict}
))
56 flac_audio
= traverse_obj(play_info
, ('dash', 'flac', 'audio'))
58 audios
.append(flac_audio
)
60 'url': traverse_obj(audio
, 'baseUrl', 'base_url', 'url'),
61 'ext': mimetype2ext(traverse_obj(audio
, 'mimeType', 'mime_type')),
62 'acodec': traverse_obj(audio
, ('codecs', {str.lower}
)),
64 'tbr': float_or_none(audio
.get('bandwidth'), scale
=1000),
65 'filesize': int_or_none(audio
.get('size')),
66 'format_id': str_or_none(audio
.get('id')),
67 } for audio
in audios
]
70 'url': traverse_obj(video
, 'baseUrl', 'base_url', 'url'),
71 'ext': mimetype2ext(traverse_obj(video
, 'mimeType', 'mime_type')),
72 'fps': float_or_none(traverse_obj(video
, 'frameRate', 'frame_rate')),
73 'width': int_or_none(video
.get('width')),
74 'height': int_or_none(video
.get('height')),
75 'vcodec': video
.get('codecs'),
76 'acodec': 'none' if audios
else None,
77 'dynamic_range': {126: 'DV', 125: 'HDR10'}
.get(int_or_none(video
.get('id'))),
78 'tbr': float_or_none(video
.get('bandwidth'), scale
=1000),
79 'filesize': int_or_none(video
.get('size')),
80 'quality': int_or_none(video
.get('id')),
81 'format_id': traverse_obj(
82 video
, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}
, 1),
83 ('id', {str_or_none}
), get_all
=False),
84 'format': format_names
.get(video
.get('id')),
85 } for video
in traverse_obj(play_info
, ('dash', 'video', ...)))
87 missing_formats
= format_names
.keys() - set(traverse_obj(formats
, (..., 'quality')))
89 self
.to_screen(f
'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
90 f
'you have to login or become premium member to download them. {self._login_hint()}')
94 def _download_playinfo(self
, video_id
, cid
):
95 return self
._download
_json
(
96 'https://api.bilibili.com/x/player/playurl', video_id
,
97 query
={'bvid': video_id, 'cid': cid, 'fnval': 4048}
,
98 note
=f
'Downloading video formats for cid {cid}')['data']
100 def json2srt(self
, json_data
):
102 for idx
, line
in enumerate(json_data
.get('body') or []):
103 srt_data
+= (f
'{idx + 1}\n'
104 f
'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
105 f
'{line["content"]}\n\n')
108 def _get_subtitles(self
, video_id
, cid
, aid
=None):
112 'url': f
'https://comment.bilibili.com/{cid}.xml',
116 subtitle_info
= traverse_obj(self
._download
_json
(
117 'https://api.bilibili.com/x/player/v2', video_id
,
118 query
={'aid': aid, 'cid': cid}
if aid
else {'bvid': video_id, 'cid': cid}
,
119 note
=f
'Extracting subtitle info {cid}'), ('data', 'subtitle'))
120 subs_list
= traverse_obj(subtitle_info
, ('subtitles', lambda _
, v
: v
['subtitle_url'] and v
['lan']))
121 if not subs_list
and traverse_obj(subtitle_info
, 'allow_submit'):
122 if not self
._get
_cookies
('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
123 self
.report_warning(f
'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once
=True)
125 subtitles
.setdefault(s
['lan'], []).append({
127 'data': self
.json2srt(self
._download
_json
(s
['subtitle_url'], video_id
))
131 def _get_chapters(self
, aid
, cid
):
132 chapters
= aid
and cid
and self
._download
_json
(
133 'https://api.bilibili.com/x/player/v2', aid
, query
={'aid': aid, 'cid': cid}
,
134 note
='Extracting chapters', fatal
=False)
135 return traverse_obj(chapters
, ('data', 'view_points', ..., {
137 'start_time': 'from',
141 def _get_comments(self
, aid
):
142 for idx
in itertools
.count(1):
143 replies
= traverse_obj(
145 f
'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
146 aid
, note
=f
'Extracting comments from page {idx}', fatal
=False),
150 for children
in map(self
._get
_all
_children
, replies
):
153 def _get_all_children(self
, reply
):
155 'author': traverse_obj(reply
, ('member', 'uname')),
156 'author_id': traverse_obj(reply
, ('member', 'mid')),
157 'id': reply
.get('rpid'),
158 'text': traverse_obj(reply
, ('content', 'message')),
159 'timestamp': reply
.get('ctime'),
160 'parent': reply
.get('parent') or 'root',
162 for children
in map(self
._get
_all
_children
, traverse_obj(reply
, ('replies', ...))):
165 def _get_episodes_from_season(self
, ss_id
, url
):
166 season_info
= self
._download
_json
(
167 'https://api.bilibili.com/pgc/web/season/section', ss_id
,
168 note
='Downloading season info', query
={'season_id': ss_id}
,
169 headers
={'Referer': url, **self.geo_verification_headers()}
)
171 for entry
in traverse_obj(season_info
, (
172 'result', 'main_section', 'episodes',
173 lambda _
, v
: url_or_none(v
['share_url']) and v
['id'])):
174 yield self
.url_result(entry
['share_url'], BiliBiliBangumiIE
, str_or_none(entry
.get('id')))
176 def _get_divisions(self
, video_id
, graph_version
, edges
, edge_id
, cid_edges
=None):
177 cid_edges
= cid_edges
or {}
178 division_data
= self
._download
_json
(
179 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id
,
180 query
={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id}
,
181 note
=f
'Extracting divisions from edge {edge_id}')
182 edges
.setdefault(edge_id
, {}).update(
183 traverse_obj(division_data
, ('data', 'story_list', lambda _
, v
: v
['edge_id'] == edge_id
, {
184 'title': ('title', {str}
),
185 'cid': ('cid', {int_or_none}
),
188 edges
[edge_id
].update(traverse_obj(division_data
, ('data', {
189 'title': ('title', {str}
),
190 'choices': ('edges', 'questions', ..., 'choices', ..., {
191 'edge_id': ('id', {int_or_none}
),
192 'cid': ('cid', {int_or_none}
),
193 'text': ('option', {str}
),
196 # use dict to combine edges that use the same video section (same cid)
197 cid_edges
.setdefault(edges
[edge_id
]['cid'], {})[edge_id
] = edges
[edge_id
]
198 for choice
in traverse_obj(edges
, (edge_id
, 'choices', ...)):
199 if choice
['edge_id'] not in edges
:
200 edges
[choice
['edge_id']] = {'cid': choice['cid']}
201 self
._get
_divisions
(video_id
, graph_version
, edges
, choice
['edge_id'], cid_edges
=cid_edges
)
204 def _get_interactive_entries(self
, video_id
, cid
, metainfo
):
205 graph_version
= traverse_obj(
207 'https://api.bilibili.com/x/player/wbi/v2', video_id
,
208 'Extracting graph version', query
={'bvid': video_id, 'cid': cid}
),
209 ('data', 'interaction', 'graph_version', {int_or_none}
))
210 cid_edges
= self
._get
_divisions
(video_id
, graph_version
, {1: {'cid': cid}
}, 1)
211 for cid
, edges
in cid_edges
.items():
212 play_info
= self
._download
_playinfo
(video_id
, cid
)
215 'id': f
'{video_id}_{cid}',
216 'title': f
'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
217 'formats': self
.extract_formats(play_info
),
218 'description': f
'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
219 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
220 'subtitles': self
.extract_subtitles(video_id
, cid
),
224 class BiliBiliIE(BilibiliBaseIE
):
225 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
228 'url': 'https://www.bilibili.com/video/BV13x41117TL',
230 'id': 'BV13x41117TL',
231 'title': '阿滴英文|英文歌分享#6 "Closer',
233 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
234 'uploader_id': '65880958',
236 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
239 'comment_count': int,
240 'upload_date': '20170301',
241 'timestamp': 1488353834,
246 'note': 'old av URL version',
247 'url': 'http://www.bilibili.com/video/av1074402/',
249 'thumbnail': r
're:^https?://.*\.(jpg|jpeg)$',
252 'uploader_id': '156160',
253 'id': 'BV11x411K7CN',
256 'upload_date': '20140420',
257 'timestamp': 1397983878,
258 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
260 'comment_count': int,
264 'params': {'skip_download': True}
,
267 'url': 'https://www.bilibili.com/video/BV1bK411W797',
269 'id': 'BV1bK411W797',
270 'title': '物语中的人物是如何吐槽自己的OP的'
272 'playlist_count': 18,
275 'id': 'BV1bK411W797_p1',
277 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
279 'timestamp': 1589601697,
280 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
281 'uploader': '打牌还是打桩',
282 'uploader_id': '150259984',
284 'comment_count': int,
285 'upload_date': '20200516',
287 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
292 'note': 'Specific page of Anthology',
293 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
295 'id': 'BV1bK411W797_p1',
297 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
299 'timestamp': 1589601697,
300 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
301 'uploader': '打牌还是打桩',
302 'uploader_id': '150259984',
304 'comment_count': int,
305 'upload_date': '20200516',
307 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
311 'note': 'video has subtitles',
312 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
314 'id': 'BV12N4y1M7rh',
316 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
318 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
320 'upload_date': '20220709',
322 'timestamp': 1657347907,
323 'uploader_id': '1326814124',
324 'comment_count': int,
327 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
328 'subtitles': 'count:2'
330 'params': {'listsubtitles': True}
,
332 'url': 'https://www.bilibili.com/video/av8903802/',
334 'id': 'BV13x41117TL',
336 'title': '阿滴英文|英文歌分享#6 "Closer',
337 'upload_date': '20170301',
338 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
339 'timestamp': 1488353834,
340 'uploader_id': '65880958',
342 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
345 'comment_count': int,
350 'skip_download': True,
353 'note': 'video has chapter',
354 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
356 'id': 'BV1vL411G7N7',
358 'title': '如何为你的B站视频添加进度条分段',
359 'timestamp': 1634554558,
360 'upload_date': '20211018',
361 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
363 'uploader': '爱喝咖啡的当麻',
365 'uploader_id': '1680903',
366 'chapters': 'count:6',
367 'comment_count': int,
370 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
372 'params': {'skip_download': True}
,
374 'note': 'video redirects to festival page',
375 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
377 'id': 'BV1wP4y1P72h',
379 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
380 'timestamp': 1643947497,
381 'upload_date': '20220204',
382 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
383 'uploader': '叨叨冯聊音乐',
385 'uploader_id': '528182630',
388 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
390 'params': {'skip_download': True}
,
392 'note': 'newer festival video',
393 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
395 'id': 'BV1ay4y1d77f',
397 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
398 'timestamp': 1674273600,
399 'upload_date': '20230121',
400 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
402 'duration': 1111.722,
403 'uploader_id': '8469526',
406 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
408 'params': {'skip_download': True}
,
410 'note': 'interactive/split-path video',
411 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
413 'id': 'BV1af4y1H7ga',
414 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
415 'timestamp': 1630500414,
416 'upload_date': '20210901',
417 'description': 'md5:01113e39ab06e28042d74ac356a08786',
419 'uploader': '钉宫妮妮Ninico',
421 'uploader_id': '8881297',
422 'comment_count': int,
425 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
427 'playlist_count': 33,
430 'id': 'BV1af4y1H7ga_400950101',
432 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
433 'timestamp': 1630500414,
434 'upload_date': '20210901',
435 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
437 'uploader': '钉宫妮妮Ninico',
439 'uploader_id': '8881297',
440 'comment_count': int,
443 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
447 'note': '301 redirect to bangumi link',
448 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
451 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
456 'season_id': '28609',
458 'episode': '钱学森弹道和乘波体飞行器是什么?',
459 'episode_id': '288525',
460 'episode_number': 105,
461 'duration': 1183.957,
462 'timestamp': 1571648124,
463 'upload_date': '20191021',
464 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
467 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
469 'id': 'BV1jL41167ZG',
470 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
473 'skip': 'supporter-only video',
475 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
477 'id': 'BV1Ks411f7aQ',
478 'title': '【BD1080P】狼与香辛料I【华盟】',
481 'skip': 'login required',
483 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
485 'id': 'BV1GJ411x7h7',
486 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
489 'skip': 'geo-restricted',
492 def _real_extract(self
, url
):
493 video_id
= self
._match
_id
(url
)
494 webpage
, urlh
= self
._download
_webpage
_handle
(url
, video_id
)
495 if not self
._match
_valid
_url
(urlh
.url
):
496 return self
.url_result(urlh
.url
)
498 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)
500 is_festival
= 'videoData' not in initial_state
502 video_data
= initial_state
['videoInfo']
504 play_info_obj
= self
._search
_json
(
505 r
'window\.__playinfo__\s*=', webpage
, 'play info', video_id
, fatal
=False)
506 if not play_info_obj
:
507 if traverse_obj(initial_state
, ('error', 'trueCode')) == -403:
508 self
.raise_login_required()
509 if traverse_obj(initial_state
, ('error', 'trueCode')) == -404:
510 raise ExtractorError(
511 'This video may be deleted or geo-restricted. '
512 'You might want to try a VPN or a proxy server (with --proxy)', expected
=True)
513 play_info
= traverse_obj(play_info_obj
, ('data', {dict}
))
515 if traverse_obj(play_info_obj
, 'code') == 87007:
516 toast
= get_element_by_class('tips-toast', webpage
) or ''
518 f
'{get_element_by_class("belongs-to", toast) or ""},'
519 + (get_element_by_class('level', toast
) or ''))
520 raise ExtractorError(
521 f
'This is a supporter-only video: {msg}. {self._login_hint()}', expected
=True)
522 raise ExtractorError('Failed to extract play info')
523 video_data
= initial_state
['videoData']
525 video_id
, title
= video_data
['bvid'], video_data
.get('title')
527 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
528 page_list_json
= not is_festival
and traverse_obj(
530 'https://api.bilibili.com/x/player/pagelist', video_id
,
531 fatal
=False, query
={'bvid': video_id, 'jsonp': 'jsonp'}
,
532 note
='Extracting videos in anthology'),
533 'data', expected_type
=list) or []
534 is_anthology
= len(page_list_json
) > 1
536 part_id
= int_or_none(parse_qs(url
).get('p', [None])[-1])
537 if is_anthology
and not part_id
and self
._yes
_playlist
(video_id
, video_id
):
538 return self
.playlist_from_matches(
539 page_list_json
, video_id
, title
, ie
=BiliBiliIE
,
540 getter
=lambda entry
: f
'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
543 part_id
= part_id
or 1
544 title
+= f
' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
546 aid
= video_data
.get('aid')
547 old_video_id
= format_field(aid
, None, f
'%s_part{part_id or 1}')
549 cid
= traverse_obj(video_data
, ('pages', part_id
- 1, 'cid')) if part_id
else video_data
.get('cid')
553 play_info
= self
._download
_playinfo
(video_id
, cid
)
555 festival_info
= traverse_obj(initial_state
, {
556 'uploader': ('videoInfo', 'upName'),
557 'uploader_id': ('videoInfo', 'upMid', {str_or_none}
),
558 'like_count': ('videoStatus', 'like', {int_or_none}
),
559 'thumbnail': ('sectionEpisodes', lambda _
, v
: v
['bvid'] == video_id
, 'cover'),
563 **traverse_obj(initial_state
, {
564 'uploader': ('upData', 'name'),
565 'uploader_id': ('upData', 'mid', {str_or_none}
),
566 'like_count': ('videoData', 'stat', 'like', {int_or_none}
),
567 'tags': ('tags', ..., 'tag_name'),
568 'thumbnail': ('videoData', 'pic', {url_or_none}
),
571 **traverse_obj(video_data
, {
572 'description': 'desc',
573 'timestamp': ('pubdate', {int_or_none}
),
574 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}
),
575 'comment_count': ('stat', 'reply', {int_or_none}
),
577 'id': f
'{video_id}{format_field(part_id, None, "_p%d")}',
578 '_old_archive_ids': [make_archive_id(self
, old_video_id
)] if old_video_id
else None,
580 'http_headers': {'Referer': url}
,
583 is_interactive
= traverse_obj(video_data
, ('rights', 'is_stein_gate'))
585 return self
.playlist_result(
586 self
._get
_interactive
_entries
(video_id
, cid
, metainfo
), **metainfo
, **{
587 'duration': traverse_obj(initial_state
, ('videoData', 'duration', {int_or_none}
)),
588 '__post_extractor': self
.extract_comments(aid
),
593 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
594 'chapters': self
._get
_chapters
(aid
, cid
),
595 'subtitles': self
.extract_subtitles(video_id
, cid
),
596 'formats': self
.extract_formats(play_info
),
597 '__post_extractor': self
.extract_comments(aid
),
601 class BiliBiliBangumiIE(BilibiliBaseIE
):
602 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
605 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
614 'episode': 'forever/ef',
615 'episode_id': '21495',
616 'episode_number': 12,
617 'title': '12 forever/ef',
618 'duration': 1420.791,
619 'timestamp': 1320412200,
620 'upload_date': '20111104',
621 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
624 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
631 'season_id': '26801',
634 'episode_id': '267851',
637 'duration': 1425.256,
638 'timestamp': 1554566400,
639 'upload_date': '20190406',
640 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
642 'skip': 'Geo-restricted',
644 'note': 'a making-of which falls outside main section',
645 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
652 'season_id': '26801',
655 'episode_id': '345120',
656 'episode_number': 27,
658 'duration': 1922.129,
659 'timestamp': 1602853860,
660 'upload_date': '20201016',
661 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
665 def _real_extract(self
, url
):
666 episode_id
= self
._match
_id
(url
)
667 webpage
= self
._download
_webpage
(url
, episode_id
)
669 if '您所在的地区无法观看本片' in webpage
:
670 raise GeoRestrictedError('This video is restricted')
671 elif '正在观看预览,大会员免费看全片' in webpage
:
672 self
.raise_login_required('This video is for premium members only')
674 headers
= {'Referer': url, **self.geo_verification_headers()}
675 play_info
= self
._download
_json
(
676 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id
,
677 'Extracting episode', query
={'fnval': '4048', 'ep_id': episode_id}
,
679 premium_only
= play_info
.get('code') == -10403
680 play_info
= traverse_obj(play_info
, ('result', 'video_info', {dict}
)) or {}
682 formats
= self
.extract_formats(play_info
)
683 if not formats
and (premium_only
or '成为大会员抢先看' in webpage
or '开通大会员观看' in webpage
):
684 self
.raise_login_required('This video is for premium members only')
686 bangumi_info
= self
._download
_json
(
687 'https://api.bilibili.com/pgc/view/web/season', episode_id
, 'Get episode details',
688 query
={'ep_id': episode_id}
, headers
=headers
)['result']
690 episode_number
, episode_info
= next((
691 (idx
, ep
) for idx
, ep
in enumerate(traverse_obj(
692 bangumi_info
, (('episodes', ('section', ..., 'episodes')), ..., {dict}
)), 1)
693 if str_or_none(ep
.get('id')) == episode_id
), (1, {}))
695 season_id
= bangumi_info
.get('season_id')
696 season_number
, season_title
= season_id
and next((
697 (idx
+ 1, e
.get('season_title')) for idx
, e
in enumerate(
698 traverse_obj(bangumi_info
, ('seasons', ...)))
699 if e
.get('season_id') == season_id
702 aid
= episode_info
.get('aid')
707 **traverse_obj(bangumi_info
, {
708 'series': ('series', 'series_title', {str}
),
709 'series_id': ('series', 'series_id', {str_or_none}
),
710 'thumbnail': ('square_cover', {url_or_none}
),
712 **traverse_obj(episode_info
, {
713 'episode': ('long_title', {str}
),
714 'episode_number': ('title', {int_or_none}
, {lambda x: x or episode_number}
),
715 'timestamp': ('pub_time', {int_or_none}
),
716 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)}
,
718 'episode_id': episode_id
,
719 'season': str_or_none(season_title
),
720 'season_id': str_or_none(season_id
),
721 'season_number': season_number
,
722 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
723 'subtitles': self
.extract_subtitles(episode_id
, episode_info
.get('cid'), aid
=aid
),
724 '__post_extractor': self
.extract_comments(aid
),
725 'http_headers': headers
,
729 class BiliBiliBangumiMediaIE(BilibiliBaseIE
):
730 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
732 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
735 'title': 'CAROLE & TUESDAY',
736 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
738 'playlist_mincount': 25,
740 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
743 'title': '攻壳机动队 S.A.C. 2nd GIG',
744 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
746 'playlist_count': 26,
756 'episode': '再启动 REEMBODY',
757 'episode_id': '68540',
759 'title': '1 再启动 REEMBODY',
760 'duration': 1525.777,
761 'timestamp': 1425074413,
762 'upload_date': '20150227',
763 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
768 def _real_extract(self
, url
):
769 media_id
= self
._match
_id
(url
)
770 webpage
= self
._download
_webpage
(url
, media_id
)
772 initial_state
= self
._search
_json
(
773 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial_state', media_id
)
774 ss_id
= initial_state
['mediaInfo']['season_id']
776 return self
.playlist_result(
777 self
._get
_episodes
_from
_season
(ss_id
, url
), media_id
,
778 **traverse_obj(initial_state
, ('mediaInfo', {
779 'title': ('title', {str}
),
780 'description': ('evaluate', {str}
),
784 class BiliBiliBangumiSeasonIE(BilibiliBaseIE
):
785 _VALID_URL
= r
'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
787 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
791 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
793 'playlist_mincount': 26
795 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
799 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
801 'playlist_count': 13,
812 'episode_id': '50188',
815 'duration': 1436.992,
816 'timestamp': 1343185080,
817 'upload_date': '20120725',
818 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
823 def _real_extract(self
, url
):
824 ss_id
= self
._match
_id
(url
)
825 webpage
= self
._download
_webpage
(url
, ss_id
)
826 metainfo
= traverse_obj(
827 self
._search
_json
(r
'<script[^>]+type="application/ld\+json"[^>]*>', webpage
, 'info', ss_id
),
828 ('itemListElement', ..., {
829 'title': ('name', {str}
),
830 'description': ('description', {str}
),
833 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), ss_id
, **metainfo
)
836 class BilibiliCheeseBaseIE(BilibiliBaseIE
):
837 _HEADERS
= {'Referer': 'https://www.bilibili.com/'}
839 def _extract_episode(self
, season_info
, ep_id
):
840 episode_info
= traverse_obj(season_info
, (
841 'episodes', lambda _
, v
: v
['id'] == int(ep_id
)), get_all
=False)
842 aid
, cid
= episode_info
['aid'], episode_info
['cid']
844 if traverse_obj(episode_info
, 'ep_status') == -1:
845 raise ExtractorError('This course episode is not yet available.', expected
=True)
846 if not traverse_obj(episode_info
, 'playable'):
847 self
.raise_login_required('You need to purchase the course to download this episode')
849 play_info
= self
._download
_json
(
850 'https://api.bilibili.com/pugv/player/web/playurl', ep_id
,
851 query
={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1}
,
852 headers
=self
._HEADERS
, note
='Downloading playinfo')['data']
855 'id': str_or_none(ep_id
),
856 'episode_id': str_or_none(ep_id
),
857 'formats': self
.extract_formats(play_info
),
858 'extractor_key': BilibiliCheeseIE
.ie_key(),
859 'extractor': BilibiliCheeseIE
.IE_NAME
,
860 'webpage_url': f
'https://www.bilibili.com/cheese/play/ep{ep_id}',
861 **traverse_obj(episode_info
, {
862 'episode': ('title', {str}
),
863 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)}
,
864 'alt_title': ('subtitle', {str}
),
865 'duration': ('duration', {int_or_none}
),
866 'episode_number': ('index', {int_or_none}
),
867 'thumbnail': ('cover', {url_or_none}
),
868 'timestamp': ('release_date', {int_or_none}
),
869 'view_count': ('play', {int_or_none}
),
871 **traverse_obj(season_info
, {
872 'uploader': ('up_info', 'uname', {str}
),
873 'uploader_id': ('up_info', 'mid', {str_or_none}
),
875 'subtitles': self
.extract_subtitles(ep_id
, cid
, aid
=aid
),
876 '__post_extractor': self
.extract_comments(aid
),
877 'http_headers': self
._HEADERS
,
880 def _download_season_info(self
, query_key
, video_id
):
881 return self
._download
_json
(
882 f
'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}', video_id
,
883 headers
=self
._HEADERS
, note
='Downloading season info')['data']
886 class BilibiliCheeseIE(BilibiliCheeseBaseIE
):
887 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
889 'url': 'https://www.bilibili.com/cheese/play/ep229832',
893 'title': '1 - 课程先导片',
894 'alt_title': '视频课 · 3分41秒',
896 'uploader_id': '316568752',
898 'episode_id': '229832',
901 'timestamp': 1695549606,
902 'upload_date': '20230924',
903 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
908 def _real_extract(self
, url
):
909 ep_id
= self
._match
_id
(url
)
910 return self
._extract
_episode
(self
._download
_season
_info
('ep_id', ep_id
), ep_id
)
913 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE
):
914 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
916 'url': 'https://www.bilibili.com/cheese/play/ss5918',
919 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
920 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
926 'title': '1 - 课程先导片',
927 'alt_title': '视频课 · 3分41秒',
929 'uploader_id': '316568752',
931 'episode_id': '229832',
934 'timestamp': 1695549606,
935 'upload_date': '20230924',
936 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
940 'params': {'playlist_items': '1'}
,
942 'url': 'https://www.bilibili.com/cheese/play/ss5918',
945 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
946 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
948 'playlist_mincount': 5,
949 'skip': 'paid video in list',
952 def _get_cheese_entries(self
, season_info
):
953 for ep_id
in traverse_obj(season_info
, ('episodes', lambda _
, v
: v
['episode_can_view'], 'id')):
954 yield self
._extract
_episode
(season_info
, ep_id
)
956 def _real_extract(self
, url
):
957 season_id
= self
._match
_id
(url
)
958 season_info
= self
._download
_season
_info
('season_id', season_id
)
960 return self
.playlist_result(
961 self
._get
_cheese
_entries
(season_info
), season_id
,
962 **traverse_obj(season_info
, {
963 'title': ('title', {str}
),
964 'description': ('subtitle', {str}
),
class BilibiliSpaceBaseIE(InfoExtractor):
    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Build (metadata, lazily paged entries) from page-fetching callbacks.

        *fetch_page(idx)* downloads page *idx*; *get_metadata(page)* returns a dict
        with at least 'page_count' and 'page_size'; *get_entries(page)* yields
        entries for one page.
        """
        first_page = fetch_page(0)
        metadata = get_metadata(first_page)

        def load_page(idx):
            # Page 0 was already downloaded to derive the metadata — reuse it.
            return get_entries(first_page if idx == 0 else fetch_page(idx))

        paged_list = InAdvancePagedList(
            load_page, metadata['page_count'], metadata['page_size'])

        return metadata, paged_list
980 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE
):
981 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
983 'url': 'https://space.bilibili.com/3985676/video',
987 'playlist_mincount': 178,
989 'url': 'https://space.bilibili.com/313580179/video',
993 'playlist_mincount': 92,
996 def _extract_signature(self
, playlist_id
):
997 session_data
= self
._download
_json
('https://api.bilibili.com/x/web-interface/nav', playlist_id
, fatal
=False)
999 key_from_url
= lambda x
: x
[x
.rfind('/') + 1:].split('.')[0]
1000 img_key
= traverse_obj(
1001 session_data
, ('data', 'wbi_img', 'img_url', {key_from_url}
)) or '34478ba821254d9d93542680e3b86100'
1002 sub_key
= traverse_obj(
1003 session_data
, ('data', 'wbi_img', 'sub_url', {key_from_url}
)) or '7e16a90d190a4355a78fd00b32a38de6'
1005 session_key
= img_key
+ sub_key
1007 signature_values
= []
1009 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
1010 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
1011 57, 62, 11, 36, 20, 34, 44, 52
1013 char_at_position
= try_call(lambda: session_key
[position
])
1014 if char_at_position
:
1015 signature_values
.append(char_at_position
)
1017 return ''.join(signature_values
)[:32]
1019 def _real_extract(self
, url
):
1020 playlist_id
, is_video_url
= self
._match
_valid
_url
(url
).group('id', 'video')
1021 if not is_video_url
:
1022 self
.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1023 'To download audios, add a "/audio" to the URL')
1025 signature
= self
._extract
_signature
(playlist_id
)
1027 def fetch_page(page_idx
):
1032 'order_avoided': 'true',
1037 'web_location': 1550101,
1038 'wts': int(time
.time()),
1040 query
['w_rid'] = hashlib
.md5(f
'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
1043 response
= self
._download
_json
('https://api.bilibili.com/x/space/wbi/arc/search',
1044 playlist_id
, note
=f
'Downloading page {page_idx}', query
=query
)
1045 except ExtractorError
as e
:
1046 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 412:
1047 raise ExtractorError(
1048 'Request is blocked by server (412), please add cookies, wait and try later.', expected
=True)
1050 if response
['code'] == -401:
1051 raise ExtractorError(
1052 'Request is blocked by server (401), please add cookies, wait and try later.', expected
=True)
1053 return response
['data']
1055 def get_metadata(page_data
):
1056 page_size
= page_data
['page']['ps']
1057 entry_count
= page_data
['page']['count']
1059 'page_count': math
.ceil(entry_count
/ page_size
),
1060 'page_size': page_size
,
1063 def get_entries(page_data
):
1064 for entry
in traverse_obj(page_data
, ('list', 'vlist')) or []:
1065 yield self
.url_result(f
'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE
, entry
['bvid'])
1067 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
1068 return self
.playlist_result(paged_list
, playlist_id
)
1071 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE
):
1072 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
1074 'url': 'https://space.bilibili.com/313580179/audio',
1078 'playlist_mincount': 1,
1081 def _real_extract(self
, url
):
1082 playlist_id
= self
._match
_id
(url
)
1084 def fetch_page(page_idx
):
1085 return self
._download
_json
(
1086 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id
,
1087 note
=f
'Downloading page {page_idx}',
1088 query
={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
)['data']
1090 def get_metadata(page_data
):
1092 'page_count': page_data
['pageCount'],
1093 'page_size': page_data
['pageSize'],
1096 def get_entries(page_data
):
1097 for entry
in page_data
.get('data', []):
1098 yield self
.url_result(f
'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE
, entry
['id'])
1100 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
1101 return self
.playlist_result(paged_list
, playlist_id
)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Shared helpers for space list extractors (collections, series, favlists)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        # Collect every string found under bvid_keys/.../ending_key and emit
        # one BiliBiliIE url_result per BV id.
        path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        # Best-effort: scrape the uploader's display name from their space page title.
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        # Paging bookkeeping is internal to the base class; strip it before
        # the metadata dict reaches playlist_result callers.
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        for internal_key in ('page_count', 'page_size'):
            metadata.pop(internal_key, None)
        return metadata, page_list
1120 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE
):
1121 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
1123 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
1125 'id': '2142762_57445',
1126 'title': '【完结】《底特律 变人》全结局流程解说',
1129 'uploader_id': '2142762',
1132 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
1134 'playlist_mincount': 31,
1137 def _real_extract(self
, url
):
1138 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
1139 playlist_id
= f
'{mid}_{sid}'
def fetch_page(page_idx):
    # One 30-entry page of the collection; the API counts pages from 1.
    query = {'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
    return self._download_json(
        'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
        playlist_id, note=f'Downloading page {page_idx}', query=query)['data']
1147 def get_metadata(page_data
):
1148 page_size
= page_data
['page']['page_size']
1149 entry_count
= page_data
['page']['total']
1151 'page_count': math
.ceil(entry_count
/ page_size
),
1152 'page_size': page_size
,
1153 'uploader': self
._get
_uploader
(mid
, playlist_id
),
1154 **traverse_obj(page_data
, {
1155 'title': ('meta', 'name', {str}
),
1156 'description': ('meta', 'description', {str}
),
1157 'uploader_id': ('meta', 'mid', {str_or_none}
),
1158 'timestamp': ('meta', 'ptime', {int_or_none}
),
1159 'thumbnail': ('meta', 'cover', {url_or_none}
),
def get_entries(page_data):
    # All of the collection's videos live under the 'archives' key.
    yield from self._get_entries(page_data, 'archives')
1166 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
1167 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
1170 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE
):
1171 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
1173 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
1175 'id': '1958703906_547718',
1177 'description': '直播回放',
1178 'uploader': '靡烟miya',
1179 'uploader_id': '1958703906',
1180 'timestamp': 1637985853,
1181 'upload_date': '20211127',
1182 'modified_timestamp': int,
1183 'modified_date': str,
1185 'playlist_mincount': 513,
1188 def _real_extract(self
, url
):
1189 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
1190 playlist_id
= f
'{mid}_{sid}'
1191 playlist_meta
= traverse_obj(self
._download
_json
(
1192 f
'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id
, fatal
=False
1194 'title': ('data', 'meta', 'name', {str}
),
1195 'description': ('data', 'meta', 'description', {str}
),
1196 'uploader_id': ('data', 'meta', 'mid', {str_or_none}
),
1197 'timestamp': ('data', 'meta', 'ctime', {int_or_none}
),
1198 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}
),
def fetch_page(page_idx):
    # One 30-entry page of the series archives; `pn` is 1-based.
    query = {'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
    return self._download_json(
        'https://api.bilibili.com/x/series/archives',
        playlist_id, note=f'Downloading page {page_idx}', query=query)['data']
1207 def get_metadata(page_data
):
1208 page_size
= page_data
['page']['size']
1209 entry_count
= page_data
['page']['total']
1211 'page_count': math
.ceil(entry_count
/ page_size
),
1212 'page_size': page_size
,
1213 'uploader': self
._get
_uploader
(mid
, playlist_id
),
def get_entries(page_data):
    # All of the series' videos live under the 'archives' key.
    yield from self._get_entries(page_data, 'archives')
1220 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
1221 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
1224 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE
):
1225 _VALID_URL
= r
'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
1227 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
1233 'uploader_id': '84912',
1234 'timestamp': 1604905176,
1235 'upload_date': '20201109',
1236 'modified_timestamp': int,
1237 'modified_date': str,
1238 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
1242 'playlist_mincount': 22,
1244 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
1245 'only_matching': True,
def _real_extract(self, url):
    # Favorites lists require their owner's cookies when private (API code -403).
    fid = self._match_id(url)

    list_info = self._download_json(
        f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
        fid, note='Downloading favlist metadata')
    if list_info['code'] == -403:
        self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

    # A second endpoint yields the full id list (the paged one above only
    # serves metadata here).
    ids_json = self._download_json(
        f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
        fid, note='Download favlist entries')
    entries = self._get_entries(ids_json, 'data')

    metadata = traverse_obj(list_info, ('data', 'info', {
        'title': ('title', {str}),
        'description': ('intro', {str}),
        'uploader': ('upper', 'name', {str}),
        'uploader_id': ('upper', 'mid', {str_or_none}),
        'timestamp': ('ctime', {int_or_none}),
        'modified_timestamp': ('mtime', {int_or_none}),
        'thumbnail': ('cover', {url_or_none}),
        'view_count': ('cnt_info', 'play', {int_or_none}),
        'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
    }))
    return self.playlist_result(entries, fid, **metadata)
1274 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE
):
1275 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
1277 'url': 'https://www.bilibili.com/watchlater/#/list',
1278 'info_dict': {'id': 'watchlater'}
,
1279 'playlist_mincount': 0,
1280 'skip': 'login required',
def _real_extract(self, url):
    # The DedeUserID login cookie doubles as the playlist id; without it we
    # still query the API, which then reports code -101 (login required).
    user_cookie = self._get_cookies(url).get('DedeUserID')
    list_id = getattr(user_cookie, 'value', 'watchlater')
    toview = self._download_json(
        'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
    if toview['code'] == -101:
        self.raise_login_required(msg='You need to login to access your watchlater list')
    return self.playlist_result(
        self._get_entries(toview, ('data', 'list')), id=list_id, title='稍后再看')
1293 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE
):
1294 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
1296 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
1300 'uploader': '靡烟miya',
1301 'uploader_id': '1958703906',
1302 'timestamp': 1637985853,
1303 'upload_date': '20211127',
1305 'playlist_mincount': 513,
1307 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
1311 'playlist_mincount': 513,
1312 'skip': 'redirect url',
1314 'url': 'https://www.bilibili.com/list/ml1103407912',
1316 'id': '3_1103407912',
1319 'uploader_id': '84912',
1320 'timestamp': 1604905176,
1321 'upload_date': '20201109',
1322 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
1324 'playlist_mincount': 22,
1326 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
1328 'id': '3_1103407912',
1330 'playlist_mincount': 22,
1331 'skip': 'redirect url',
1333 'url': 'https://www.bilibili.com/list/watchlater',
1334 'info_dict': {'id': 'watchlater'}
,
1335 'playlist_mincount': 0,
1336 'skip': 'login required',
1338 'url': 'https://www.bilibili.com/medialist/play/watchlater',
1339 'info_dict': {'id': 'watchlater'}
,
1340 'playlist_mincount': 0,
1341 'skip': 'login required',
1344 def _extract_medialist(self
, query
, list_id
):
1345 for page_num
in itertools
.count(1):
1346 page_data
= self
._download
_json
(
1347 'https://api.bilibili.com/x/v2/medialist/resource/list',
1348 list_id
, query
=query
, note
=f
'getting playlist {query["biz_id"]} page {page_num}'
1350 yield from self
._get
_entries
(page_data
, 'media_list', ending_key
='bv_id')
1351 query
['oid'] = traverse_obj(page_data
, ('media_list', -1, 'id'))
1352 if not page_data
.get('has_more', False):
1355 def _real_extract(self
, url
):
1356 list_id
= self
._match
_id
(url
)
1357 webpage
= self
._download
_webpage
(url
, list_id
)
1358 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', list_id
)
1359 if traverse_obj(initial_state
, ('error', 'code', {int_or_none}
)) != 200:
1360 error_code
= traverse_obj(initial_state
, ('error', 'trueCode', {int_or_none}
))
1361 error_message
= traverse_obj(initial_state
, ('error', 'message', {str_or_none}
))
1362 if error_code
== -400 and list_id
== 'watchlater':
1363 self
.raise_login_required('You need to login to access your watchlater playlist')
1364 elif error_code
== -403:
1365 self
.raise_login_required('This is a private playlist. You need to login as its owner')
1366 elif error_code
== 11010:
1367 raise ExtractorError('Playlist is no longer available', expected
=True)
1368 raise ExtractorError(f
'Could not access playlist: {error_code} {error_message}')
1372 'with_current': False,
1373 **traverse_obj(initial_state
, {
1374 'type': ('playlist', 'type', {int_or_none}
),
1375 'biz_id': ('playlist', 'id', {int_or_none}
),
1376 'tid': ('tid', {int_or_none}
),
1377 'sort_field': ('sortFiled', {int_or_none}
),
1378 'desc': ('desc', {bool_or_none}
, {str_or_none}
, {str.lower}
),
1382 'id': f
'{query["type"]}_{query["biz_id"]}',
1383 **traverse_obj(initial_state
, ('mediaListInfo', {
1384 'title': ('title', {str}
),
1385 'uploader': ('upper', 'name', {str}
),
1386 'uploader_id': ('upper', 'mid', {str_or_none}
),
1387 'timestamp': ('ctime', {int_or_none}
),
1388 'thumbnail': ('cover', {url_or_none}
),
1391 return self
.playlist_result(self
._extract
_medialist
(query
, list_id
), **metadata
)
1394 class BilibiliCategoryIE(InfoExtractor
):
1395 IE_NAME
= 'Bilibili category extractor'
1396 _MAX_RESULTS
= 1000000
1397 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
1399 'url': 'https://www.bilibili.com/v/kichiku/mad',
1401 'id': 'kichiku: mad',
1402 'title': 'kichiku: mad'
1404 'playlist_mincount': 45,
1410 def _fetch_page(self
, api_url
, num_pages
, query
, page_num
):
1411 parsed_json
= self
._download
_json
(
1412 api_url
, query
, query
={'Search_key': query, 'pn': page_num}
,
1413 note
='Extracting results from page %s of %s' % (page_num
, num_pages
))
1415 video_list
= traverse_obj(parsed_json
, ('data', 'archives'), expected_type
=list)
1417 raise ExtractorError('Failed to retrieve video list for page %d' % page_num
)
1419 for video
in video_list
:
1420 yield self
.url_result(
1421 'https://www.bilibili.com/video/%s' % video
['bvid'], 'BiliBili', video
['bvid'])
1423 def _entries(self
, category
, subcategory
, query
):
1424 # map of categories : subcategories : RIDs
1428 'manual_vocaloid': 126,
1435 if category
not in rid_map
:
1436 raise ExtractorError(
1437 f
'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1438 if subcategory
not in rid_map
[category
]:
1439 raise ExtractorError(
1440 f
'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1441 rid_value
= rid_map
[category
][subcategory
]
1443 api_url
= 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1444 page_json
= self
._download
_json
(api_url
, query
, query
={'Search_key': query, 'pn': '1'}
)
1445 page_data
= traverse_obj(page_json
, ('data', 'page'), expected_type
=dict)
1446 count
, size
= int_or_none(page_data
.get('count')), int_or_none(page_data
.get('size'))
1447 if count
is None or not size
:
1448 raise ExtractorError('Failed to calculate either page count or size')
1450 num_pages
= math
.ceil(count
/ size
)
1452 return OnDemandPagedList(functools
.partial(
1453 self
._fetch
_page
, api_url
, num_pages
, query
), size
)
def _real_extract(self, url):
    """Extract a paged playlist of a bilibili category/subcategory page."""
    # URL path looks like /v/<category>/<subcategory>; slots 2 and 3 hold the names.
    category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
    # The combined label serves as playlist id, title and download-note key.
    # f-string replaces dated %-formatting for consistency with the rest of the file.
    query = f'{category}: {subcategory}'

    return self.playlist_result(self._entries(category, subcategory, query), query, query)
1462 class BiliBiliSearchIE(SearchInfoExtractor
):
1463 IE_DESC
= 'Bilibili video search'
1464 _MAX_RESULTS
= 100000
1465 _SEARCH_KEY
= 'bilisearch'
1467 def _search_results(self
, query
):
1468 for page_num
in itertools
.count(1):
1469 videos
= self
._download
_json
(
1470 'https://api.bilibili.com/x/web-interface/search/type', query
,
1471 note
=f
'Extracting results from page {page_num}', query
={
1472 'Search_key': query
,
1478 '__refresh__': 'true',
1479 'search_type': 'video',
1482 })['data'].get('result')
1485 for video
in videos
:
1486 yield self
.url_result(video
['arcurl'], 'BiliBili', str(video
['aid']))
1489 class BilibiliAudioBaseIE(InfoExtractor
):
1490 def _call_api(self
, path
, sid
, query
=None):
1492 query
= {'sid': sid}
1493 return self
._download
_json
(
1494 'https://www.bilibili.com/audio/music-service-c/web/' + path
,
1495 sid
, query
=query
)['data']
1498 class BilibiliAudioIE(BilibiliAudioBaseIE
):
1499 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1501 'url': 'https://www.bilibili.com/audio/au1003142',
1502 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1506 'title': '【tsukimi】YELLOW / 神山羊',
1507 'artist': 'tsukimi',
1508 'comment_count': int,
1509 'description': 'YELLOW的mp3版!',
1516 'thumbnail': r
're:^https?://.+\.jpg',
1517 'timestamp': 1564836614,
1518 'upload_date': '20190803',
1519 'uploader': 'tsukimi-つきみぐー',
1524 def _real_extract(self
, url
):
1525 au_id
= self
._match
_id
(url
)
1527 play_data
= self
._call
_api
('url', au_id
)
1529 'url': play_data
['cdns'][0],
1530 'filesize': int_or_none(play_data
.get('size')),
1534 for a_format
in formats
:
1535 a_format
.setdefault('http_headers', {}).update({
1539 song
= self
._call
_api
('song/info', au_id
)
1540 title
= song
['title']
1541 statistic
= song
.get('statistic') or {}
1544 lyric
= song
.get('lyric')
1556 'artist': song
.get('author'),
1557 'comment_count': int_or_none(statistic
.get('comment')),
1558 'description': song
.get('intro'),
1559 'duration': int_or_none(song
.get('duration')),
1560 'subtitles': subtitles
,
1561 'thumbnail': song
.get('cover'),
1562 'timestamp': int_or_none(song
.get('passtime')),
1563 'uploader': song
.get('uname'),
1564 'view_count': int_or_none(statistic
.get('play')),
1568 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE
):
1569 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1571 'url': 'https://www.bilibili.com/audio/am10624',
1574 'title': '每日新曲推荐(每日11:00更新)',
1575 'description': '每天11:00更新,为你推送最新音乐',
1577 'playlist_count': 19,
1580 def _real_extract(self
, url
):
1581 am_id
= self
._match
_id
(url
)
1583 songs
= self
._call
_api
(
1584 'song/of-menu', am_id
, {'sid': am_id, 'pn': 1, 'ps': 100}
)['data']
1588 sid
= str_or_none(song
.get('id'))
1591 entries
.append(self
.url_result(
1592 'https://www.bilibili.com/audio/au' + sid
,
1593 BilibiliAudioIE
.ie_key(), sid
))
1596 album_data
= self
._call
_api
('menu/info', am_id
) or {}
1597 album_title
= album_data
.get('title')
1599 for entry
in entries
:
1600 entry
['album'] = album_title
1601 return self
.playlist_result(
1602 entries
, am_id
, album_title
, album_data
.get('intro'))
1604 return self
.playlist_result(entries
, am_id
)
1607 class BiliBiliPlayerIE(InfoExtractor
):
1608 _VALID_URL
= r
'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1610 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1611 'only_matching': True,
def _real_extract(self, url):
    """Redirect an embedded player URL to the canonical video page."""
    video_id = self._match_id(url)
    # Delegate extraction to BiliBiliIE via the av-prefixed canonical URL.
    # f-string replaces dated %-formatting for consistency with the rest of the file.
    return self.url_result(
        f'http://www.bilibili.tv/video/av{video_id}/',
        ie=BiliBiliIE.ie_key(), video_id=video_id)
1621 class BiliIntlBaseIE(InfoExtractor
):
1622 _API_URL
= 'https://api.bilibili.tv/intl/gateway'
1623 _NETRC_MACHINE
= 'biliintl'
1625 def _call_api(self
, endpoint
, *args
, **kwargs
):
1626 json
= self
._download
_json
(self
._API
_URL
+ endpoint
, *args
, **kwargs
)
1627 if json
.get('code'):
1628 if json
['code'] in (10004004, 10004005, 10023006):
1629 self
.raise_login_required()
1630 elif json
['code'] == 10004001:
1631 self
.raise_geo_restricted()
1633 if json
.get('message') and str(json
['code']) != json
['message']:
1634 errmsg
= f
'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1636 errmsg
= kwargs
.get('errnote', 'Unable to download JSON metadata')
1637 if kwargs
.get('fatal'):
1638 raise ExtractorError(errmsg
)
1640 self
.report_warning(errmsg
)
1641 return json
.get('data')
1643 def json2srt(self
, json
):
1645 f
'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1646 for i
, line
in enumerate(traverse_obj(json
, (
1647 'body', lambda _
, l
: l
['content'] and l
['from'] and l
['to']))))
1650 def _get_subtitles(self
, *, ep_id
=None, aid
=None):
1651 sub_json
= self
._call
_api
(
1652 '/web/v2/subtitle', ep_id
or aid
, fatal
=False,
1653 note
='Downloading subtitles list', errnote
='Unable to download subtitles list',
1656 's_locale': 'en_US',
1657 'episode_id': ep_id
,
1661 for sub
in sub_json
.get('subtitles') or []:
1662 sub_url
= sub
.get('url')
1665 sub_data
= self
._download
_json
(
1666 sub_url
, ep_id
or aid
, errnote
='Unable to download subtitles', fatal
=False,
1667 note
='Downloading subtitles%s' % f
' for {sub["lang"]}' if sub
.get('lang') else '')
1670 subtitles
.setdefault(sub
.get('lang_key', 'en'), []).append({
1672 'data': self
.json2srt(sub_data
)
1676 def _get_formats(self
, *, ep_id
=None, aid
=None):
1677 video_json
= self
._call
_api
(
1678 '/web/playurl', ep_id
or aid
, note
='Downloading video formats',
1679 errnote
='Unable to download video formats', query
=filter_dict({
1684 video_json
= video_json
['playurl']
1686 for vid
in video_json
.get('video') or []:
1687 video_res
= vid
.get('video_resource') or {}
1688 video_info
= vid
.get('stream_info') or {}
1689 if not video_res
.get('url'):
1692 'url': video_res
['url'],
1694 'format_note': video_info
.get('desc_words'),
1695 'width': video_res
.get('width'),
1696 'height': video_res
.get('height'),
1697 'vbr': video_res
.get('bandwidth'),
1699 'vcodec': video_res
.get('codecs'),
1700 'filesize': video_res
.get('size'),
1702 for aud
in video_json
.get('audio_resource') or []:
1703 if not aud
.get('url'):
1708 'abr': aud
.get('bandwidth'),
1709 'acodec': aud
.get('codecs'),
1711 'filesize': aud
.get('size'),
1716 def _parse_video_metadata(self
, video_data
):
1718 'title': video_data
.get('title_display') or video_data
.get('title'),
1719 'thumbnail': video_data
.get('cover'),
1720 'episode_number': int_or_none(self
._search
_regex
(
1721 r
'^E(\d+)(?:$| - )', video_data
.get('title_display') or '', 'episode number', default
=None)),
1724 def _perform_login(self
, username
, password
):
1725 if not Cryptodome
.RSA
:
1726 raise ExtractorError('pycryptodomex not found. Please install', expected
=True)
1728 key_data
= self
._download
_json
(
1729 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1730 note
='Downloading login key', errnote
='Unable to download login key')['data']
1732 public_key
= Cryptodome
.RSA
.importKey(key_data
['key'])
1733 password_hash
= Cryptodome
.PKCS1_v1_5
.new(public_key
).encrypt((key_data
['hash'] + password
).encode('utf-8'))
1734 login_post
= self
._download
_json
(
1735 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data
=urlencode_postdata({
1736 'username': username
,
1737 'password': base64
.b64encode(password_hash
).decode('ascii'),
1739 's_locale': 'en_US',
1741 }), note
='Logging in', errnote
='Unable to log in')
1742 if login_post
.get('code'):
1743 if login_post
.get('message'):
1744 raise ExtractorError(f
'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected
=True)
1746 raise ExtractorError('Unable to log in')
1749 class BiliIntlIE(BiliIntlBaseIE
):
1750 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1753 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1757 'title': 'E2 - The First Night',
1758 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1759 'episode_number': 2,
1760 'upload_date': '20201009',
1761 'episode': 'Episode 2',
1762 'timestamp': 1602259500,
1763 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1767 'title': '<Untitled Chapter 1>'
1769 'start_time': 76.242,
1770 'end_time': 161.161,
1773 'start_time': 1325.742,
1774 'end_time': 1403.903,
1780 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1784 'title': 'E3 - Who?',
1785 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1786 'episode_number': 3,
1787 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1788 'episode': 'Episode 3',
1789 'upload_date': '20211219',
1790 'timestamp': 1639928700,
1794 'title': '<Untitled Chapter 1>'
1800 'start_time': 1173.0,
1801 'end_time': 1259.535,
1806 # Subtitle with empty content
1807 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1811 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1812 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1813 'episode_number': 140,
1815 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1817 'url': 'https://www.bilibili.tv/en/video/2041863208',
1821 'timestamp': 1670874843,
1822 'description': 'Scheduled for April 2023.\nStudio: ufotable',
1823 'thumbnail': r
're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
1824 'upload_date': '20221212',
1825 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
1828 # episode comment extraction
1829 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1833 'timestamp': 1604057820,
1834 'upload_date': '20201030',
1835 'episode_number': 5,
1836 'title': 'E5 - My Own Steel',
1837 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1838 'thumbnail': r
're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1839 'episode': 'Episode 5',
1840 'comment_count': int,
1844 'title': '<Untitled Chapter 1>'
1850 'start_time': 1290.0,
1859 # user generated content comment extraction
1860 'url': 'https://www.bilibili.tv/en/video/2045730385',
1864 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1865 'timestamp': 1667891924,
1866 'upload_date': '20221108',
1867 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
1868 'comment_count': int,
1869 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
1875 # episode id without intro and outro
1876 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1880 'title': 'E1 - Operation \'Strix\' <Owl>',
1881 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1882 'timestamp': 1649516400,
1883 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1884 'episode': 'Episode 1',
1885 'episode_number': 1,
1886 'upload_date': '20220409',
1889 'url': 'https://www.biliintl.com/en/play/34613/341736',
1890 'only_matching': True,
1892 # User-generated content (as opposed to a series licensed from a studio)
1893 'url': 'https://bilibili.tv/en/video/2019955076',
1894 'only_matching': True,
1896 # No language in URL
1897 'url': 'https://www.bilibili.tv/video/2019955076',
1898 'only_matching': True,
1900 # Uppercase language in URL
1901 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1902 'only_matching': True,
1905 def _make_url(video_id
, series_id
=None):
1907 return f
'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1908 return f
'https://www.bilibili.tv/en/video/{video_id}'
1910 def _extract_video_metadata(self
, url
, video_id
, season_id
):
1911 url
, smuggled_data
= unsmuggle_url(url
, {})
1912 if smuggled_data
.get('title'):
1913 return smuggled_data
1915 webpage
= self
._download
_webpage
(url
, video_id
)
1918 self
._search
_json
(r
'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage
, 'preload state', video_id
, default
={})
1919 or self
._search
_nuxt
_data
(webpage
, video_id
, '__initialState', fatal
=False, traverse
=None))
1920 video_data
= traverse_obj(
1921 initial_data
, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type
=dict) or {}
1923 if season_id
and not video_data
:
1924 # Non-Bstation layout, read through episode list
1925 season_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id
)
1926 video_data
= traverse_obj(season_json
, (
1927 'sections', ..., 'episodes', lambda _
, v
: str(v
['episode_id']) == video_id
1928 ), expected_type
=dict, get_all
=False)
1930 # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
1932 self
._parse
_video
_metadata
(video_data
), self
._search
_json
_ld
(webpage
, video_id
, fatal
=False), {
1933 'title': self
._html
_search
_meta
('og:title', webpage
),
1934 'description': self
._html
_search
_meta
('og:description', webpage
)
1937 def _get_comments_reply(self
, root_id
, next_id
=0, display_id
=None):
1938 comment_api_raw_data
= self
._download
_json
(
1939 'https://api.bilibili.tv/reply/web/detail', display_id
,
1940 note
=f
'Downloading reply comment of {root_id} - {next_id}',
1943 'ps': 20, # comment's reply per page (default: 3)
1948 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
1950 'author': traverse_obj(replies
, ('member', 'name')),
1951 'author_id': traverse_obj(replies
, ('member', 'mid')),
1952 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
1953 'text': traverse_obj(replies
, ('content', 'message')),
1954 'id': replies
.get('rpid'),
1955 'like_count': int_or_none(replies
.get('like_count')),
1956 'parent': replies
.get('parent'),
1957 'timestamp': unified_timestamp(replies
.get('ctime_text'))
1960 if not traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
1961 yield from self
._get
_comments
_reply
(
1962 root_id
, comment_api_raw_data
['data']['cursor']['next'], display_id
)
1964 def _get_comments(self
, video_id
, ep_id
):
1965 for i
in itertools
.count(0):
1966 comment_api_raw_data
= self
._download
_json
(
1967 'https://api.bilibili.tv/reply/web/root', video_id
,
1968 note
=f
'Downloading comment page {i + 1}',
1971 'pn': i
, # page number
1972 'ps': 20, # comment per page (default: 20)
1974 'type': 3 if ep_id
else 1, # 1: user generated content, 3: series content
1975 'sort_type': 1, # 1: best, 2: recent
1978 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
1980 'author': traverse_obj(replies
, ('member', 'name')),
1981 'author_id': traverse_obj(replies
, ('member', 'mid')),
1982 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
1983 'text': traverse_obj(replies
, ('content', 'message')),
1984 'id': replies
.get('rpid'),
1985 'like_count': int_or_none(replies
.get('like_count')),
1986 'timestamp': unified_timestamp(replies
.get('ctime_text')),
1987 'author_is_uploader': bool(traverse_obj(replies
, ('member', 'type'))),
1989 if replies
.get('count'):
1990 yield from self
._get
_comments
_reply
(replies
.get('rpid'), display_id
=video_id
)
1992 if traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
1995 def _real_extract(self
, url
):
1996 season_id
, ep_id
, aid
= self
._match
_valid
_url
(url
).group('season_id', 'ep_id', 'aid')
1997 video_id
= ep_id
or aid
2001 intro_ending_json
= self
._call
_api
(
2002 f
'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
2003 video_id
, fatal
=False) or {}
2004 if intro_ending_json
.get('skip'):
2005 # FIXME: start time and end time seems a bit off a few second even it corrext based on ogv.*.js
2006 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
2008 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_start_time')), 1000),
2009 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_end_time')), 1000),
2012 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_start_time')), 1000),
2013 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_end_time')), 1000),
2019 **self
._extract
_video
_metadata
(url
, video_id
, season_id
),
2020 'formats': self
._get
_formats
(ep_id
=ep_id
, aid
=aid
),
2021 'subtitles': self
.extract_subtitles(ep_id
=ep_id
, aid
=aid
),
2022 'chapters': chapters
,
2023 '__post_extractor': self
.extract_comments(video_id
, ep_id
)
2027 class BiliIntlSeriesIE(BiliIntlBaseIE
):
2028 IE_NAME
= 'biliIntl:series'
2029 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
2031 'url': 'https://www.bilibili.tv/en/play/34613',
2032 'playlist_mincount': 15,
2035 'title': 'TONIKAWA: Over the Moon For You',
2036 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2037 'categories': ['Slice of life', 'Comedy', 'Romance'],
2038 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2042 'skip_download': True,
2045 'url': 'https://www.bilibili.tv/en/media/1048837',
2048 'title': 'SPY×FAMILY',
2049 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2050 'categories': ['Adventure', 'Action', 'Comedy'],
2051 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
2054 'playlist_mincount': 25,
2056 'url': 'https://www.biliintl.com/en/play/34613',
2057 'only_matching': True,
2059 'url': 'https://www.biliintl.com/EN/play/34613',
2060 'only_matching': True,
def _entries(self, series_id):
    # Episode metadata from the season listing is smuggled into each result
    # URL so BiliIntlIE can reuse it instead of re-fetching the webpage.
    season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
    episodes = traverse_obj(season_json, ('sections', ..., 'episodes', ...), expected_type=dict)
    for episode in episodes:
        episode_id = str(episode['episode_id'])
        smuggled = smuggle_url(
            BiliIntlIE._make_url(episode_id, series_id),
            self._parse_video_metadata(episode))
        yield self.url_result(smuggled, BiliIntlIE, episode_id)
def _real_extract(self, url):
    # Season-level metadata comes from a dedicated endpoint; the entries
    # themselves are produced lazily by _entries().
    series_id = self._match_id(url)
    season_api = self._call_api(
        f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id)
    season = season_api.get('season') or {}
    return self.playlist_result(
        self._entries(series_id), series_id, season.get('title'), season.get('description'),
        categories=traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none),
        thumbnail=url_or_none(season.get('horizontal_cover')),
        view_count=parse_count(season.get('view')))
2081 class BiliLiveIE(InfoExtractor
):
2082 _VALID_URL
= r
'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2085 'url': 'https://live.bilibili.com/196',
2088 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
2090 'title': "太空狼人杀联动,不被爆杀就算赢",
2091 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
2092 'timestamp': 1650802769,
2096 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2097 'only_matching': True
2099 'url': 'https://live.bilibili.com/blanc/196',
2100 'only_matching': True
2104 80: {'format_id': 'low', 'format_note': '流畅'}
,
2105 150: {'format_id': 'high_res', 'format_note': '高清'}
,
2106 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}
,
2107 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}
,
2108 10000: {'format_id': 'source', 'format_note': '原画'}
,
2109 20000: {'format_id': '4K', 'format_note': '4K'}
,
2110 30000: {'format_id': 'dolby', 'format_note': '杜比'}
,
2113 _quality
= staticmethod(qualities(list(_FORMATS
)))
2115 def _call_api(self
, path
, room_id
, query
):
2116 api_result
= self
._download
_json
(f
'https://api.live.bilibili.com/{path}', room_id
, query
=query
)
2117 if api_result
.get('code') != 0:
2118 raise ExtractorError(api_result
.get('message') or 'Unable to download JSON metadata')
2119 return api_result
.get('data') or {}
2121 def _parse_formats(self
, qn
, fmt
):
2122 for codec
in fmt
.get('codec') or []:
2123 if codec
.get('current_qn') != qn
:
2125 for url_info
in codec
['url_info']:
2127 'url': f
'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
2128 'ext': fmt
.get('format_name'),
2129 'vcodec': codec
.get('codec_name'),
2130 'quality': self
._quality
(qn
),
2131 **self
._FORMATS
[qn
],
2134 def _real_extract(self
, url
):
2135 room_id
= self
._match
_id
(url
)
2136 room_data
= self
._call
_api
('room/v1/Room/get_info', room_id
, {'id': room_id}
)
2137 if room_data
.get('live_status') == 0:
2138 raise ExtractorError('Streamer is not live', expected
=True)
2141 for qn
in self
._FORMATS
.keys():
2142 stream_data
= self
._call
_api
('xlive/web-room/v2/index/getRoomPlayInfo', room_id
, {
2152 for fmt
in traverse_obj(stream_data
, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2153 formats
.extend(self
._parse
_formats
(qn
, fmt
))
2157 'title': room_data
.get('title'),
2158 'description': room_data
.get('description'),
2159 'thumbnail': room_data
.get('user_cover'),
2160 'timestamp': stream_data
.get('live_time'),