12 from .common
import InfoExtractor
, SearchInfoExtractor
13 from ..dependencies
import Cryptodome
14 from ..networking
.exceptions
import HTTPError
36 srt_subtitles_timecode
,
48 class BilibiliBaseIE(InfoExtractor
):
49 _FORMAT_ID_RE
= re
.compile(r
'-(\d+)\.m4s\?')
51 def extract_formats(self
, play_info
):
53 r
['quality']: traverse_obj(r
, 'new_description', 'display_desc')
54 for r
in traverse_obj(play_info
, ('support_formats', lambda _
, v
: v
['quality']))
57 audios
= traverse_obj(play_info
, ('dash', (None, 'dolby'), 'audio', ..., {dict}
))
58 flac_audio
= traverse_obj(play_info
, ('dash', 'flac', 'audio'))
60 audios
.append(flac_audio
)
62 'url': traverse_obj(audio
, 'baseUrl', 'base_url', 'url'),
63 'ext': mimetype2ext(traverse_obj(audio
, 'mimeType', 'mime_type')),
64 'acodec': traverse_obj(audio
, ('codecs', {str.lower}
)),
66 'tbr': float_or_none(audio
.get('bandwidth'), scale
=1000),
67 'filesize': int_or_none(audio
.get('size')),
68 'format_id': str_or_none(audio
.get('id')),
69 } for audio
in audios
]
72 'url': traverse_obj(video
, 'baseUrl', 'base_url', 'url'),
73 'ext': mimetype2ext(traverse_obj(video
, 'mimeType', 'mime_type')),
74 'fps': float_or_none(traverse_obj(video
, 'frameRate', 'frame_rate')),
75 'width': int_or_none(video
.get('width')),
76 'height': int_or_none(video
.get('height')),
77 'vcodec': video
.get('codecs'),
78 'acodec': 'none' if audios
else None,
79 'dynamic_range': {126: 'DV', 125: 'HDR10'}
.get(int_or_none(video
.get('id'))),
80 'tbr': float_or_none(video
.get('bandwidth'), scale
=1000),
81 'filesize': int_or_none(video
.get('size')),
82 'quality': int_or_none(video
.get('id')),
83 'format_id': traverse_obj(
84 video
, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}
, 1),
85 ('id', {str_or_none}
), get_all
=False),
86 'format': format_names
.get(video
.get('id')),
87 } for video
in traverse_obj(play_info
, ('dash', 'video', ...)))
89 missing_formats
= format_names
.keys() - set(traverse_obj(formats
, (..., 'quality')))
91 self
.to_screen(f
'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
92 f
'you have to login or become premium member to download them. {self._login_hint()}')
def _download_playinfo(self, video_id, cid):
    """Fetch the playurl API payload (format/stream data) for a bvid/cid pair."""
    playurl_query = {'bvid': video_id, 'cid': cid, 'fnval': 4048}
    return self._download_json(
        'https://api.bilibili.com/x/player/playurl', video_id, query=playurl_query,
        note=f'Downloading video formats for cid {cid}')['data']
102 def json2srt(self
, json_data
):
104 for idx
, line
in enumerate(json_data
.get('body') or []):
105 srt_data
+= (f
'{idx + 1}\n'
106 f
'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
107 f
'{line["content"]}\n\n')
110 def _get_subtitles(self
, video_id
, cid
, aid
=None):
114 'url': f
'https://comment.bilibili.com/{cid}.xml',
118 subtitle_info
= traverse_obj(self
._download
_json
(
119 'https://api.bilibili.com/x/player/v2', video_id
,
120 query
={'aid': aid, 'cid': cid}
if aid
else {'bvid': video_id, 'cid': cid}
,
121 note
=f
'Extracting subtitle info {cid}'), ('data', 'subtitle'))
122 subs_list
= traverse_obj(subtitle_info
, ('subtitles', lambda _
, v
: v
['subtitle_url'] and v
['lan']))
123 if not subs_list
and traverse_obj(subtitle_info
, 'allow_submit'):
124 if not self
._get
_cookies
('https://api.bilibili.com').get('SESSDATA'): # no login session cookie
125 self
.report_warning(f
'CC subtitles (if any) are only visible when logged in. {self._login_hint()}', only_once
=True)
127 subtitles
.setdefault(s
['lan'], []).append({
129 'data': self
.json2srt(self
._download
_json
(s
['subtitle_url'], video_id
))
133 def _get_chapters(self
, aid
, cid
):
134 chapters
= aid
and cid
and self
._download
_json
(
135 'https://api.bilibili.com/x/player/v2', aid
, query
={'aid': aid, 'cid': cid}
,
136 note
='Extracting chapters', fatal
=False)
137 return traverse_obj(chapters
, ('data', 'view_points', ..., {
139 'start_time': 'from',
143 def _get_comments(self
, aid
):
144 for idx
in itertools
.count(1):
145 replies
= traverse_obj(
147 f
'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
148 aid
, note
=f
'Extracting comments from page {idx}', fatal
=False),
152 for children
in map(self
._get
_all
_children
, replies
):
155 def _get_all_children(self
, reply
):
157 'author': traverse_obj(reply
, ('member', 'uname')),
158 'author_id': traverse_obj(reply
, ('member', 'mid')),
159 'id': reply
.get('rpid'),
160 'text': traverse_obj(reply
, ('content', 'message')),
161 'timestamp': reply
.get('ctime'),
162 'parent': reply
.get('parent') or 'root',
164 for children
in map(self
._get
_all
_children
, traverse_obj(reply
, ('replies', ...))):
def _get_episodes_from_season(self, ss_id, url):
    """Yield url_result entries for each episode of a bangumi season's main section."""
    season_info = self._download_json(
        'https://api.bilibili.com/pgc/web/season/section', ss_id,
        note='Downloading season info', query={'season_id': ss_id},
        headers={'Referer': url, **self.geo_verification_headers()})

    # Only episodes that expose both a valid share_url and an id are usable
    episodes = traverse_obj(season_info, (
        'result', 'main_section', 'episodes',
        lambda _, v: url_or_none(v['share_url']) and v['id']))
    for episode in episodes:
        yield self.url_result(
            episode['share_url'], BiliBiliBangumiIE, str_or_none(episode.get('id')))
178 def _get_divisions(self
, video_id
, graph_version
, edges
, edge_id
, cid_edges
=None):
179 cid_edges
= cid_edges
or {}
180 division_data
= self
._download
_json
(
181 'https://api.bilibili.com/x/stein/edgeinfo_v2', video_id
,
182 query
={'graph_version': graph_version, 'edge_id': edge_id, 'bvid': video_id}
,
183 note
=f
'Extracting divisions from edge {edge_id}')
184 edges
.setdefault(edge_id
, {}).update(
185 traverse_obj(division_data
, ('data', 'story_list', lambda _
, v
: v
['edge_id'] == edge_id
, {
186 'title': ('title', {str}
),
187 'cid': ('cid', {int_or_none}
),
190 edges
[edge_id
].update(traverse_obj(division_data
, ('data', {
191 'title': ('title', {str}
),
192 'choices': ('edges', 'questions', ..., 'choices', ..., {
193 'edge_id': ('id', {int_or_none}
),
194 'cid': ('cid', {int_or_none}
),
195 'text': ('option', {str}
),
198 # use dict to combine edges that use the same video section (same cid)
199 cid_edges
.setdefault(edges
[edge_id
]['cid'], {})[edge_id
] = edges
[edge_id
]
200 for choice
in traverse_obj(edges
, (edge_id
, 'choices', ...)):
201 if choice
['edge_id'] not in edges
:
202 edges
[choice
['edge_id']] = {'cid': choice['cid']}
203 self
._get
_divisions
(video_id
, graph_version
, edges
, choice
['edge_id'], cid_edges
=cid_edges
)
206 def _get_interactive_entries(self
, video_id
, cid
, metainfo
):
207 graph_version
= traverse_obj(
209 'https://api.bilibili.com/x/player/wbi/v2', video_id
,
210 'Extracting graph version', query
={'bvid': video_id, 'cid': cid}
),
211 ('data', 'interaction', 'graph_version', {int_or_none}
))
212 cid_edges
= self
._get
_divisions
(video_id
, graph_version
, {1: {'cid': cid}
}, 1)
213 for cid
, edges
in cid_edges
.items():
214 play_info
= self
._download
_playinfo
(video_id
, cid
)
217 'id': f
'{video_id}_{cid}',
218 'title': f
'{metainfo.get("title")} - {list(edges.values())[0].get("title")}',
219 'formats': self
.extract_formats(play_info
),
220 'description': f
'{json.dumps(edges, ensure_ascii=False)}\n{metainfo.get("description", "")}',
221 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
222 'subtitles': self
.extract_subtitles(video_id
, cid
),
226 class BiliBiliIE(BilibiliBaseIE
):
227 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
230 'url': 'https://www.bilibili.com/video/BV13x41117TL',
232 'id': 'BV13x41117TL',
233 'title': '阿滴英文|英文歌分享#6 "Closer',
235 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
236 'uploader_id': '65880958',
238 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
241 'comment_count': int,
242 'upload_date': '20170301',
243 'timestamp': 1488353834,
248 'note': 'old av URL version',
249 'url': 'http://www.bilibili.com/video/av1074402/',
251 'thumbnail': r
're:^https?://.*\.(jpg|jpeg)$',
254 'uploader_id': '156160',
255 'id': 'BV11x411K7CN',
258 'upload_date': '20140420',
259 'timestamp': 1397983878,
260 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
262 'comment_count': int,
266 'params': {'skip_download': True}
,
269 'url': 'https://www.bilibili.com/video/BV1bK411W797',
271 'id': 'BV1bK411W797',
272 'title': '物语中的人物是如何吐槽自己的OP的'
274 'playlist_count': 18,
277 'id': 'BV1bK411W797_p1',
279 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
281 'timestamp': 1589601697,
282 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
283 'uploader': '打牌还是打桩',
284 'uploader_id': '150259984',
286 'comment_count': int,
287 'upload_date': '20200516',
289 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
294 'note': 'Specific page of Anthology',
295 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
297 'id': 'BV1bK411W797_p1',
299 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
301 'timestamp': 1589601697,
302 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
303 'uploader': '打牌还是打桩',
304 'uploader_id': '150259984',
306 'comment_count': int,
307 'upload_date': '20200516',
309 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
313 'note': 'video has subtitles',
314 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
316 'id': 'BV12N4y1M7rh',
318 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
320 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
322 'upload_date': '20220709',
324 'timestamp': 1657347907,
325 'uploader_id': '1326814124',
326 'comment_count': int,
329 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
330 'subtitles': 'count:2'
332 'params': {'listsubtitles': True}
,
334 'url': 'https://www.bilibili.com/video/av8903802/',
336 'id': 'BV13x41117TL',
338 'title': '阿滴英文|英文歌分享#6 "Closer',
339 'upload_date': '20170301',
340 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
341 'timestamp': 1488353834,
342 'uploader_id': '65880958',
344 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
347 'comment_count': int,
352 'skip_download': True,
355 'note': 'video has chapter',
356 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
358 'id': 'BV1vL411G7N7',
360 'title': '如何为你的B站视频添加进度条分段',
361 'timestamp': 1634554558,
362 'upload_date': '20211018',
363 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
365 'uploader': '爱喝咖啡的当麻',
367 'uploader_id': '1680903',
368 'chapters': 'count:6',
369 'comment_count': int,
372 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
374 'params': {'skip_download': True}
,
376 'note': 'video redirects to festival page',
377 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
379 'id': 'BV1wP4y1P72h',
381 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
382 'timestamp': 1643947497,
383 'upload_date': '20220204',
384 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
385 'uploader': '叨叨冯聊音乐',
387 'uploader_id': '528182630',
390 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
392 'params': {'skip_download': True}
,
394 'note': 'newer festival video',
395 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
397 'id': 'BV1ay4y1d77f',
399 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
400 'timestamp': 1674273600,
401 'upload_date': '20230121',
402 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
404 'duration': 1111.722,
405 'uploader_id': '8469526',
408 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
410 'params': {'skip_download': True}
,
412 'note': 'interactive/split-path video',
413 'url': 'https://www.bilibili.com/video/BV1af4y1H7ga/',
415 'id': 'BV1af4y1H7ga',
416 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!!',
417 'timestamp': 1630500414,
418 'upload_date': '20210901',
419 'description': 'md5:01113e39ab06e28042d74ac356a08786',
421 'uploader': '钉宫妮妮Ninico',
423 'uploader_id': '8881297',
424 'comment_count': int,
427 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
429 'playlist_count': 33,
432 'id': 'BV1af4y1H7ga_400950101',
434 'title': '【互动游戏】花了大半年时间做的自我介绍~请查收!! - 听见猫猫叫~',
435 'timestamp': 1630500414,
436 'upload_date': '20210901',
437 'description': 'md5:db66ac7a2813a94b8291dbce990cc5b2',
439 'uploader': '钉宫妮妮Ninico',
441 'uploader_id': '8881297',
442 'comment_count': int,
445 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
449 'note': '301 redirect to bangumi link',
450 'url': 'https://www.bilibili.com/video/BV1TE411f7f1',
453 'title': '李永乐老师 钱学森弹道和乘波体飞行器是什么?',
458 'season_id': '28609',
460 'episode': '钱学森弹道和乘波体飞行器是什么?',
461 'episode_id': '288525',
462 'episode_number': 105,
463 'duration': 1183.957,
464 'timestamp': 1571648124,
465 'upload_date': '20191021',
466 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
469 'url': 'https://www.bilibili.com/video/BV1jL41167ZG/',
471 'id': 'BV1jL41167ZG',
472 'title': '一场大火引发的离奇死亡!古典推理经典短篇集《不可能犯罪诊断书》!',
475 'skip': 'supporter-only video',
477 'url': 'https://www.bilibili.com/video/BV1Ks411f7aQ/',
479 'id': 'BV1Ks411f7aQ',
480 'title': '【BD1080P】狼与香辛料I【华盟】',
483 'skip': 'login required',
485 'url': 'https://www.bilibili.com/video/BV1GJ411x7h7/',
487 'id': 'BV1GJ411x7h7',
488 'title': '【官方 MV】Never Gonna Give You Up - Rick Astley',
491 'skip': 'geo-restricted',
494 def _real_extract(self
, url
):
495 video_id
= self
._match
_id
(url
)
496 webpage
, urlh
= self
._download
_webpage
_handle
(url
, video_id
)
497 if not self
._match
_valid
_url
(urlh
.url
):
498 return self
.url_result(urlh
.url
)
500 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)
502 is_festival
= 'videoData' not in initial_state
504 video_data
= initial_state
['videoInfo']
506 play_info_obj
= self
._search
_json
(
507 r
'window\.__playinfo__\s*=', webpage
, 'play info', video_id
, fatal
=False)
508 if not play_info_obj
:
509 if traverse_obj(initial_state
, ('error', 'trueCode')) == -403:
510 self
.raise_login_required()
511 if traverse_obj(initial_state
, ('error', 'trueCode')) == -404:
512 raise ExtractorError(
513 'This video may be deleted or geo-restricted. '
514 'You might want to try a VPN or a proxy server (with --proxy)', expected
=True)
515 play_info
= traverse_obj(play_info_obj
, ('data', {dict}
))
517 if traverse_obj(play_info_obj
, 'code') == 87007:
518 toast
= get_element_by_class('tips-toast', webpage
) or ''
520 f
'{get_element_by_class("belongs-to", toast) or ""},'
521 + (get_element_by_class('level', toast
) or ''))
522 raise ExtractorError(
523 f
'This is a supporter-only video: {msg}. {self._login_hint()}', expected
=True)
524 raise ExtractorError('Failed to extract play info')
525 video_data
= initial_state
['videoData']
527 video_id
, title
= video_data
['bvid'], video_data
.get('title')
529 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
530 page_list_json
= not is_festival
and traverse_obj(
532 'https://api.bilibili.com/x/player/pagelist', video_id
,
533 fatal
=False, query
={'bvid': video_id, 'jsonp': 'jsonp'}
,
534 note
='Extracting videos in anthology'),
535 'data', expected_type
=list) or []
536 is_anthology
= len(page_list_json
) > 1
538 part_id
= int_or_none(parse_qs(url
).get('p', [None])[-1])
539 if is_anthology
and not part_id
and self
._yes
_playlist
(video_id
, video_id
):
540 return self
.playlist_from_matches(
541 page_list_json
, video_id
, title
, ie
=BiliBiliIE
,
542 getter
=lambda entry
: f
'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
545 part_id
= part_id
or 1
546 title
+= f
' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
548 aid
= video_data
.get('aid')
549 old_video_id
= format_field(aid
, None, f
'%s_part{part_id or 1}')
551 cid
= traverse_obj(video_data
, ('pages', part_id
- 1, 'cid')) if part_id
else video_data
.get('cid')
555 play_info
= self
._download
_playinfo
(video_id
, cid
)
557 festival_info
= traverse_obj(initial_state
, {
558 'uploader': ('videoInfo', 'upName'),
559 'uploader_id': ('videoInfo', 'upMid', {str_or_none}
),
560 'like_count': ('videoStatus', 'like', {int_or_none}
),
561 'thumbnail': ('sectionEpisodes', lambda _
, v
: v
['bvid'] == video_id
, 'cover'),
565 **traverse_obj(initial_state
, {
566 'uploader': ('upData', 'name'),
567 'uploader_id': ('upData', 'mid', {str_or_none}
),
568 'like_count': ('videoData', 'stat', 'like', {int_or_none}
),
569 'tags': ('tags', ..., 'tag_name'),
570 'thumbnail': ('videoData', 'pic', {url_or_none}
),
573 **traverse_obj(video_data
, {
574 'description': 'desc',
575 'timestamp': ('pubdate', {int_or_none}
),
576 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}
),
577 'comment_count': ('stat', 'reply', {int_or_none}
),
579 'id': f
'{video_id}{format_field(part_id, None, "_p%d")}',
580 '_old_archive_ids': [make_archive_id(self
, old_video_id
)] if old_video_id
else None,
582 'http_headers': {'Referer': url}
,
585 is_interactive
= traverse_obj(video_data
, ('rights', 'is_stein_gate'))
587 return self
.playlist_result(
588 self
._get
_interactive
_entries
(video_id
, cid
, metainfo
), **metainfo
, **{
589 'duration': traverse_obj(initial_state
, ('videoData', 'duration', {int_or_none}
)),
590 '__post_extractor': self
.extract_comments(aid
),
595 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
596 'chapters': self
._get
_chapters
(aid
, cid
),
597 'subtitles': self
.extract_subtitles(video_id
, cid
),
598 'formats': self
.extract_formats(play_info
),
599 '__post_extractor': self
.extract_comments(aid
),
603 class BiliBiliBangumiIE(BilibiliBaseIE
):
604 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/play/ep(?P<id>\d+)'
607 'url': 'https://www.bilibili.com/bangumi/play/ep21495/',
616 'episode': 'forever/ef',
617 'episode_id': '21495',
618 'episode_number': 12,
619 'title': '12 forever/ef',
620 'duration': 1420.791,
621 'timestamp': 1320412200,
622 'upload_date': '20111104',
623 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
626 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
633 'season_id': '26801',
636 'episode_id': '267851',
639 'duration': 1425.256,
640 'timestamp': 1554566400,
641 'upload_date': '20190406',
642 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
644 'skip': 'Geo-restricted',
646 'note': 'a making-of which falls outside main section',
647 'url': 'https://www.bilibili.com/bangumi/play/ep345120',
654 'season_id': '26801',
657 'episode_id': '345120',
658 'episode_number': 27,
660 'duration': 1922.129,
661 'timestamp': 1602853860,
662 'upload_date': '20201016',
663 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
667 def _real_extract(self
, url
):
668 episode_id
= self
._match
_id
(url
)
669 webpage
= self
._download
_webpage
(url
, episode_id
)
671 if '您所在的地区无法观看本片' in webpage
:
672 raise GeoRestrictedError('This video is restricted')
673 elif '正在观看预览,大会员免费看全片' in webpage
:
674 self
.raise_login_required('This video is for premium members only')
676 headers
= {'Referer': url, **self.geo_verification_headers()}
677 play_info
= self
._download
_json
(
678 'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id
,
679 'Extracting episode', query
={'fnval': '4048', 'ep_id': episode_id}
,
681 premium_only
= play_info
.get('code') == -10403
682 play_info
= traverse_obj(play_info
, ('result', 'video_info', {dict}
)) or {}
684 formats
= self
.extract_formats(play_info
)
685 if not formats
and (premium_only
or '成为大会员抢先看' in webpage
or '开通大会员观看' in webpage
):
686 self
.raise_login_required('This video is for premium members only')
688 bangumi_info
= self
._download
_json
(
689 'https://api.bilibili.com/pgc/view/web/season', episode_id
, 'Get episode details',
690 query
={'ep_id': episode_id}
, headers
=headers
)['result']
692 episode_number
, episode_info
= next((
693 (idx
, ep
) for idx
, ep
in enumerate(traverse_obj(
694 bangumi_info
, (('episodes', ('section', ..., 'episodes')), ..., {dict}
)), 1)
695 if str_or_none(ep
.get('id')) == episode_id
), (1, {}))
697 season_id
= bangumi_info
.get('season_id')
698 season_number
, season_title
= season_id
and next((
699 (idx
+ 1, e
.get('season_title')) for idx
, e
in enumerate(
700 traverse_obj(bangumi_info
, ('seasons', ...)))
701 if e
.get('season_id') == season_id
704 aid
= episode_info
.get('aid')
709 **traverse_obj(bangumi_info
, {
710 'series': ('series', 'series_title', {str}
),
711 'series_id': ('series', 'series_id', {str_or_none}
),
712 'thumbnail': ('square_cover', {url_or_none}
),
714 **traverse_obj(episode_info
, {
715 'episode': ('long_title', {str}
),
716 'episode_number': ('title', {int_or_none}
, {lambda x: x or episode_number}
),
717 'timestamp': ('pub_time', {int_or_none}
),
718 'title': {lambda v: v and join_nonempty('title', 'long_title', delim=' ', from_dict=v)}
,
720 'episode_id': episode_id
,
721 'season': str_or_none(season_title
),
722 'season_id': str_or_none(season_id
),
723 'season_number': season_number
,
724 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
725 'subtitles': self
.extract_subtitles(episode_id
, episode_info
.get('cid'), aid
=aid
),
726 '__post_extractor': self
.extract_comments(aid
),
727 'http_headers': headers
,
731 class BiliBiliBangumiMediaIE(BilibiliBaseIE
):
732 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
734 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
737 'title': 'CAROLE & TUESDAY',
738 'description': 'md5:42417ad33d1eaa1c93bfd2dd1626b829',
740 'playlist_mincount': 25,
742 'url': 'https://www.bilibili.com/bangumi/media/md1565/',
745 'title': '攻壳机动队 S.A.C. 2nd GIG',
746 'description': 'md5:46cac00bafd645b97f4d6df616fc576d',
748 'playlist_count': 26,
758 'episode': '再启动 REEMBODY',
759 'episode_id': '68540',
761 'title': '1 再启动 REEMBODY',
762 'duration': 1525.777,
763 'timestamp': 1425074413,
764 'upload_date': '20150227',
765 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
770 def _real_extract(self
, url
):
771 media_id
= self
._match
_id
(url
)
772 webpage
= self
._download
_webpage
(url
, media_id
)
774 initial_state
= self
._search
_json
(
775 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial_state', media_id
)
776 ss_id
= initial_state
['mediaInfo']['season_id']
778 return self
.playlist_result(
779 self
._get
_episodes
_from
_season
(ss_id
, url
), media_id
,
780 **traverse_obj(initial_state
, ('mediaInfo', {
781 'title': ('title', {str}
),
782 'description': ('evaluate', {str}
),
786 class BiliBiliBangumiSeasonIE(BilibiliBaseIE
):
787 _VALID_URL
= r
'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
789 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
793 'description': 'md5:e2cc9848b6f69be6db79fc2a82d9661b',
795 'playlist_mincount': 26
797 'url': 'https://www.bilibili.com/bangumi/play/ss2251',
801 'description': 'md5:1fd40e3df4c08d4d9d89a6a34844bdc4',
803 'playlist_count': 13,
814 'episode_id': '50188',
817 'duration': 1436.992,
818 'timestamp': 1343185080,
819 'upload_date': '20120725',
820 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
825 def _real_extract(self
, url
):
826 ss_id
= self
._match
_id
(url
)
827 webpage
= self
._download
_webpage
(url
, ss_id
)
828 metainfo
= traverse_obj(
829 self
._search
_json
(r
'<script[^>]+type="application/ld\+json"[^>]*>', webpage
, 'info', ss_id
),
830 ('itemListElement', ..., {
831 'title': ('name', {str}
),
832 'description': ('description', {str}
),
835 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), ss_id
, **metainfo
)
838 class BilibiliCheeseBaseIE(BilibiliBaseIE
):
839 _HEADERS
= {'Referer': 'https://www.bilibili.com/'}
841 def _extract_episode(self
, season_info
, ep_id
):
842 episode_info
= traverse_obj(season_info
, (
843 'episodes', lambda _
, v
: v
['id'] == int(ep_id
)), get_all
=False)
844 aid
, cid
= episode_info
['aid'], episode_info
['cid']
846 if traverse_obj(episode_info
, 'ep_status') == -1:
847 raise ExtractorError('This course episode is not yet available.', expected
=True)
848 if not traverse_obj(episode_info
, 'playable'):
849 self
.raise_login_required('You need to purchase the course to download this episode')
851 play_info
= self
._download
_json
(
852 'https://api.bilibili.com/pugv/player/web/playurl', ep_id
,
853 query
={'avid': aid, 'cid': cid, 'ep_id': ep_id, 'fnval': 16, 'fourk': 1}
,
854 headers
=self
._HEADERS
, note
='Downloading playinfo')['data']
857 'id': str_or_none(ep_id
),
858 'episode_id': str_or_none(ep_id
),
859 'formats': self
.extract_formats(play_info
),
860 'extractor_key': BilibiliCheeseIE
.ie_key(),
861 'extractor': BilibiliCheeseIE
.IE_NAME
,
862 'webpage_url': f
'https://www.bilibili.com/cheese/play/ep{ep_id}',
863 **traverse_obj(episode_info
, {
864 'episode': ('title', {str}
),
865 'title': {lambda v: v and join_nonempty('index', 'title', delim=' - ', from_dict=v)}
,
866 'alt_title': ('subtitle', {str}
),
867 'duration': ('duration', {int_or_none}
),
868 'episode_number': ('index', {int_or_none}
),
869 'thumbnail': ('cover', {url_or_none}
),
870 'timestamp': ('release_date', {int_or_none}
),
871 'view_count': ('play', {int_or_none}
),
873 **traverse_obj(season_info
, {
874 'uploader': ('up_info', 'uname', {str}
),
875 'uploader_id': ('up_info', 'mid', {str_or_none}
),
877 'subtitles': self
.extract_subtitles(ep_id
, cid
, aid
=aid
),
878 '__post_extractor': self
.extract_comments(aid
),
879 'http_headers': self
._HEADERS
,
def _download_season_info(self, query_key, video_id):
    """Fetch cheese (paid course) season metadata, keyed by either ep_id or season_id."""
    api_url = f'https://api.bilibili.com/pugv/view/web/season?{query_key}={video_id}'
    return self._download_json(
        api_url, video_id, headers=self._HEADERS, note='Downloading season info')['data']
888 class BilibiliCheeseIE(BilibiliCheeseBaseIE
):
889 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/cheese/play/ep(?P<id>\d+)'
891 'url': 'https://www.bilibili.com/cheese/play/ep229832',
895 'title': '1 - 课程先导片',
896 'alt_title': '视频课 · 3分41秒',
898 'uploader_id': '316568752',
900 'episode_id': '229832',
903 'timestamp': 1695549606,
904 'upload_date': '20230924',
905 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
def _real_extract(self, url):
    """Resolve a single cheese episode URL to its extracted info dict."""
    ep_id = self._match_id(url)
    season_info = self._download_season_info('ep_id', ep_id)
    return self._extract_episode(season_info, ep_id)
915 class BilibiliCheeseSeasonIE(BilibiliCheeseBaseIE
):
916 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/cheese/play/ss(?P<id>\d+)'
918 'url': 'https://www.bilibili.com/cheese/play/ss5918',
921 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
922 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
928 'title': '1 - 课程先导片',
929 'alt_title': '视频课 · 3分41秒',
931 'uploader_id': '316568752',
933 'episode_id': '229832',
936 'timestamp': 1695549606,
937 'upload_date': '20230924',
938 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
942 'params': {'playlist_items': '1'}
,
944 'url': 'https://www.bilibili.com/cheese/play/ss5918',
947 'title': '【限时五折】新闻系学不到:马督工教你做自媒体',
948 'description': '帮普通人建立世界模型,降低人与人的沟通门槛',
950 'playlist_mincount': 5,
951 'skip': 'paid video in list',
def _get_cheese_entries(self, season_info):
    """Yield extracted episodes of a cheese season, skipping ones the viewer cannot watch."""
    viewable_ids = traverse_obj(
        season_info, ('episodes', lambda _, v: v['episode_can_view'], 'id'))
    for episode_id in viewable_ids:
        yield self._extract_episode(season_info, episode_id)
958 def _real_extract(self
, url
):
959 season_id
= self
._match
_id
(url
)
960 season_info
= self
._download
_season
_info
('season_id', season_id
)
962 return self
.playlist_result(
963 self
._get
_cheese
_entries
(season_info
), season_id
,
964 **traverse_obj(season_info
, {
965 'title': ('title', {str}
),
966 'description': ('subtitle', {str}
),
class BilibiliSpaceBaseIE(InfoExtractor):
    """Shared pagination scaffolding for bilibili space (user profile) extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Fetch page 0 eagerly to learn page_count/page_size, then page lazily.

        Returns (metadata, InAdvancePagedList of entries).
        """
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def entries_for(page_idx):
            # Reuse the already-downloaded first page instead of refetching it
            page = fetch_page(page_idx) if page_idx else initial_page
            return get_entries(page)

        paged_list = InAdvancePagedList(
            entries_for, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
982 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE
):
983 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
985 'url': 'https://space.bilibili.com/3985676/video',
989 'playlist_mincount': 178,
991 'url': 'https://space.bilibili.com/313580179/video',
995 'playlist_mincount': 92,
998 def _extract_signature(self
, playlist_id
):
999 session_data
= self
._download
_json
('https://api.bilibili.com/x/web-interface/nav', playlist_id
, fatal
=False)
1001 key_from_url
= lambda x
: x
[x
.rfind('/') + 1:].split('.')[0]
1002 img_key
= traverse_obj(
1003 session_data
, ('data', 'wbi_img', 'img_url', {key_from_url}
)) or '34478ba821254d9d93542680e3b86100'
1004 sub_key
= traverse_obj(
1005 session_data
, ('data', 'wbi_img', 'sub_url', {key_from_url}
)) or '7e16a90d190a4355a78fd00b32a38de6'
1007 session_key
= img_key
+ sub_key
1009 signature_values
= []
1011 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
1012 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
1013 57, 62, 11, 36, 20, 34, 44, 52
1015 char_at_position
= try_call(lambda: session_key
[position
])
1016 if char_at_position
:
1017 signature_values
.append(char_at_position
)
1019 return ''.join(signature_values
)[:32]
1021 def _real_extract(self
, url
):
1022 playlist_id
, is_video_url
= self
._match
_valid
_url
(url
).group('id', 'video')
1023 if not is_video_url
:
1024 self
.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
1025 'To download audios, add a "/audio" to the URL')
1027 signature
= self
._extract
_signature
(playlist_id
)
1029 def fetch_page(page_idx
):
1034 'order_avoided': 'true',
1039 'web_location': 1550101,
1040 'wts': int(time
.time()),
1042 query
['w_rid'] = hashlib
.md5(f
'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
1045 response
= self
._download
_json
('https://api.bilibili.com/x/space/wbi/arc/search',
1046 playlist_id
, note
=f
'Downloading page {page_idx}', query
=query
)
1047 except ExtractorError
as e
:
1048 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 412:
1049 raise ExtractorError(
1050 'Request is blocked by server (412), please add cookies, wait and try later.', expected
=True)
1052 if response
['code'] == -401:
1053 raise ExtractorError(
1054 'Request is blocked by server (401), please add cookies, wait and try later.', expected
=True)
1055 return response
['data']
1057 def get_metadata(page_data
):
1058 page_size
= page_data
['page']['ps']
1059 entry_count
= page_data
['page']['count']
1061 'page_count': math
.ceil(entry_count
/ page_size
),
1062 'page_size': page_size
,
1065 def get_entries(page_data
):
1066 for entry
in traverse_obj(page_data
, ('list', 'vlist')) or []:
1067 yield self
.url_result(f
'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE
, entry
['bvid'])
1069 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
1070 return self
.playlist_result(paged_list
, playlist_id
)
class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE):
    """Extracts the list of audio uploads from a user's space as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
    _TESTS = [{
        'url': 'https://space.bilibili.com/313580179/audio',
        'info_dict': {
            'id': '313580179',
        },
        'playlist_mincount': 1,
    }]

    def _real_extract(self, url):
        playlist_id = self._match_id(url)

        def fetch_page(page_idx):
            # API pages are 1-based, hence page_idx + 1
            return self._download_json(
                'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id,
                note=f'Downloading page {page_idx}',
                query={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'})['data']

        def get_metadata(page_data):
            # This endpoint reports page count directly, unlike the video API
            return {
                'page_count': page_data['pageCount'],
                'page_size': page_data['pageSize'],
            }

        def get_entries(page_data):
            for entry in page_data.get('data', []):
                yield self.url_result(f'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE, entry['id'])

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Shared helpers for list-style space playlists (collections, series, favlists, ...)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield url_results for every BV id found under *bvid_keys* in *page_data*.

        *bvid_keys* may be a single key or a path; it is normalized with variadic()
        so both forms traverse correctly.
        """
        for bvid in traverse_obj(page_data, (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Best-effort uploader name scraped from the space page <title>; None on failure."""
        webpage = self._download_webpage(f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(r'(?s)<title\b[^>]*>([^<]+)的个人空间-', webpage, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        # Drop pagination bookkeeping so the remaining metadata can be splatted
        # straight into playlist_result() by subclasses
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        metadata.pop('page_count', None)
        metadata.pop('page_size', None)
        return metadata, page_list
class BilibiliCollectionListIE(BilibiliSpaceListBaseIE):
    """Extracts a user's video collection (合集) as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
        'info_dict': {
            'id': '2142762_57445',
            'title': '【完结】《底特律 变人》全结局流程解说',
            'uploader_id': '2142762',
            'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
        },
        'playlist_mincount': 31,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        # Composite id: collections are scoped to an uploader
        playlist_id = f'{mid}_{sid}'

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['page_size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **traverse_obj(page_data, {
                    'title': ('meta', 'name', {str}),
                    'description': ('meta', 'description', {str}),
                    'uploader_id': ('meta', 'mid', {str_or_none}),
                    'timestamp': ('meta', 'ptime', {int_or_none}),
                    'thumbnail': ('meta', 'cover', {url_or_none}),
                }),
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliSeriesListIE(BilibiliSpaceListBaseIE):
    """Extracts a user's video series (系列/列表) as a playlist."""
    _VALID_URL = r'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
        'info_dict': {
            'id': '1958703906_547718',
            'description': '直播回放',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
            'modified_timestamp': int,
            'modified_date': str,
        },
        'playlist_mincount': 513,
    }]

    def _real_extract(self, url):
        mid, sid = self._match_valid_url(url).group('mid', 'sid')
        playlist_id = f'{mid}_{sid}'
        # Series metadata lives on a separate, best-effort endpoint; the
        # archives endpoint below only provides the entries themselves
        playlist_meta = traverse_obj(self._download_json(
            f'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id, fatal=False
        ), {
            'title': ('data', 'meta', 'name', {str}),
            'description': ('data', 'meta', 'description', {str}),
            'uploader_id': ('data', 'meta', 'mid', {str_or_none}),
            'timestamp': ('data', 'meta', 'ctime', {int_or_none}),
            'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}),
        })

        def fetch_page(page_idx):
            return self._download_json(
                'https://api.bilibili.com/x/series/archives',
                playlist_id, note=f'Downloading page {page_idx}',
                query={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30})['data']

        def get_metadata(page_data):
            page_size = page_data['page']['size']
            entry_count = page_data['page']['total']
            return {
                'page_count': math.ceil(entry_count / page_size),
                'page_size': page_size,
                'uploader': self._get_uploader(mid, playlist_id),
                **playlist_meta,
            }

        def get_entries(page_data):
            return self._get_entries(page_data, 'archives')

        metadata, paged_list = self._extract_playlist(fetch_page, get_metadata, get_entries)
        return self.playlist_result(paged_list, playlist_id, **metadata)
class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE):
    """Extracts a favorites list (收藏夹), which may be private."""
    _VALID_URL = r'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
        'info_dict': {
            'id': '1103407912',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'modified_timestamp': int,
            'modified_date': str,
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        fid = self._match_id(url)

        list_info = self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
            fid, note='Downloading favlist metadata')
        # -403: list is private and viewer is not its owner
        if list_info['code'] == -403:
            self.raise_login_required(msg='This is a private favorites list. You need to log in as its owner')

        # The ids endpoint returns every entry at once (no pagination needed)
        entries = self._get_entries(self._download_json(
            f'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
            fid, note='Download favlist entries'), 'data')

        return self.playlist_result(entries, fid, **traverse_obj(list_info, ('data', 'info', {
            'title': ('title', {str}),
            'description': ('intro', {str}),
            'uploader': ('upper', 'name', {str}),
            'uploader_id': ('upper', 'mid', {str_or_none}),
            'timestamp': ('ctime', {int_or_none}),
            'modified_timestamp': ('mtime', {int_or_none}),
            'thumbnail': ('cover', {url_or_none}),
            'view_count': ('cnt_info', 'play', {int_or_none}),
            'like_count': ('cnt_info', 'thumb_up', {int_or_none}),
        })))
class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE):
    """Extracts the logged-in user's "watch later" (稍后再看) queue."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/watchlater/#/list',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _real_extract(self, url):
        # Use the logged-in user's id as playlist id when available,
        # falling back to the literal 'watchlater'
        list_id = getattr(self._get_cookies(url).get('DedeUserID'), 'value', 'watchlater')
        watchlater_info = self._download_json(
            'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id)
        # -101: not logged in; the watchlater list is per-account
        if watchlater_info['code'] == -101:
            self.raise_login_required(msg='You need to login to access your watchlater list')
        entries = self._get_entries(watchlater_info, ('data', 'list'))
        return self.playlist_result(entries, id=list_id, title='稍后再看')
class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
    """Extracts /list/ and /medialist/play/ playlists via the medialist API."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
        'info_dict': {
            'id': '5_547718',
            'uploader': '靡烟miya',
            'uploader_id': '1958703906',
            'timestamp': 1637985853,
            'upload_date': '20211127',
        },
        'playlist_mincount': 513,
    }, {
        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
        'info_dict': {
            'id': 'BV1DU4y1r7tz',
            'ext': 'mp4',
            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
            'upload_date': '20220820',
            'timestamp': 1661016330,
            'uploader_id': '1958703906',
            'uploader': '靡烟miya',
            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
            'duration': 9552.903,
            'comment_count': int,
            '_old_archive_ids': ['bilibili 687146339_part1'],
        },
        'params': {'noplaylist': True},
    }, {
        'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
        'info_dict': {
            'id': '5_547718',
        },
        'playlist_mincount': 513,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
            'uploader_id': '84912',
            'timestamp': 1604905176,
            'upload_date': '20201109',
            'thumbnail': r"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
        },
        'playlist_mincount': 22,
    }, {
        'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
        'info_dict': {
            'id': '3_1103407912',
        },
        'playlist_mincount': 22,
        'skip': 'redirect url',
    }, {
        'url': 'https://www.bilibili.com/list/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }, {
        'url': 'https://www.bilibili.com/medialist/play/watchlater',
        'info_dict': {'id': 'watchlater'},
        'playlist_mincount': 0,
        'skip': 'login required',
    }]

    def _extract_medialist(self, query, list_id):
        """Generator: page through the medialist API, yielding url_results.

        NOTE: mutates *query* in place — each page's cursor ('oid') is the id
        of the previous page's last entry.
        """
        for page_num in itertools.count(1):
            page_data = self._download_json(
                'https://api.bilibili.com/x/v2/medialist/resource/list',
                list_id, query=query, note=f'getting playlist {query["biz_id"]} page {page_num}'
            )['data']
            yield from self._get_entries(page_data, 'media_list', ending_key='bv_id')
            query['oid'] = traverse_obj(page_data, ('media_list', -1, 'id'))
            if not page_data.get('has_more', False):
                break

    def _real_extract(self, url):
        list_id = self._match_id(url)

        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
        # With --no-playlist and a bvid present, extract just that video
        if not self._yes_playlist(list_id, bvid):
            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)

        webpage = self._download_webpage(url, list_id)
        initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
        if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
            error_code = traverse_obj(initial_state, ('error', 'trueCode', {int_or_none}))
            error_message = traverse_obj(initial_state, ('error', 'message', {str_or_none}))
            if error_code == -400 and list_id == 'watchlater':
                self.raise_login_required('You need to login to access your watchlater playlist')
            elif error_code == -403:
                self.raise_login_required('This is a private playlist. You need to login as its owner')
            elif error_code == 11010:
                raise ExtractorError('Playlist is no longer available', expected=True)
            raise ExtractorError(f'Could not access playlist: {error_code} {error_message}')

        query = {
            'ps': 20,
            'with_current': False,
            **traverse_obj(initial_state, {
                'type': ('playlist', 'type', {int_or_none}),
                'biz_id': ('playlist', 'id', {int_or_none}),
                'tid': ('tid', {int_or_none}),
                # 'sortFiled' [sic] is the site's own spelling of the key
                'sort_field': ('sortFiled', {int_or_none}),
                # desc arrives as a bool; the API expects the string 'true'/'false'
                'desc': ('desc', {bool_or_none}, {str_or_none}, {str.lower}),
            }),
        }
        metadata = {
            'id': f'{query["type"]}_{query["biz_id"]}',
            **traverse_obj(initial_state, ('mediaListInfo', {
                'title': ('title', {str}),
                'uploader': ('upper', 'name', {str}),
                'uploader_id': ('upper', 'mid', {str_or_none}),
                'timestamp': ('ctime', {int_or_none}),
                'thumbnail': ('cover', {url_or_none}),
            })),
        }
        return self.playlist_result(self._extract_medialist(query, list_id), **metadata)
class BilibiliCategoryIE(InfoExtractor):
    """Extracts category/subcategory listing pages (e.g. /v/kichiku/mad) as playlists."""
    IE_NAME = 'Bilibili category extractor'
    _MAX_RESULTS = 1000000
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
    _TESTS = [{
        'url': 'https://www.bilibili.com/v/kichiku/mad',
        'info_dict': {
            'id': 'kichiku: mad',
            'title': 'kichiku: mad'
        },
        'playlist_mincount': 45,
    }]

    def _fetch_page(self, api_url, num_pages, query, page_num):
        # *query* doubles as the display id ('category: subcategory')
        parsed_json = self._download_json(
            api_url, query, query={'Search_key': query, 'pn': page_num},
            note='Extracting results from page %s of %s' % (page_num, num_pages))

        video_list = traverse_obj(parsed_json, ('data', 'archives'), expected_type=list)
        if video_list is None:
            raise ExtractorError('Failed to retrieve video list for page %d' % page_num)

        for video in video_list:
            yield self.url_result(
                'https://www.bilibili.com/video/%s' % video['bvid'], 'BiliBili', video['bvid'])

    def _entries(self, category, subcategory, query):
        # map of categories : subcategories : RIDs
        # NOTE(review): interior of this table partially reconstructed — only
        # 'manual_vocaloid' is certain from source; verify against upstream
        rid_map = {
            'kichiku': {
                'mad': 26,
                'manual_vocaloid': 126,
                'guide': 22,
                'theatre': 216,
                'course': 127
            },
        }

        if category not in rid_map:
            raise ExtractorError(
                f'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
        if subcategory not in rid_map[category]:
            raise ExtractorError(
                f'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
        rid_value = rid_map[category][subcategory]

        api_url = 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
        page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
        page_data = traverse_obj(page_json, ('data', 'page'), expected_type=dict)
        count, size = int_or_none(page_data.get('count')), int_or_none(page_data.get('size'))
        if count is None or not size:
            raise ExtractorError('Failed to calculate either page count or size')

        num_pages = math.ceil(count / size)

        return OnDemandPagedList(functools.partial(
            self._fetch_page, api_url, num_pages, query), size)

    def _real_extract(self, url):
        # path is /v/<category>/<subcategory>
        category, subcategory = urllib.parse.urlparse(url).path.split('/')[2:4]
        query = '%s: %s' % (category, subcategory)

        return self.playlist_result(self._entries(category, subcategory, query), query, query)
class BiliBiliSearchIE(SearchInfoExtractor):
    """Search extractor — 'bilisearchN:query' yields up to N search results."""
    IE_DESC = 'Bilibili video search'
    _MAX_RESULTS = 100000
    _SEARCH_KEY = 'bilisearch'
    _TESTS = [{
        'url': 'bilisearch3:靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        'playlist_count': 3,
        'info_dict': {
            'id': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
            'title': '靡烟 出道一年,我怎么还在等你单推的女人睡觉后开播啊',
        },
        'playlist': [{
            'info_dict': {
                'id': 'BV1n44y1Q7sc',
                'ext': 'mp4',
                'title': '“出道一年,我怎么还在等你单推的女人睡觉后开播啊?”【一分钟了解靡烟miya】',
                'timestamp': 1669889987,
                'upload_date': '20221201',
                'description': 'md5:43343c0973defff527b5a4b403b4abf9',
                'uploader': '靡烟miya',
                'duration': 123.156,
                'uploader_id': '1958703906',
                'comment_count': int,
                'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
                '_old_archive_ids': ['bilibili 988222410_part1'],
            },
        }],
    }]

    def _search_results(self, query):
        # The search API rejects requests without a buvid3 cookie; fabricate one
        if not self._get_cookies('https://api.bilibili.com').get('buvid3'):
            self._set_cookie('.bilibili.com', 'buvid3', f'{uuid.uuid4()}infoc')
        for page_num in itertools.count(1):
            videos = self._download_json(
                'https://api.bilibili.com/x/web-interface/search/type', query,
                note=f'Extracting results from page {page_num}', query={
                    'Search_key': query,
                    'keyword': query,
                    'page': page_num,
                    'context': '',
                    'duration': 0,
                    'tids_2': '',
                    '__refresh__': 'true',
                    'search_type': 'video',
                    'tids': 0,
                    'highlight': 1,
                })['data'].get('result')
            if not videos:
                break
            for video in videos:
                yield self.url_result(video['arcurl'], 'BiliBili', str(video['aid']))
class BilibiliAudioBaseIE(InfoExtractor):
    """Common API helper for the bilibili audio (music) extractors."""

    def _call_api(self, path, sid, query=None):
        """Call the music-service web API at *path* and return its 'data' payload.

        *query* defaults to just the song/album id when not supplied.
        """
        api_url = 'https://www.bilibili.com/audio/music-service-c/web/' + path
        response = self._download_json(api_url, sid, query=query or {'sid': sid})
        return response['data']
class BilibiliAudioIE(BilibiliAudioBaseIE):
    """Extracts a single audio track (bilibili.com/audio/auNNN)."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/audio/au1003142',
        'md5': 'fec4987014ec94ef9e666d4d158ad03b',
        'info_dict': {
            'id': '1003142',
            'ext': 'm4a',
            'title': '【tsukimi】YELLOW / 神山羊',
            'artist': 'tsukimi',
            'comment_count': int,
            'description': 'YELLOW的mp3版!',
            'duration': 183,
            'subtitles': dict,
            'thumbnail': r're:^https?://.+\.jpg',
            'timestamp': 1564836614,
            'upload_date': '20190803',
            'uploader': 'tsukimi-つきみぐー',
            'view_count': int,
        },
    }]

    def _real_extract(self, url):
        au_id = self._match_id(url)

        play_data = self._call_api('url', au_id)
        formats = [{
            'url': play_data['cdns'][0],
            'filesize': int_or_none(play_data.get('size')),
            'vcodec': 'none',
        }]

        # The CDN requires a Referer header matching the page URL
        for a_format in formats:
            a_format.setdefault('http_headers', {}).update({
                'Referer': url,
            })

        song = self._call_api('song/info', au_id)
        title = song['title']
        statistic = song.get('statistic') or {}

        subtitles = None
        lyric = song.get('lyric')
        if lyric:
            # Lyrics are exposed as an LRC file; surface them as subtitles
            subtitles = {
                'origin': [{
                    'url': lyric,
                }],
            }

        return {
            'id': au_id,
            'title': title,
            'formats': formats,
            'artist': song.get('author'),
            'comment_count': int_or_none(statistic.get('comment')),
            'description': song.get('intro'),
            'duration': int_or_none(song.get('duration')),
            'subtitles': subtitles,
            'thumbnail': song.get('cover'),
            'timestamp': int_or_none(song.get('passtime')),
            'uploader': song.get('uname'),
            'view_count': int_or_none(statistic.get('play')),
        }
class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
    """Extracts an audio album/menu (bilibili.com/audio/amNNN) as a playlist."""
    _VALID_URL = r'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
    _TESTS = [{
        'url': 'https://www.bilibili.com/audio/am10624',
        'info_dict': {
            'id': '10624',
            'title': '每日新曲推荐(每日11:00更新)',
            'description': '每天11:00更新,为你推送最新音乐',
        },
        'playlist_count': 19,
    }]

    def _real_extract(self, url):
        am_id = self._match_id(url)

        songs = self._call_api(
            'song/of-menu', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data']

        entries = []
        for song in songs:
            sid = str_or_none(song.get('id'))
            if not sid:
                continue
            entries.append(self.url_result(
                'https://www.bilibili.com/audio/au' + sid,
                BilibiliAudioIE.ie_key(), sid))

        if entries:
            # Stamp each entry with the album title when menu info is available
            album_data = self._call_api('menu/info', am_id) or {}
            album_title = album_data.get('title')
            if album_title:
                for entry in entries:
                    entry['album'] = album_title
                return self.playlist_result(
                    entries, am_id, album_title, album_data.get('intro'))

        return self.playlist_result(entries, am_id)
class BiliBiliPlayerIE(InfoExtractor):
    """Redirects embedded player URLs (player.bilibili.com) to the main extractor."""
    _VALID_URL = r'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
    _TESTS = [{
        'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
        'only_matching': True,
    }]

    def _real_extract(self, url):
        video_id = self._match_id(url)
        # Delegate to BiliBiliIE via the av-numbered canonical URL
        return self.url_result(
            f'http://www.bilibili.tv/video/av{video_id}/',
            ie=BiliBiliIE.ie_key(), video_id=video_id)
class BiliIntlBaseIE(InfoExtractor):
    """Shared API, subtitle, format and login logic for bilibili.tv (international)."""
    _API_URL = 'https://api.bilibili.tv/intl/gateway'
    _NETRC_MACHINE = 'biliintl'
    _HEADERS = {'Referer': 'https://www.bilibili.com/'}

    def _call_api(self, endpoint, *args, **kwargs):
        """Call *endpoint* on the intl gateway; raise/warn on API error codes.

        Returns the 'data' member of the response (may be None).
        """
        json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
        if json.get('code'):
            # 10004004/10004005/10023006: login required; 10004001: geo-blocked
            if json['code'] in (10004004, 10004005, 10023006):
                self.raise_login_required()
            elif json['code'] == 10004001:
                self.raise_geo_restricted()
            else:
                if json.get('message') and str(json['code']) != json['message']:
                    errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
                else:
                    errmsg = kwargs.get('errnote', 'Unable to download JSON metadata')
                if kwargs.get('fatal'):
                    raise ExtractorError(errmsg)
                else:
                    self.report_warning(errmsg)
        return json.get('data')

    def json2srt(self, json):
        """Convert the site's JSON subtitle body into SRT text.

        Cues with empty content or missing timestamps are dropped.
        """
        data = '\n\n'.join(
            f'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
            for i, line in enumerate(traverse_obj(json, (
                'body', lambda _, l: l['content'] and l['from'] and l['to']))))
        return data

    def _get_subtitles(self, *, ep_id=None, aid=None):
        sub_json = self._call_api(
            '/web/v2/subtitle', ep_id or aid, fatal=False,
            note='Downloading subtitles list', errnote='Unable to download subtitles list',
            query=filter_dict({
                'platform': 'web',
                's_locale': 'en_US',
                'episode_id': ep_id,
                'aid': aid,
            })) or {}
        subtitles = {}
        # Dedupe: the same URL may appear under both 'subtitles' and 'video_subtitle'
        fetched_urls = set()
        for sub in traverse_obj(sub_json, (('subtitles', 'video_subtitle'), ..., {dict})):
            for url in traverse_obj(sub, ((None, 'ass', 'srt'), 'url', {url_or_none})):
                if url in fetched_urls:
                    continue
                fetched_urls.add(url)
                sub_ext = determine_ext(url)
                sub_lang = sub.get('lang_key') or 'en'

                if sub_ext == 'ass':
                    subtitles.setdefault(sub_lang, []).append({
                        'ext': 'ass',
                        'url': url,
                    })
                elif sub_ext == 'json':
                    # JSON subtitles must be fetched and converted to SRT locally
                    sub_data = self._download_json(
                        url, ep_id or aid, fatal=False,
                        note=f'Downloading subtitles{format_field(sub, "lang", " for %s")} ({sub_lang})',
                        errnote='Unable to download subtitles')
                    if sub_data:
                        subtitles.setdefault(sub_lang, []).append({
                            'ext': 'srt',
                            'data': self.json2srt(sub_data),
                        })
                else:
                    self.report_warning('Unexpected subtitle extension', ep_id or aid)

        return subtitles

    def _get_formats(self, *, ep_id=None, aid=None):
        """Build yt-dlp format dicts from the intl playurl endpoint (DASH-style
        separate video and audio resources)."""
        video_json = self._call_api(
            '/web/playurl', ep_id or aid, note='Downloading video formats',
            errnote='Unable to download video formats', query=filter_dict({
                'platform': 'web',
                'ep_id': ep_id,
                'aid': aid,
            }))
        video_json = video_json['playurl']
        formats = []
        for vid in video_json.get('video') or []:
            video_res = vid.get('video_resource') or {}
            video_info = vid.get('stream_info') or {}
            if not video_res.get('url'):
                continue
            formats.append({
                'url': video_res['url'],
                'ext': 'mp4',
                'format_note': video_info.get('desc_words'),
                'width': video_res.get('width'),
                'height': video_res.get('height'),
                'vbr': video_res.get('bandwidth'),
                'acodec': 'none',  # video-only stream
                'vcodec': video_res.get('codecs'),
                'filesize': video_res.get('size'),
            })
        for aud in video_json.get('audio_resource') or []:
            if not aud.get('url'):
                continue
            formats.append({
                'url': aud['url'],
                'ext': 'mp4',
                'abr': aud.get('bandwidth'),
                'acodec': aud.get('codecs'),
                'vcodec': 'none',  # audio-only stream
                'filesize': aud.get('size'),
            })

        return formats

    def _parse_video_metadata(self, video_data):
        """Map intl API episode/video fields to yt-dlp metadata keys."""
        return {
            'title': video_data.get('title_display') or video_data.get('title'),
            'description': video_data.get('desc'),
            'thumbnail': video_data.get('cover'),
            'timestamp': unified_timestamp(video_data.get('formatted_pub_date')),
            # Episode titles look like 'E2 - The First Night'
            'episode_number': int_or_none(self._search_regex(
                r'^E(\d+)(?:$| - )', video_data.get('title_display') or '', 'episode number', default=None)),
        }

    def _perform_login(self, username, password):
        """Log in via the passport endpoint; the password is RSA-encrypted with a
        server-supplied key and salt ('hash') before being posted."""
        if not Cryptodome.RSA:
            raise ExtractorError('pycryptodomex not found. Please install', expected=True)

        key_data = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
            note='Downloading login key', errnote='Unable to download login key')['data']

        public_key = Cryptodome.RSA.importKey(key_data['key'])
        password_hash = Cryptodome.PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf-8'))
        login_post = self._download_json(
            'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data=urlencode_postdata({
                'username': username,
                'password': base64.b64encode(password_hash).decode('ascii'),
                'keep_me': 'true',
                's_locale': 'en_US',
                'isTrusted': 'true',
            }), note='Logging in', errnote='Unable to log in')
        if login_post.get('code'):
            if login_post.get('message'):
                raise ExtractorError(f'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected=True)
            else:
                raise ExtractorError('Unable to log in')
class BiliIntlIE(BiliIntlBaseIE):
    """Extracts single videos/episodes from bilibili.tv / biliintl.com."""
    _VALID_URL = r'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
    _TESTS = [{
        # Bstation page
        'url': 'https://www.bilibili.tv/en/play/34613/341736',
        'info_dict': {
            'id': '341736',
            'ext': 'mp4',
            'title': 'E2 - The First Night',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 2,
            'upload_date': '20201009',
            'episode': 'Episode 2',
            'timestamp': 1602259500,
            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
            'chapters': [{
                'start_time': 0,
                'end_time': 76.242,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 76.242,
                'end_time': 161.161,
                'title': 'Intro'
            }, {
                'start_time': 1325.742,
                'end_time': 1403.903,
                'title': 'Outro'
            }],
        },
    }, {
        # Non-Bstation page
        'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
        'info_dict': {
            'id': '11005006',
            'ext': 'mp4',
            'title': 'E3 - Who?',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 3,
            'description': 'md5:e1a775e71a35c43f141484715470ad09',
            'episode': 'Episode 3',
            'upload_date': '20211219',
            'timestamp': 1639928700,
            'chapters': [{
                'start_time': 0,
                'end_time': 88.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 88.0,
                'end_time': 156.0,
                'title': 'Intro'
            }, {
                'start_time': 1173.0,
                'end_time': 1259.535,
                'title': 'Outro'
            }],
        },
    }, {
        # Subtitle with empty content
        'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
        'info_dict': {
            'id': '10131790',
            'ext': 'mp4',
            'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
            'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode_number': 140,
        },
        'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
    }, {
        # episode comment extraction
        'url': 'https://www.bilibili.tv/en/play/34580/340317',
        'info_dict': {
            'id': '340317',
            'ext': 'mp4',
            'timestamp': 1604057820,
            'upload_date': '20201030',
            'episode_number': 5,
            'title': 'E5 - My Own Steel',
            'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
            'thumbnail': r're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
            'episode': 'Episode 5',
            'comment_count': int,
            'chapters': [{
                'start_time': 0,
                'end_time': 61.0,
                'title': '<Untitled Chapter 1>'
            }, {
                'start_time': 61.0,
                'end_time': 134.0,
                'title': 'Intro'
            }, {
                'start_time': 1290.0,
                'end_time': 1379.0,
                'title': 'Outro'
            }],
        },
        'params': {
            'getcomments': True
        }
    }, {
        # user generated content comment extraction
        'url': 'https://www.bilibili.tv/en/video/2045730385',
        'info_dict': {
            'id': '2045730385',
            'ext': 'mp4',
            'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
            'timestamp': 1667891924,
            'upload_date': '20221108',
            'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan',
            'comment_count': int,
            'thumbnail': r're:https://pic\.bstarstatic\.(?:com|net)/ugc/f6c363659efd2eabe5683fbb906b1582\.jpg',
        },
        'params': {
            'getcomments': True
        }
    }, {
        # episode id without intro and outro
        'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
        'info_dict': {
            'id': '11246489',
            'ext': 'mp4',
            'title': 'E1 - Operation \'Strix\' <Owl>',
            'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
            'timestamp': 1649516400,
            'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
            'episode': 'Episode 1',
            'episode_number': 1,
            'upload_date': '20220409',
        },
    }, {
        'url': 'https://www.biliintl.com/en/play/34613/341736',
        'only_matching': True,
    }, {
        # User-generated content (as opposed to a series licensed from a studio)
        'url': 'https://bilibili.tv/en/video/2019955076',
        'only_matching': True,
    }, {
        # No language in URL
        'url': 'https://www.bilibili.tv/video/2019955076',
        'only_matching': True,
    }, {
        # Uppercase language in URL
        'url': 'https://www.bilibili.tv/EN/video/2019955076',
        'only_matching': True,
    }]

    @staticmethod
    def _make_url(video_id, series_id=None):
        """Build the canonical bilibili.tv URL for an episode or standalone video."""
        if series_id:
            return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
        return f'https://www.bilibili.tv/en/video/{video_id}'

    def _extract_video_metadata(self, url, video_id, season_id):
        # Metadata smuggled by the series extractor wins; avoids a page fetch
        url, smuggled_data = unsmuggle_url(url, {})
        if smuggled_data.get('title'):
            return smuggled_data

        webpage = self._download_webpage(url, video_id)
        # Bstation layout
        initial_data = (
            self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
            or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
        video_data = traverse_obj(
            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}

        if season_id and not video_data:
            # Non-Bstation layout, read through episode list
            season_json = self._call_api(f'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id)
            video_data = traverse_obj(season_json, (
                'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
            ), expected_type=dict, get_all=False)

        # XXX: webpage metadata may not accurate, it just used to not crash when video_data not found
        return merge_dicts(
            self._parse_video_metadata(video_data), {
                'title': get_element_by_class(
                    'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                'description': get_element_by_class(
                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
            }, self._search_json_ld(webpage, video_id, default={}))

    def _get_comments_reply(self, root_id, next_id=0, display_id=None):
        """Recursively yield replies of comment *root_id*, following cursor pages."""
        comment_api_raw_data = self._download_json(
            'https://api.bilibili.tv/reply/web/detail', display_id,
            note=f'Downloading reply comment of {root_id} - {next_id}',
            query={
                'platform': 'web',
                'ps': 20,  # comment's reply per page (default: 3)
                'root': root_id,
                'next': next_id,
            })

        for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
            yield {
                'author': traverse_obj(replies, ('member', 'name')),
                'author_id': traverse_obj(replies, ('member', 'mid')),
                'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                'text': traverse_obj(replies, ('content', 'message')),
                'id': replies.get('rpid'),
                'like_count': int_or_none(replies.get('like_count')),
                'parent': replies.get('parent'),
                'timestamp': unified_timestamp(replies.get('ctime_text'))
            }

        if not traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
            yield from self._get_comments_reply(
                root_id, comment_api_raw_data['data']['cursor']['next'], display_id)

    def _get_comments(self, video_id, ep_id):
        """Yield root comments (and their reply threads) page by page."""
        for i in itertools.count(0):
            comment_api_raw_data = self._download_json(
                'https://api.bilibili.tv/reply/web/root', video_id,
                note=f'Downloading comment page {i + 1}',
                query={
                    'platform': 'web',
                    'pn': i,  # page number
                    'ps': 20,  # comment per page (default: 20)
                    'oid': video_id,
                    'type': 3 if ep_id else 1,  # 1: user generated content, 3: series content
                    'sort_type': 1,  # 1: best, 2: recent
                })

            for replies in traverse_obj(comment_api_raw_data, ('data', 'replies', ...)):
                yield {
                    'author': traverse_obj(replies, ('member', 'name')),
                    'author_id': traverse_obj(replies, ('member', 'mid')),
                    'author_thumbnail': traverse_obj(replies, ('member', 'face')),
                    'text': traverse_obj(replies, ('content', 'message')),
                    'id': replies.get('rpid'),
                    'like_count': int_or_none(replies.get('like_count')),
                    'timestamp': unified_timestamp(replies.get('ctime_text')),
                    'author_is_uploader': bool(traverse_obj(replies, ('member', 'type'))),
                }
                if replies.get('count'):
                    yield from self._get_comments_reply(replies.get('rpid'), display_id=video_id)

            if traverse_obj(comment_api_raw_data, ('data', 'cursor', 'is_end')):
                break

    def _real_extract(self, url):
        season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')
        video_id = ep_id or aid
        chapters = None

        if ep_id:
            intro_ending_json = self._call_api(
                f'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
                video_id, fatal=False) or {}
            if intro_ending_json.get('skip'):
                # FIXME: start time and end time seems a bit off a few second even it corrext based on ogv.*.js
                # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
                chapters = [{
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'opening_end_time')), 1000),
                    'title': 'Intro'
                }, {
                    'start_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_start_time')), 1000),
                    'end_time': float_or_none(traverse_obj(intro_ending_json, ('skip', 'ending_end_time')), 1000),
                    'title': 'Outro'
                }]

        return {
            'id': video_id,
            **self._extract_video_metadata(url, video_id, season_id),
            'formats': self._get_formats(ep_id=ep_id, aid=aid),
            'subtitles': self.extract_subtitles(ep_id=ep_id, aid=aid),
            'chapters': chapters,
            '__post_extractor': self.extract_comments(video_id, ep_id),
            'http_headers': self._HEADERS,
        }
2094 class BiliIntlSeriesIE(BiliIntlBaseIE
):
2095 IE_NAME
= 'biliIntl:series'
2096 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
2098 'url': 'https://www.bilibili.tv/en/play/34613',
2099 'playlist_mincount': 15,
2102 'title': 'TONIKAWA: Over the Moon For You',
2103 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
2104 'categories': ['Slice of life', 'Comedy', 'Romance'],
2105 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
2109 'skip_download': True,
2112 'url': 'https://www.bilibili.tv/en/media/1048837',
2115 'title': 'SPY×FAMILY',
2116 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
2117 'categories': ['Adventure', 'Action', 'Comedy'],
2118 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
2121 'playlist_mincount': 25,
2123 'url': 'https://www.biliintl.com/en/play/34613',
2124 'only_matching': True,
2126 'url': 'https://www.biliintl.com/EN/play/34613',
2127 'only_matching': True,
def _entries(self, series_id):
    """Yield url_result entries for every episode of the given series.

    The per-episode metadata returned by the episodes endpoint is smuggled
    into each episode URL so the episode extractor can reuse it.
    """
    episodes_data = self._call_api(
        f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id)
    episode_dicts = traverse_obj(
        episodes_data, ('sections', ..., 'episodes', ...), expected_type=dict)
    for episode_data in episode_dicts:
        ep_id = str(episode_data['episode_id'])
        # Attach the already-fetched episode metadata to the URL via smuggling
        smuggled = smuggle_url(
            BiliIntlIE._make_url(ep_id, series_id),
            self._parse_video_metadata(episode_data))
        yield self.url_result(smuggled, BiliIntlIE, ep_id)
def _real_extract(self, url):
    """Resolve a Bilibili international series page into a playlist result."""
    playlist_id = self._match_id(url)
    # Season metadata is best-effort: fall back to an empty dict if absent
    season = self._call_api(
        f'/web/v2/ogv/play/season_info?season_id={playlist_id}&platform=web',
        playlist_id).get('season') or {}
    return self.playlist_result(
        self._entries(playlist_id),
        playlist_id,
        season.get('title'),
        season.get('description'),
        categories=traverse_obj(season, ('styles', ..., 'title'), expected_type=str_or_none),
        thumbnail=url_or_none(season.get('horizontal_cover')),
        view_count=parse_count(season.get('view')))
2148 class BiliLiveIE(InfoExtractor
):
2149 _VALID_URL
= r
'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
2152 'url': 'https://live.bilibili.com/196',
2155 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
2157 'title': "太空狼人杀联动,不被爆杀就算赢",
2158 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
2159 'timestamp': 1650802769,
2163 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
2164 'only_matching': True
2166 'url': 'https://live.bilibili.com/blanc/196',
2167 'only_matching': True
2171 80: {'format_id': 'low', 'format_note': '流畅'}
,
2172 150: {'format_id': 'high_res', 'format_note': '高清'}
,
2173 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}
,
2174 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}
,
2175 10000: {'format_id': 'source', 'format_note': '原画'}
,
2176 20000: {'format_id': '4K', 'format_note': '4K'}
,
2177 30000: {'format_id': 'dolby', 'format_note': '杜比'}
,
2180 _quality
= staticmethod(qualities(list(_FORMATS
)))
def _call_api(self, path, room_id, query):
    """Fetch a Bilibili live API endpoint and return its 'data' payload.

    Raises ExtractorError when the endpoint reports a non-zero ``code``,
    using the server-provided message when one is available.
    """
    response = self._download_json(
        f'https://api.live.bilibili.com/{path}', room_id, query=query)
    if response.get('code') != 0:
        error_message = response.get('message') or 'Unable to download JSON metadata'
        raise ExtractorError(error_message)
    return response.get('data') or {}
def _parse_formats(self, qn, fmt):
    """Yield format dicts for the codec entries of ``fmt`` that match
    the requested quality number ``qn``.

    Entries whose ``current_qn`` differs from ``qn`` are skipped; each
    mirror in ``url_info`` produces one format.
    """
    for codec_info in fmt.get('codec') or []:
        if codec_info.get('current_qn') != qn:
            continue
        for mirror in codec_info['url_info']:
            yield {
                'url': f'{mirror["host"]}{codec_info["base_url"]}{mirror["extra"]}',
                'ext': fmt.get('format_name'),
                'vcodec': codec_info.get('codec_name'),
                'quality': self._quality(qn),
                **self._FORMATS[qn],
            }
2201 def _real_extract(self
, url
):
2202 room_id
= self
._match
_id
(url
)
2203 room_data
= self
._call
_api
('room/v1/Room/get_info', room_id
, {'id': room_id}
)
2204 if room_data
.get('live_status') == 0:
2205 raise ExtractorError('Streamer is not live', expected
=True)
2208 for qn
in self
._FORMATS
.keys():
2209 stream_data
= self
._call
_api
('xlive/web-room/v2/index/getRoomPlayInfo', room_id
, {
2219 for fmt
in traverse_obj(stream_data
, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
2220 formats
.extend(self
._parse
_formats
(qn
, fmt
))
2224 'title': room_data
.get('title'),
2225 'description': room_data
.get('description'),
2226 'thumbnail': room_data
.get('user_cover'),
2227 'timestamp': stream_data
.get('live_time'),