10 from .common
import InfoExtractor
, SearchInfoExtractor
11 from ..dependencies
import Cryptodome
12 from ..networking
.exceptions
import HTTPError
31 srt_subtitles_timecode
,
43 class BilibiliBaseIE(InfoExtractor
):
44 _FORMAT_ID_RE
= re
.compile(r
'-(\d+)\.m4s\?')
46 def extract_formats(self
, play_info
):
48 r
['quality']: traverse_obj(r
, 'new_description', 'display_desc')
49 for r
in traverse_obj(play_info
, ('support_formats', lambda _
, v
: v
['quality']))
52 audios
= traverse_obj(play_info
, ('dash', 'audio', ...))
53 flac_audio
= traverse_obj(play_info
, ('dash', 'flac', 'audio'))
55 audios
.append(flac_audio
)
57 'url': traverse_obj(audio
, 'baseUrl', 'base_url', 'url'),
58 'ext': mimetype2ext(traverse_obj(audio
, 'mimeType', 'mime_type')),
59 'acodec': audio
.get('codecs'),
61 'tbr': float_or_none(audio
.get('bandwidth'), scale
=1000),
62 'filesize': int_or_none(audio
.get('size')),
63 'format_id': str_or_none(audio
.get('id')),
64 } for audio
in audios
]
67 'url': traverse_obj(video
, 'baseUrl', 'base_url', 'url'),
68 'ext': mimetype2ext(traverse_obj(video
, 'mimeType', 'mime_type')),
69 'fps': float_or_none(traverse_obj(video
, 'frameRate', 'frame_rate')),
70 'width': int_or_none(video
.get('width')),
71 'height': int_or_none(video
.get('height')),
72 'vcodec': video
.get('codecs'),
73 'acodec': 'none' if audios
else None,
74 'tbr': float_or_none(video
.get('bandwidth'), scale
=1000),
75 'filesize': int_or_none(video
.get('size')),
76 'quality': int_or_none(video
.get('id')),
77 'format_id': traverse_obj(
78 video
, (('baseUrl', 'base_url'), {self._FORMAT_ID_RE.search}
, 1),
79 ('id', {str_or_none}
), get_all
=False),
80 'format': format_names
.get(video
.get('id')),
81 } for video
in traverse_obj(play_info
, ('dash', 'video', ...)))
83 missing_formats
= format_names
.keys() - set(traverse_obj(formats
, (..., 'quality')))
85 self
.to_screen(f
'Format(s) {", ".join(format_names[i] for i in missing_formats)} are missing; '
86 f
'you have to login or become premium member to download them. {self._login_hint()}')
90 def json2srt(self
, json_data
):
92 for idx
, line
in enumerate(json_data
.get('body') or []):
93 srt_data
+= (f
'{idx + 1}\n'
94 f
'{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n'
95 f
'{line["content"]}\n\n')
98 def _get_subtitles(self
, video_id
, aid
, cid
):
102 'url': f
'https://comment.bilibili.com/{cid}.xml',
106 video_info_json
= self
._download
_json
(f
'https://api.bilibili.com/x/player/v2?aid={aid}&cid={cid}', video_id
)
107 for s
in traverse_obj(video_info_json
, ('data', 'subtitle', 'subtitles', ...)):
108 subtitles
.setdefault(s
['lan'], []).append({
110 'data': self
.json2srt(self
._download
_json
(s
['subtitle_url'], video_id
))
114 def _get_chapters(self
, aid
, cid
):
115 chapters
= aid
and cid
and self
._download
_json
(
116 'https://api.bilibili.com/x/player/v2', aid
, query
={'aid': aid, 'cid': cid}
,
117 note
='Extracting chapters', fatal
=False)
118 return traverse_obj(chapters
, ('data', 'view_points', ..., {
120 'start_time': 'from',
124 def _get_comments(self
, aid
):
125 for idx
in itertools
.count(1):
126 replies
= traverse_obj(
128 f
'https://api.bilibili.com/x/v2/reply?pn={idx}&oid={aid}&type=1&jsonp=jsonp&sort=2&_=1567227301685',
129 aid
, note
=f
'Extracting comments from page {idx}', fatal
=False),
133 for children
in map(self
._get
_all
_children
, replies
):
136 def _get_all_children(self
, reply
):
138 'author': traverse_obj(reply
, ('member', 'uname')),
139 'author_id': traverse_obj(reply
, ('member', 'mid')),
140 'id': reply
.get('rpid'),
141 'text': traverse_obj(reply
, ('content', 'message')),
142 'timestamp': reply
.get('ctime'),
143 'parent': reply
.get('parent') or 'root',
145 for children
in map(self
._get
_all
_children
, traverse_obj(reply
, ('replies', ...))):
148 def _get_episodes_from_season(self
, ss_id
, url
):
149 season_info
= self
._download
_json
(
150 'https://api.bilibili.com/pgc/web/season/section', ss_id
,
151 note
='Downloading season info', query
={'season_id': ss_id}
,
152 headers
={'Referer': url, **self.geo_verification_headers()}
)
154 for entry
in traverse_obj(season_info
, (
155 'result', 'main_section', 'episodes',
156 lambda _
, v
: url_or_none(v
['share_url']) and v
['id'])):
157 yield self
.url_result(entry
['share_url'], BiliBiliBangumiIE
, f
'ep{entry["id"]}')
160 class BiliBiliIE(BilibiliBaseIE
):
161 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:video/|festival/\w+\?(?:[^#]*&)?bvid=)[aAbB][vV](?P<id>[^/?#&]+)'
164 'url': 'https://www.bilibili.com/video/BV13x41117TL',
166 'id': 'BV13x41117TL',
167 'title': '阿滴英文|英文歌分享#6 "Closer',
169 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
170 'uploader_id': '65880958',
172 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
175 'comment_count': int,
176 'upload_date': '20170301',
177 'timestamp': 1488353834,
183 'url': 'http://www.bilibili.com/video/av1074402/',
185 'thumbnail': r
're:^https?://.*\.(jpg|jpeg)$',
188 'uploader_id': '156160',
189 'id': 'BV11x411K7CN',
192 'upload_date': '20140420',
193 'timestamp': 1397983878,
194 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
196 'comment_count': int,
200 'params': {'skip_download': True}
,
203 'url': 'https://www.bilibili.com/video/BV1bK411W797',
205 'id': 'BV1bK411W797',
206 'title': '物语中的人物是如何吐槽自己的OP的'
208 'playlist_count': 18,
211 'id': 'BV1bK411W797_p1',
213 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
215 'timestamp': 1589601697,
216 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
217 'uploader': '打牌还是打桩',
218 'uploader_id': '150259984',
220 'comment_count': int,
221 'upload_date': '20200516',
223 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
228 'note': 'Specific page of Anthology',
229 'url': 'https://www.bilibili.com/video/BV1bK411W797?p=1',
231 'id': 'BV1bK411W797_p1',
233 'title': '物语中的人物是如何吐槽自己的OP的 p01 Staple Stable/战场原+羽川',
235 'timestamp': 1589601697,
236 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
237 'uploader': '打牌还是打桩',
238 'uploader_id': '150259984',
240 'comment_count': int,
241 'upload_date': '20200516',
243 'description': 'md5:e3c401cf7bc363118d1783dd74068a68',
247 'note': 'video has subtitles',
248 'url': 'https://www.bilibili.com/video/BV12N4y1M7rh',
250 'id': 'BV12N4y1M7rh',
252 'title': 'md5:96e8bb42c2b432c0d4ce3434a61479c1',
254 'description': 'md5:afde2b7ba9025c01d9e3dde10de221e4',
256 'upload_date': '20220709',
258 'timestamp': 1657347907,
259 'uploader_id': '1326814124',
260 'comment_count': int,
263 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
264 'subtitles': 'count:2'
266 'params': {'listsubtitles': True}
,
268 'url': 'https://www.bilibili.com/video/av8903802/',
270 'id': 'BV13x41117TL',
272 'title': '阿滴英文|英文歌分享#6 "Closer',
273 'upload_date': '20170301',
274 'description': 'md5:3b1b9e25b78da4ef87e9b548b88ee76a',
275 'timestamp': 1488353834,
276 'uploader_id': '65880958',
278 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
281 'comment_count': int,
286 'skip_download': True,
289 'note': 'video has chapter',
290 'url': 'https://www.bilibili.com/video/BV1vL411G7N7/',
292 'id': 'BV1vL411G7N7',
294 'title': '如何为你的B站视频添加进度条分段',
295 'timestamp': 1634554558,
296 'upload_date': '20211018',
297 'description': 'md5:a9a3d6702b3a94518d419b2e9c320a6d',
299 'uploader': '爱喝咖啡的当麻',
301 'uploader_id': '1680903',
302 'chapters': 'count:6',
303 'comment_count': int,
306 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
308 'params': {'skip_download': True}
,
310 'note': 'video redirects to festival page',
311 'url': 'https://www.bilibili.com/video/BV1wP4y1P72h',
313 'id': 'BV1wP4y1P72h',
315 'title': '牛虎年相交之际,一首传统民族打击乐《牛斗虎》祝大家新春快乐,虎年大吉!【bilibili音乐虎闹新春】',
316 'timestamp': 1643947497,
317 'upload_date': '20220204',
318 'description': 'md5:8681a0d4d2c06b4ae27e59c8080a7fe6',
319 'uploader': '叨叨冯聊音乐',
321 'uploader_id': '528182630',
324 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
326 'params': {'skip_download': True}
,
328 'note': 'newer festival video',
329 'url': 'https://www.bilibili.com/festival/2023honkaiimpact3gala?bvid=BV1ay4y1d77f',
331 'id': 'BV1ay4y1d77f',
333 'title': '【崩坏3新春剧场】为特别的你送上祝福!',
334 'timestamp': 1674273600,
335 'upload_date': '20230121',
336 'description': 'md5:58af66d15c6a0122dc30c8adfd828dd8',
338 'duration': 1111.722,
339 'uploader_id': '8469526',
342 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$',
344 'params': {'skip_download': True}
,
347 def _real_extract(self
, url
):
348 video_id
= self
._match
_id
(url
)
349 webpage
= self
._download
_webpage
(url
, video_id
)
350 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', video_id
)
352 is_festival
= 'videoData' not in initial_state
354 video_data
= initial_state
['videoInfo']
356 play_info
= self
._search
_json
(r
'window\.__playinfo__\s*=', webpage
, 'play info', video_id
)['data']
357 video_data
= initial_state
['videoData']
359 video_id
, title
= video_data
['bvid'], video_data
.get('title')
361 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
362 page_list_json
= not is_festival
and traverse_obj(
364 'https://api.bilibili.com/x/player/pagelist', video_id
,
365 fatal
=False, query
={'bvid': video_id, 'jsonp': 'jsonp'}
,
366 note
='Extracting videos in anthology'),
367 'data', expected_type
=list) or []
368 is_anthology
= len(page_list_json
) > 1
370 part_id
= int_or_none(parse_qs(url
).get('p', [None])[-1])
371 if is_anthology
and not part_id
and self
._yes
_playlist
(video_id
, video_id
):
372 return self
.playlist_from_matches(
373 page_list_json
, video_id
, title
, ie
=BiliBiliIE
,
374 getter
=lambda entry
: f
'https://www.bilibili.com/video/{video_id}?p={entry["page"]}')
377 part_id
= part_id
or 1
378 title
+= f
' p{part_id:02d} {traverse_obj(page_list_json, (part_id - 1, "part")) or ""}'
380 aid
= video_data
.get('aid')
381 old_video_id
= format_field(aid
, None, f
'%s_part{part_id or 1}')
383 cid
= traverse_obj(video_data
, ('pages', part_id
- 1, 'cid')) if part_id
else video_data
.get('cid')
387 play_info
= self
._download
_json
(
388 'https://api.bilibili.com/x/player/playurl', video_id
,
389 query
={'bvid': video_id, 'cid': cid, 'fnval': 4048}
,
390 note
='Extracting festival video formats')['data']
392 festival_info
= traverse_obj(initial_state
, {
393 'uploader': ('videoInfo', 'upName'),
394 'uploader_id': ('videoInfo', 'upMid', {str_or_none}
),
395 'like_count': ('videoStatus', 'like', {int_or_none}
),
396 'thumbnail': ('sectionEpisodes', lambda _
, v
: v
['bvid'] == video_id
, 'cover'),
400 **traverse_obj(initial_state
, {
401 'uploader': ('upData', 'name'),
402 'uploader_id': ('upData', 'mid', {str_or_none}
),
403 'like_count': ('videoData', 'stat', 'like', {int_or_none}
),
404 'tags': ('tags', ..., 'tag_name'),
405 'thumbnail': ('videoData', 'pic', {url_or_none}
),
408 **traverse_obj(video_data
, {
409 'description': 'desc',
410 'timestamp': ('pubdate', {int_or_none}
),
411 'view_count': (('viewCount', ('stat', 'view')), {int_or_none}
),
412 'comment_count': ('stat', 'reply', {int_or_none}
),
414 'id': f
'{video_id}{format_field(part_id, None, "_p%d")}',
415 'formats': self
.extract_formats(play_info
),
416 '_old_archive_ids': [make_archive_id(self
, old_video_id
)] if old_video_id
else None,
418 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
419 'chapters': self
._get
_chapters
(aid
, cid
),
420 'subtitles': self
.extract_subtitles(video_id
, aid
, cid
),
421 '__post_extractor': self
.extract_comments(aid
),
422 'http_headers': {'Referer': url}
,
426 class BiliBiliBangumiIE(BilibiliBaseIE
):
427 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/play/(?P<id>ep\d+)'
430 'url': 'https://www.bilibili.com/bangumi/play/ep267851',
437 'season_id': '26801',
440 'episode_id': '267851',
443 'duration': 1425.256,
444 'timestamp': 1554566400,
445 'upload_date': '20190406',
446 'thumbnail': r
're:^https?://.*\.(jpg|jpeg|png)$'
448 'skip': 'According to the copyright owner\'s request, you may only watch the video after you are premium member.'
451 def _real_extract(self
, url
):
452 video_id
= self
._match
_id
(url
)
453 episode_id
= video_id
[2:]
454 webpage
= self
._download
_webpage
(url
, video_id
)
456 if '您所在的地区无法观看本片' in webpage
:
457 raise GeoRestrictedError('This video is restricted')
458 elif '正在观看预览,大会员免费看全片' in webpage
:
459 self
.raise_login_required('This video is for premium members only')
461 headers
= {'Referer': url, **self.geo_verification_headers()}
462 play_info
= self
._download
_json
(
463 'https://api.bilibili.com/pgc/player/web/v2/playurl', video_id
,
464 'Extracting episode', query
={'fnval': '4048', 'ep_id': episode_id}
,
466 premium_only
= play_info
.get('code') == -10403
467 play_info
= traverse_obj(play_info
, ('result', 'video_info', {dict}
)) or {}
469 formats
= self
.extract_formats(play_info
)
470 if not formats
and (premium_only
or '成为大会员抢先看' in webpage
or '开通大会员观看' in webpage
):
471 self
.raise_login_required('This video is for premium members only')
473 bangumi_info
= self
._download
_json
(
474 'https://api.bilibili.com/pgc/view/web/season', video_id
, 'Get episode details',
475 query
={'ep_id': episode_id}
, headers
=headers
)['result']
477 episode_number
, episode_info
= next((
478 (idx
, ep
) for idx
, ep
in enumerate(traverse_obj(
479 bangumi_info
, ('episodes', ..., {dict}
)), 1)
480 if str_or_none(ep
.get('id')) == episode_id
), (1, {}))
482 season_id
= bangumi_info
.get('season_id')
483 season_number
= season_id
and next((
484 idx
+ 1 for idx
, e
in enumerate(
485 traverse_obj(bangumi_info
, ('seasons', ...)))
486 if e
.get('season_id') == season_id
489 aid
= episode_info
.get('aid')
494 **traverse_obj(bangumi_info
, {
495 'series': ('series', 'series_title', {str}
),
496 'series_id': ('series', 'series_id', {str_or_none}
),
497 'thumbnail': ('square_cover', {url_or_none}
),
499 'title': join_nonempty('title', 'long_title', delim
=' ', from_dict
=episode_info
),
500 'episode': episode_info
.get('long_title'),
501 'episode_id': episode_id
,
502 'episode_number': int_or_none(episode_info
.get('title')) or episode_number
,
503 'season_id': str_or_none(season_id
),
504 'season_number': season_number
,
505 'timestamp': int_or_none(episode_info
.get('pub_time')),
506 'duration': float_or_none(play_info
.get('timelength'), scale
=1000),
507 'subtitles': self
.extract_subtitles(video_id
, aid
, episode_info
.get('cid')),
508 '__post_extractor': self
.extract_comments(aid
),
509 'http_headers': headers
,
513 class BiliBiliBangumiMediaIE(BilibiliBaseIE
):
514 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/bangumi/media/md(?P<id>\d+)'
516 'url': 'https://www.bilibili.com/bangumi/media/md24097891',
520 'playlist_mincount': 25,
523 def _real_extract(self
, url
):
524 media_id
= self
._match
_id
(url
)
525 webpage
= self
._download
_webpage
(url
, media_id
)
526 ss_id
= self
._search
_json
(
527 r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial_state', media_id
)['mediaInfo']['season_id']
529 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), media_id
)
532 class BiliBiliBangumiSeasonIE(BilibiliBaseIE
):
533 _VALID_URL
= r
'(?x)https?://(?:www\.)?bilibili\.com/bangumi/play/ss(?P<id>\d+)'
535 'url': 'https://www.bilibili.com/bangumi/play/ss26801',
539 'playlist_mincount': 26
542 def _real_extract(self
, url
):
543 ss_id
= self
._match
_id
(url
)
545 return self
.playlist_result(self
._get
_episodes
_from
_season
(ss_id
, url
), ss_id
)
class BilibiliSpaceBaseIE(InfoExtractor):
    """Shared pagination scaffolding for bilibili space (user page) extractors."""

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Fetch page 0 eagerly, derive paging metadata from it, and return
        (metadata, lazily-paged entry list)."""
        initial_page = fetch_page(0)
        metadata = get_metadata(initial_page)

        def page_entries(idx):
            # Reuse the already-downloaded first page instead of fetching it twice.
            return get_entries(initial_page if idx == 0 else fetch_page(idx))

        paged_list = InAdvancePagedList(
            page_entries, metadata['page_count'], metadata['page_size'])
        return metadata, paged_list
560 class BilibiliSpaceVideoIE(BilibiliSpaceBaseIE
):
561 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)(?P<video>/video)?/?(?:[?#]|$)'
563 'url': 'https://space.bilibili.com/3985676/video',
567 'playlist_mincount': 178,
569 'url': 'https://space.bilibili.com/313580179/video',
573 'playlist_mincount': 92,
576 def _extract_signature(self
, playlist_id
):
577 session_data
= self
._download
_json
('https://api.bilibili.com/x/web-interface/nav', playlist_id
, fatal
=False)
579 key_from_url
= lambda x
: x
[x
.rfind('/') + 1:].split('.')[0]
580 img_key
= traverse_obj(
581 session_data
, ('data', 'wbi_img', 'img_url', {key_from_url}
)) or '34478ba821254d9d93542680e3b86100'
582 sub_key
= traverse_obj(
583 session_data
, ('data', 'wbi_img', 'sub_url', {key_from_url}
)) or '7e16a90d190a4355a78fd00b32a38de6'
585 session_key
= img_key
+ sub_key
587 signature_values
= []
589 46, 47, 18, 2, 53, 8, 23, 32, 15, 50, 10, 31, 58, 3, 45, 35, 27, 43, 5, 49, 33, 9, 42, 19, 29, 28, 14, 39,
590 12, 38, 41, 13, 37, 48, 7, 16, 24, 55, 40, 61, 26, 17, 0, 1, 60, 51, 30, 4, 22, 25, 54, 21, 56, 59, 6, 63,
591 57, 62, 11, 36, 20, 34, 44, 52
593 char_at_position
= try_call(lambda: session_key
[position
])
595 signature_values
.append(char_at_position
)
597 return ''.join(signature_values
)[:32]
599 def _real_extract(self
, url
):
600 playlist_id
, is_video_url
= self
._match
_valid
_url
(url
).group('id', 'video')
602 self
.to_screen('A channel URL was given. Only the channel\'s videos will be downloaded. '
603 'To download audios, add a "/audio" to the URL')
605 signature
= self
._extract
_signature
(playlist_id
)
607 def fetch_page(page_idx
):
612 'order_avoided': 'true',
617 'web_location': 1550101,
618 'wts': int(time
.time()),
620 query
['w_rid'] = hashlib
.md5(f
'{urllib.parse.urlencode(query)}{signature}'.encode()).hexdigest()
623 response
= self
._download
_json
('https://api.bilibili.com/x/space/wbi/arc/search',
624 playlist_id
, note
=f
'Downloading page {page_idx}', query
=query
)
625 except ExtractorError
as e
:
626 if isinstance(e
.cause
, HTTPError
) and e
.cause
.status
== 412:
627 raise ExtractorError(
628 'Request is blocked by server (412), please add cookies, wait and try later.', expected
=True)
630 if response
['code'] == -401:
631 raise ExtractorError(
632 'Request is blocked by server (401), please add cookies, wait and try later.', expected
=True)
633 return response
['data']
635 def get_metadata(page_data
):
636 page_size
= page_data
['page']['ps']
637 entry_count
= page_data
['page']['count']
639 'page_count': math
.ceil(entry_count
/ page_size
),
640 'page_size': page_size
,
643 def get_entries(page_data
):
644 for entry
in traverse_obj(page_data
, ('list', 'vlist')) or []:
645 yield self
.url_result(f
'https://www.bilibili.com/video/{entry["bvid"]}', BiliBiliIE
, entry
['bvid'])
647 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
648 return self
.playlist_result(paged_list
, playlist_id
)
651 class BilibiliSpaceAudioIE(BilibiliSpaceBaseIE
):
652 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<id>\d+)/audio'
654 'url': 'https://space.bilibili.com/313580179/audio',
658 'playlist_mincount': 1,
661 def _real_extract(self
, url
):
662 playlist_id
= self
._match
_id
(url
)
664 def fetch_page(page_idx
):
665 return self
._download
_json
(
666 'https://api.bilibili.com/audio/music-service/web/song/upper', playlist_id
,
667 note
=f
'Downloading page {page_idx}',
668 query
={'uid': playlist_id, 'pn': page_idx + 1, 'ps': 30, 'order': 1, 'jsonp': 'jsonp'}
)['data']
670 def get_metadata(page_data
):
672 'page_count': page_data
['pageCount'],
673 'page_size': page_data
['pageSize'],
676 def get_entries(page_data
):
677 for entry
in page_data
.get('data', []):
678 yield self
.url_result(f
'https://www.bilibili.com/audio/au{entry["id"]}', BilibiliAudioIE
, entry
['id'])
680 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
681 return self
.playlist_result(paged_list
, playlist_id
)
class BilibiliSpaceListBaseIE(BilibiliSpaceBaseIE):
    """Base for space list extractors (collections, series, favourites, watchlater)."""

    def _get_entries(self, page_data, bvid_keys, ending_key='bvid'):
        """Yield url_results for every bvid found under *bvid_keys* in *page_data*."""
        bvid_path = (*variadic(bvid_keys, (str, bytes, dict, set)), ..., ending_key, {str})
        for bvid in traverse_obj(page_data, bvid_path):
            yield self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE, bvid)

    def _get_uploader(self, uid, playlist_id):
        """Best-effort scrape of the uploader's name from their space page title."""
        space_page = self._download_webpage(
            f'https://space.bilibili.com/{uid}', playlist_id, fatal=False)
        return self._search_regex(
            r'(?s)<title\b[^>]*>([^<]+)的个人空间-', space_page, 'uploader', fatal=False)

    def _extract_playlist(self, fetch_page, get_metadata, get_entries):
        """Like the base implementation, but strip internal paging keys from metadata."""
        metadata, page_list = super()._extract_playlist(fetch_page, get_metadata, get_entries)
        for paging_key in ('page_count', 'page_size'):
            metadata.pop(paging_key, None)
        return metadata, page_list
700 class BilibiliCollectionListIE(BilibiliSpaceListBaseIE
):
701 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/collectiondetail/?\?sid=(?P<sid>\d+)'
703 'url': 'https://space.bilibili.com/2142762/channel/collectiondetail?sid=57445',
705 'id': '2142762_57445',
706 'title': '【完结】《底特律 变人》全结局流程解说',
709 'uploader_id': '2142762',
712 'thumbnail': 'https://archive.biliimg.com/bfs/archive/e0e543ae35ad3df863ea7dea526bc32e70f4c091.jpg',
714 'playlist_mincount': 31,
717 def _real_extract(self
, url
):
718 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
719 playlist_id
= f
'{mid}_{sid}'
721 def fetch_page(page_idx
):
722 return self
._download
_json
(
723 'https://api.bilibili.com/x/polymer/space/seasons_archives_list',
724 playlist_id
, note
=f
'Downloading page {page_idx}',
725 query
={'mid': mid, 'season_id': sid, 'page_num': page_idx + 1, 'page_size': 30}
)['data']
727 def get_metadata(page_data
):
728 page_size
= page_data
['page']['page_size']
729 entry_count
= page_data
['page']['total']
731 'page_count': math
.ceil(entry_count
/ page_size
),
732 'page_size': page_size
,
733 'uploader': self
._get
_uploader
(mid
, playlist_id
),
734 **traverse_obj(page_data
, {
735 'title': ('meta', 'name', {str}
),
736 'description': ('meta', 'description', {str}
),
737 'uploader_id': ('meta', 'mid', {str_or_none}
),
738 'timestamp': ('meta', 'ptime', {int_or_none}
),
739 'thumbnail': ('meta', 'cover', {url_or_none}
),
743 def get_entries(page_data
):
744 return self
._get
_entries
(page_data
, 'archives')
746 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
747 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
750 class BilibiliSeriesListIE(BilibiliSpaceListBaseIE
):
751 _VALID_URL
= r
'https?://space\.bilibili\.com/(?P<mid>\d+)/channel/seriesdetail/?\?\bsid=(?P<sid>\d+)'
753 'url': 'https://space.bilibili.com/1958703906/channel/seriesdetail?sid=547718&ctype=0',
755 'id': '1958703906_547718',
757 'description': '直播回放',
758 'uploader': '靡烟miya',
759 'uploader_id': '1958703906',
760 'timestamp': 1637985853,
761 'upload_date': '20211127',
762 'modified_timestamp': int,
763 'modified_date': str,
765 'playlist_mincount': 513,
768 def _real_extract(self
, url
):
769 mid
, sid
= self
._match
_valid
_url
(url
).group('mid', 'sid')
770 playlist_id
= f
'{mid}_{sid}'
771 playlist_meta
= traverse_obj(self
._download
_json
(
772 f
'https://api.bilibili.com/x/series/series?series_id={sid}', playlist_id
, fatal
=False
774 'title': ('data', 'meta', 'name', {str}
),
775 'description': ('data', 'meta', 'description', {str}
),
776 'uploader_id': ('data', 'meta', 'mid', {str_or_none}
),
777 'timestamp': ('data', 'meta', 'ctime', {int_or_none}
),
778 'modified_timestamp': ('data', 'meta', 'mtime', {int_or_none}
),
781 def fetch_page(page_idx
):
782 return self
._download
_json
(
783 'https://api.bilibili.com/x/series/archives',
784 playlist_id
, note
=f
'Downloading page {page_idx}',
785 query
={'mid': mid, 'series_id': sid, 'pn': page_idx + 1, 'ps': 30}
)['data']
787 def get_metadata(page_data
):
788 page_size
= page_data
['page']['size']
789 entry_count
= page_data
['page']['total']
791 'page_count': math
.ceil(entry_count
/ page_size
),
792 'page_size': page_size
,
793 'uploader': self
._get
_uploader
(mid
, playlist_id
),
797 def get_entries(page_data
):
798 return self
._get
_entries
(page_data
, 'archives')
800 metadata
, paged_list
= self
._extract
_playlist
(fetch_page
, get_metadata
, get_entries
)
801 return self
.playlist_result(paged_list
, playlist_id
, **metadata
)
804 class BilibiliFavoritesListIE(BilibiliSpaceListBaseIE
):
805 _VALID_URL
= r
'https?://(?:space\.bilibili\.com/\d+/favlist/?\?fid=|(?:www\.)?bilibili\.com/medialist/detail/ml)(?P<id>\d+)'
807 'url': 'https://space.bilibili.com/84912/favlist?fid=1103407912&ftype=create',
813 'uploader_id': '84912',
814 'timestamp': 1604905176,
815 'upload_date': '20201109',
816 'modified_timestamp': int,
817 'modified_date': str,
818 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
822 'playlist_mincount': 22,
824 'url': 'https://www.bilibili.com/medialist/detail/ml1103407912',
825 'only_matching': True,
828 def _real_extract(self
, url
):
829 fid
= self
._match
_id
(url
)
831 list_info
= self
._download
_json
(
832 f
'https://api.bilibili.com/x/v3/fav/resource/list?media_id={fid}&pn=1&ps=20',
833 fid
, note
='Downloading favlist metadata')
834 if list_info
['code'] == -403:
835 self
.raise_login_required(msg
='This is a private favorites list. You need to log in as its owner')
837 entries
= self
._get
_entries
(self
._download
_json
(
838 f
'https://api.bilibili.com/x/v3/fav/resource/ids?media_id={fid}',
839 fid
, note
='Download favlist entries'), 'data')
841 return self
.playlist_result(entries
, fid
, **traverse_obj(list_info
, ('data', 'info', {
842 'title': ('title', {str}
),
843 'description': ('intro', {str}
),
844 'uploader': ('upper', 'name', {str}
),
845 'uploader_id': ('upper', 'mid', {str_or_none}
),
846 'timestamp': ('ctime', {int_or_none}
),
847 'modified_timestamp': ('mtime', {int_or_none}
),
848 'thumbnail': ('cover', {url_or_none}
),
849 'view_count': ('cnt_info', 'play', {int_or_none}
),
850 'like_count': ('cnt_info', 'thumb_up', {int_or_none}
),
854 class BilibiliWatchlaterIE(BilibiliSpaceListBaseIE
):
855 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/watchlater/?(?:[?#]|$)'
857 'url': 'https://www.bilibili.com/watchlater/#/list',
858 'info_dict': {'id': 'watchlater'}
,
859 'playlist_mincount': 0,
860 'skip': 'login required',
863 def _real_extract(self
, url
):
864 list_id
= getattr(self
._get
_cookies
(url
).get('DedeUserID'), 'value', 'watchlater')
865 watchlater_info
= self
._download
_json
(
866 'https://api.bilibili.com/x/v2/history/toview/web?jsonp=jsonp', list_id
)
867 if watchlater_info
['code'] == -101:
868 self
.raise_login_required(msg
='You need to login to access your watchlater list')
869 entries
= self
._get
_entries
(watchlater_info
, ('data', 'list'))
870 return self
.playlist_result(entries
, id=list_id
, title
='稍后再看')
873 class BilibiliPlaylistIE(BilibiliSpaceListBaseIE
):
874 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/(?:medialist/play|list)/(?P<id>\w+)'
876 'url': 'https://www.bilibili.com/list/1958703906?sid=547718',
880 'uploader': '靡烟miya',
881 'uploader_id': '1958703906',
882 'timestamp': 1637985853,
883 'upload_date': '20211127',
885 'playlist_mincount': 513,
887 'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
891 'playlist_mincount': 513,
892 'skip': 'redirect url',
894 'url': 'https://www.bilibili.com/list/ml1103407912',
896 'id': '3_1103407912',
899 'uploader_id': '84912',
900 'timestamp': 1604905176,
901 'upload_date': '20201109',
902 'thumbnail': r
"re:http://i\d\.hdslb\.com/bfs/archive/14b83c62aa8871b79083df1e9ab4fbc699ad16fe\.jpg",
904 'playlist_mincount': 22,
906 'url': 'https://www.bilibili.com/medialist/play/ml1103407912',
908 'id': '3_1103407912',
910 'playlist_mincount': 22,
911 'skip': 'redirect url',
913 'url': 'https://www.bilibili.com/list/watchlater',
914 'info_dict': {'id': 'watchlater'}
,
915 'playlist_mincount': 0,
916 'skip': 'login required',
918 'url': 'https://www.bilibili.com/medialist/play/watchlater',
919 'info_dict': {'id': 'watchlater'}
,
920 'playlist_mincount': 0,
921 'skip': 'login required',
924 def _extract_medialist(self
, query
, list_id
):
925 for page_num
in itertools
.count(1):
926 page_data
= self
._download
_json
(
927 'https://api.bilibili.com/x/v2/medialist/resource/list',
928 list_id
, query
=query
, note
=f
'getting playlist {query["biz_id"]} page {page_num}'
930 yield from self
._get
_entries
(page_data
, 'media_list', ending_key
='bv_id')
931 query
['oid'] = traverse_obj(page_data
, ('media_list', -1, 'id'))
932 if not page_data
.get('has_more', False):
935 def _real_extract(self
, url
):
936 list_id
= self
._match
_id
(url
)
937 webpage
= self
._download
_webpage
(url
, list_id
)
938 initial_state
= self
._search
_json
(r
'window\.__INITIAL_STATE__\s*=', webpage
, 'initial state', list_id
)
939 if traverse_obj(initial_state
, ('error', 'code', {int_or_none}
)) != 200:
940 error_code
= traverse_obj(initial_state
, ('error', 'trueCode', {int_or_none}
))
941 error_message
= traverse_obj(initial_state
, ('error', 'message', {str_or_none}
))
942 if error_code
== -400 and list_id
== 'watchlater':
943 self
.raise_login_required('You need to login to access your watchlater playlist')
944 elif error_code
== -403:
945 self
.raise_login_required('This is a private playlist. You need to login as its owner')
946 elif error_code
== 11010:
947 raise ExtractorError('Playlist is no longer available', expected
=True)
948 raise ExtractorError(f
'Could not access playlist: {error_code} {error_message}')
952 'with_current': False,
953 **traverse_obj(initial_state
, {
954 'type': ('playlist', 'type', {int_or_none}
),
955 'biz_id': ('playlist', 'id', {int_or_none}
),
956 'tid': ('tid', {int_or_none}
),
957 'sort_field': ('sortFiled', {int_or_none}
),
958 'desc': ('desc', {bool_or_none}
, {str_or_none}
, {str.lower}
),
962 'id': f
'{query["type"]}_{query["biz_id"]}',
963 **traverse_obj(initial_state
, ('mediaListInfo', {
964 'title': ('title', {str}
),
965 'uploader': ('upper', 'name', {str}
),
966 'uploader_id': ('upper', 'mid', {str_or_none}
),
967 'timestamp': ('ctime', {int_or_none}
),
968 'thumbnail': ('cover', {url_or_none}
),
971 return self
.playlist_result(self
._extract
_medialist
(query
, list_id
), **metadata
)
974 class BilibiliCategoryIE(InfoExtractor
):
975 IE_NAME
= 'Bilibili category extractor'
976 _MAX_RESULTS
= 1000000
977 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/v/[a-zA-Z]+\/[a-zA-Z]+'
979 'url': 'https://www.bilibili.com/v/kichiku/mad',
981 'id': 'kichiku: mad',
982 'title': 'kichiku: mad'
984 'playlist_mincount': 45,
990 def _fetch_page(self
, api_url
, num_pages
, query
, page_num
):
991 parsed_json
= self
._download
_json
(
992 api_url
, query
, query
={'Search_key': query, 'pn': page_num}
,
993 note
='Extracting results from page %s of %s' % (page_num
, num_pages
))
995 video_list
= traverse_obj(parsed_json
, ('data', 'archives'), expected_type
=list)
997 raise ExtractorError('Failed to retrieve video list for page %d' % page_num
)
999 for video
in video_list
:
1000 yield self
.url_result(
1001 'https://www.bilibili.com/video/%s' % video
['bvid'], 'BiliBili', video
['bvid'])
1003 def _entries(self
, category
, subcategory
, query
):
1004 # map of categories : subcategories : RIDs
1008 'manual_vocaloid': 126,
1015 if category
not in rid_map
:
1016 raise ExtractorError(
1017 f
'The category {category} isn\'t supported. Supported categories: {list(rid_map.keys())}')
1018 if subcategory
not in rid_map
[category
]:
1019 raise ExtractorError(
1020 f
'The subcategory {subcategory} isn\'t supported for this category. Supported subcategories: {list(rid_map[category].keys())}')
1021 rid_value
= rid_map
[category
][subcategory
]
1023 api_url
= 'https://api.bilibili.com/x/web-interface/newlist?rid=%d&type=1&ps=20&jsonp=jsonp' % rid_value
1024 page_json
= self
._download
_json
(api_url
, query
, query
={'Search_key': query, 'pn': '1'}
)
1025 page_data
= traverse_obj(page_json
, ('data', 'page'), expected_type
=dict)
1026 count
, size
= int_or_none(page_data
.get('count')), int_or_none(page_data
.get('size'))
1027 if count
is None or not size
:
1028 raise ExtractorError('Failed to calculate either page count or size')
1030 num_pages
= math
.ceil(count
/ size
)
1032 return OnDemandPagedList(functools
.partial(
1033 self
._fetch
_page
, api_url
, num_pages
, query
), size
)
1035 def _real_extract(self
, url
):
1036 category
, subcategory
= urllib
.parse
.urlparse(url
).path
.split('/')[2:4]
1037 query
= '%s: %s' % (category
, subcategory
)
1039 return self
.playlist_result(self
._entries
(category
, subcategory
, query
), query
, query
)
1042 class BiliBiliSearchIE(SearchInfoExtractor
):
1043 IE_DESC
= 'Bilibili video search'
1044 _MAX_RESULTS
= 100000
1045 _SEARCH_KEY
= 'bilisearch'
1047 def _search_results(self
, query
):
1048 for page_num
in itertools
.count(1):
1049 videos
= self
._download
_json
(
1050 'https://api.bilibili.com/x/web-interface/search/type', query
,
1051 note
=f
'Extracting results from page {page_num}', query
={
1052 'Search_key': query
,
1058 '__refresh__': 'true',
1059 'search_type': 'video',
1062 })['data'].get('result')
1065 for video
in videos
:
1066 yield self
.url_result(video
['arcurl'], 'BiliBili', str(video
['aid']))
1069 class BilibiliAudioBaseIE(InfoExtractor
):
1070 def _call_api(self
, path
, sid
, query
=None):
1072 query
= {'sid': sid}
1073 return self
._download
_json
(
1074 'https://www.bilibili.com/audio/music-service-c/web/' + path
,
1075 sid
, query
=query
)['data']
1078 class BilibiliAudioIE(BilibiliAudioBaseIE
):
1079 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/au(?P<id>\d+)'
1081 'url': 'https://www.bilibili.com/audio/au1003142',
1082 'md5': 'fec4987014ec94ef9e666d4d158ad03b',
1086 'title': '【tsukimi】YELLOW / 神山羊',
1087 'artist': 'tsukimi',
1088 'comment_count': int,
1089 'description': 'YELLOW的mp3版!',
1096 'thumbnail': r
're:^https?://.+\.jpg',
1097 'timestamp': 1564836614,
1098 'upload_date': '20190803',
1099 'uploader': 'tsukimi-つきみぐー',
1104 def _real_extract(self
, url
):
1105 au_id
= self
._match
_id
(url
)
1107 play_data
= self
._call
_api
('url', au_id
)
1109 'url': play_data
['cdns'][0],
1110 'filesize': int_or_none(play_data
.get('size')),
1114 for a_format
in formats
:
1115 a_format
.setdefault('http_headers', {}).update({
1119 song
= self
._call
_api
('song/info', au_id
)
1120 title
= song
['title']
1121 statistic
= song
.get('statistic') or {}
1124 lyric
= song
.get('lyric')
1136 'artist': song
.get('author'),
1137 'comment_count': int_or_none(statistic
.get('comment')),
1138 'description': song
.get('intro'),
1139 'duration': int_or_none(song
.get('duration')),
1140 'subtitles': subtitles
,
1141 'thumbnail': song
.get('cover'),
1142 'timestamp': int_or_none(song
.get('passtime')),
1143 'uploader': song
.get('uname'),
1144 'view_count': int_or_none(statistic
.get('play')),
1148 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE
):
1149 _VALID_URL
= r
'https?://(?:www\.)?bilibili\.com/audio/am(?P<id>\d+)'
1151 'url': 'https://www.bilibili.com/audio/am10624',
1154 'title': '每日新曲推荐(每日11:00更新)',
1155 'description': '每天11:00更新,为你推送最新音乐',
1157 'playlist_count': 19,
1160 def _real_extract(self
, url
):
1161 am_id
= self
._match
_id
(url
)
1163 songs
= self
._call
_api
(
1164 'song/of-menu', am_id
, {'sid': am_id, 'pn': 1, 'ps': 100}
)['data']
1168 sid
= str_or_none(song
.get('id'))
1171 entries
.append(self
.url_result(
1172 'https://www.bilibili.com/audio/au' + sid
,
1173 BilibiliAudioIE
.ie_key(), sid
))
1176 album_data
= self
._call
_api
('menu/info', am_id
) or {}
1177 album_title
= album_data
.get('title')
1179 for entry
in entries
:
1180 entry
['album'] = album_title
1181 return self
.playlist_result(
1182 entries
, am_id
, album_title
, album_data
.get('intro'))
1184 return self
.playlist_result(entries
, am_id
)
1187 class BiliBiliPlayerIE(InfoExtractor
):
1188 _VALID_URL
= r
'https?://player\.bilibili\.com/player\.html\?.*?\baid=(?P<id>\d+)'
1190 'url': 'http://player.bilibili.com/player.html?aid=92494333&cid=157926707&page=1',
1191 'only_matching': True,
1194 def _real_extract(self
, url
):
1195 video_id
= self
._match
_id
(url
)
1196 return self
.url_result(
1197 'http://www.bilibili.tv/video/av%s/' % video_id
,
1198 ie
=BiliBiliIE
.ie_key(), video_id
=video_id
)
1201 class BiliIntlBaseIE(InfoExtractor
):
1202 _API_URL
= 'https://api.bilibili.tv/intl/gateway'
1203 _NETRC_MACHINE
= 'biliintl'
1205 def _call_api(self
, endpoint
, *args
, **kwargs
):
1206 json
= self
._download
_json
(self
._API
_URL
+ endpoint
, *args
, **kwargs
)
1207 if json
.get('code'):
1208 if json
['code'] in (10004004, 10004005, 10023006):
1209 self
.raise_login_required()
1210 elif json
['code'] == 10004001:
1211 self
.raise_geo_restricted()
1213 if json
.get('message') and str(json
['code']) != json
['message']:
1214 errmsg
= f
'{kwargs.get("errnote", "Unable to download JSON metadata")}: {self.IE_NAME} said: {json["message"]}'
1216 errmsg
= kwargs
.get('errnote', 'Unable to download JSON metadata')
1217 if kwargs
.get('fatal'):
1218 raise ExtractorError(errmsg
)
1220 self
.report_warning(errmsg
)
1221 return json
.get('data')
1223 def json2srt(self
, json
):
1225 f
'{i + 1}\n{srt_subtitles_timecode(line["from"])} --> {srt_subtitles_timecode(line["to"])}\n{line["content"]}'
1226 for i
, line
in enumerate(traverse_obj(json
, (
1227 'body', lambda _
, l
: l
['content'] and l
['from'] and l
['to']))))
1230 def _get_subtitles(self
, *, ep_id
=None, aid
=None):
1231 sub_json
= self
._call
_api
(
1232 '/web/v2/subtitle', ep_id
or aid
, fatal
=False,
1233 note
='Downloading subtitles list', errnote
='Unable to download subtitles list',
1236 's_locale': 'en_US',
1237 'episode_id': ep_id
,
1241 for sub
in sub_json
.get('subtitles') or []:
1242 sub_url
= sub
.get('url')
1245 sub_data
= self
._download
_json
(
1246 sub_url
, ep_id
or aid
, errnote
='Unable to download subtitles', fatal
=False,
1247 note
='Downloading subtitles%s' % f
' for {sub["lang"]}' if sub
.get('lang') else '')
1250 subtitles
.setdefault(sub
.get('lang_key', 'en'), []).append({
1252 'data': self
.json2srt(sub_data
)
1256 def _get_formats(self
, *, ep_id
=None, aid
=None):
1257 video_json
= self
._call
_api
(
1258 '/web/playurl', ep_id
or aid
, note
='Downloading video formats',
1259 errnote
='Unable to download video formats', query
=filter_dict({
1264 video_json
= video_json
['playurl']
1266 for vid
in video_json
.get('video') or []:
1267 video_res
= vid
.get('video_resource') or {}
1268 video_info
= vid
.get('stream_info') or {}
1269 if not video_res
.get('url'):
1272 'url': video_res
['url'],
1274 'format_note': video_info
.get('desc_words'),
1275 'width': video_res
.get('width'),
1276 'height': video_res
.get('height'),
1277 'vbr': video_res
.get('bandwidth'),
1279 'vcodec': video_res
.get('codecs'),
1280 'filesize': video_res
.get('size'),
1282 for aud
in video_json
.get('audio_resource') or []:
1283 if not aud
.get('url'):
1288 'abr': aud
.get('bandwidth'),
1289 'acodec': aud
.get('codecs'),
1291 'filesize': aud
.get('size'),
1296 def _parse_video_metadata(self
, video_data
):
1298 'title': video_data
.get('title_display') or video_data
.get('title'),
1299 'thumbnail': video_data
.get('cover'),
1300 'episode_number': int_or_none(self
._search
_regex
(
1301 r
'^E(\d+)(?:$| - )', video_data
.get('title_display') or '', 'episode number', default
=None)),
1304 def _perform_login(self
, username
, password
):
1305 if not Cryptodome
.RSA
:
1306 raise ExtractorError('pycryptodomex not found. Please install', expected
=True)
1308 key_data
= self
._download
_json
(
1309 'https://passport.bilibili.tv/x/intl/passport-login/web/key?lang=en-US', None,
1310 note
='Downloading login key', errnote
='Unable to download login key')['data']
1312 public_key
= Cryptodome
.RSA
.importKey(key_data
['key'])
1313 password_hash
= Cryptodome
.PKCS1_v1_5
.new(public_key
).encrypt((key_data
['hash'] + password
).encode('utf-8'))
1314 login_post
= self
._download
_json
(
1315 'https://passport.bilibili.tv/x/intl/passport-login/web/login/password?lang=en-US', None, data
=urlencode_postdata({
1316 'username': username
,
1317 'password': base64
.b64encode(password_hash
).decode('ascii'),
1319 's_locale': 'en_US',
1321 }), note
='Logging in', errnote
='Unable to log in')
1322 if login_post
.get('code'):
1323 if login_post
.get('message'):
1324 raise ExtractorError(f
'Unable to log in: {self.IE_NAME} said: {login_post["message"]}', expected
=True)
1326 raise ExtractorError('Unable to log in')
1329 class BiliIntlIE(BiliIntlBaseIE
):
1330 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(play/(?P<season_id>\d+)/(?P<ep_id>\d+)|video/(?P<aid>\d+))'
1333 'url': 'https://www.bilibili.tv/en/play/34613/341736',
1337 'title': 'E2 - The First Night',
1338 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1339 'episode_number': 2,
1340 'upload_date': '20201009',
1341 'episode': 'Episode 2',
1342 'timestamp': 1602259500,
1343 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1347 'title': '<Untitled Chapter 1>'
1349 'start_time': 76.242,
1350 'end_time': 161.161,
1353 'start_time': 1325.742,
1354 'end_time': 1403.903,
1360 'url': 'https://www.bilibili.tv/en/play/1033760/11005006',
1364 'title': 'E3 - Who?',
1365 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1366 'episode_number': 3,
1367 'description': 'md5:e1a775e71a35c43f141484715470ad09',
1368 'episode': 'Episode 3',
1369 'upload_date': '20211219',
1370 'timestamp': 1639928700,
1374 'title': '<Untitled Chapter 1>'
1380 'start_time': 1173.0,
1381 'end_time': 1259.535,
1386 # Subtitle with empty content
1387 'url': 'https://www.bilibili.tv/en/play/1005144/10131790',
1391 'title': 'E140 - Two Heartbeats: Kabuto\'s Trap',
1392 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1393 'episode_number': 140,
1395 'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
1397 'url': 'https://www.bilibili.tv/en/video/2041863208',
1401 'timestamp': 1670874843,
1402 'description': 'Scheduled for April 2023.\nStudio: ufotable',
1403 'thumbnail': r
're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
1404 'upload_date': '20221212',
1405 'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
1408 # episode comment extraction
1409 'url': 'https://www.bilibili.tv/en/play/34580/340317',
1413 'timestamp': 1604057820,
1414 'upload_date': '20201030',
1415 'episode_number': 5,
1416 'title': 'E5 - My Own Steel',
1417 'description': 'md5:2b17ab10aebb33e3c2a54da9e8e487e2',
1418 'thumbnail': r
're:https?://pic\.bstarstatic\.com/ogv/.+\.png$',
1419 'episode': 'Episode 5',
1420 'comment_count': int,
1424 'title': '<Untitled Chapter 1>'
1430 'start_time': 1290.0,
1439 # user generated content comment extraction
1440 'url': 'https://www.bilibili.tv/en/video/2045730385',
1444 'description': 'md5:693b6f3967fb4e7e7764ea817857c33a',
1445 'timestamp': 1667891924,
1446 'upload_date': '20221108',
1447 'title': 'That Time I Got Reincarnated as a Slime: Scarlet Bond - Official Trailer 3| AnimeStan - Bstation',
1448 'comment_count': int,
1449 'thumbnail': 'https://pic.bstarstatic.com/ugc/f6c363659efd2eabe5683fbb906b1582.jpg',
1455 # episode id without intro and outro
1456 'url': 'https://www.bilibili.tv/en/play/1048837/11246489',
1460 'title': 'E1 - Operation \'Strix\' <Owl>',
1461 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1462 'timestamp': 1649516400,
1463 'thumbnail': 'https://pic.bstarstatic.com/ogv/62cb1de23ada17fb70fbe7bdd6ff29c29da02a64.png',
1464 'episode': 'Episode 1',
1465 'episode_number': 1,
1466 'upload_date': '20220409',
1469 'url': 'https://www.biliintl.com/en/play/34613/341736',
1470 'only_matching': True,
1472 # User-generated content (as opposed to a series licensed from a studio)
1473 'url': 'https://bilibili.tv/en/video/2019955076',
1474 'only_matching': True,
1476 # No language in URL
1477 'url': 'https://www.bilibili.tv/video/2019955076',
1478 'only_matching': True,
1480 # Uppercase language in URL
1481 'url': 'https://www.bilibili.tv/EN/video/2019955076',
1482 'only_matching': True,
1485 def _make_url(video_id
, series_id
=None):
1487 return f
'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
1488 return f
'https://www.bilibili.tv/en/video/{video_id}'
1490 def _extract_video_metadata(self
, url
, video_id
, season_id
):
1491 url
, smuggled_data
= unsmuggle_url(url
, {})
1492 if smuggled_data
.get('title'):
1493 return smuggled_data
1495 webpage
= self
._download
_webpage
(url
, video_id
)
1498 self
._search
_json
(r
'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage
, 'preload state', video_id
, default
={})
1499 or self
._search
_nuxt
_data
(webpage
, video_id
, '__initialState', fatal
=False, traverse
=None))
1500 video_data
= traverse_obj(
1501 initial_data
, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type
=dict) or {}
1503 if season_id
and not video_data
:
1504 # Non-Bstation layout, read through episode list
1505 season_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id
)
1506 video_data
= traverse_obj(season_json
, (
1507 'sections', ..., 'episodes', lambda _
, v
: str(v
['episode_id']) == video_id
1508 ), expected_type
=dict, get_all
=False)
1510 # XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
1512 self
._parse
_video
_metadata
(video_data
), self
._search
_json
_ld
(webpage
, video_id
, fatal
=False), {
1513 'title': self
._html
_search
_meta
('og:title', webpage
),
1514 'description': self
._html
_search
_meta
('og:description', webpage
)
1517 def _get_comments_reply(self
, root_id
, next_id
=0, display_id
=None):
1518 comment_api_raw_data
= self
._download
_json
(
1519 'https://api.bilibili.tv/reply/web/detail', display_id
,
1520 note
=f
'Downloading reply comment of {root_id} - {next_id}',
1523 'ps': 20, # comment's reply per page (default: 3)
1528 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
1530 'author': traverse_obj(replies
, ('member', 'name')),
1531 'author_id': traverse_obj(replies
, ('member', 'mid')),
1532 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
1533 'text': traverse_obj(replies
, ('content', 'message')),
1534 'id': replies
.get('rpid'),
1535 'like_count': int_or_none(replies
.get('like_count')),
1536 'parent': replies
.get('parent'),
1537 'timestamp': unified_timestamp(replies
.get('ctime_text'))
1540 if not traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
1541 yield from self
._get
_comments
_reply
(
1542 root_id
, comment_api_raw_data
['data']['cursor']['next'], display_id
)
1544 def _get_comments(self
, video_id
, ep_id
):
1545 for i
in itertools
.count(0):
1546 comment_api_raw_data
= self
._download
_json
(
1547 'https://api.bilibili.tv/reply/web/root', video_id
,
1548 note
=f
'Downloading comment page {i + 1}',
1551 'pn': i
, # page number
1552 'ps': 20, # comment per page (default: 20)
1554 'type': 3 if ep_id
else 1, # 1: user generated content, 3: series content
1555 'sort_type': 1, # 1: best, 2: recent
1558 for replies
in traverse_obj(comment_api_raw_data
, ('data', 'replies', ...)):
1560 'author': traverse_obj(replies
, ('member', 'name')),
1561 'author_id': traverse_obj(replies
, ('member', 'mid')),
1562 'author_thumbnail': traverse_obj(replies
, ('member', 'face')),
1563 'text': traverse_obj(replies
, ('content', 'message')),
1564 'id': replies
.get('rpid'),
1565 'like_count': int_or_none(replies
.get('like_count')),
1566 'timestamp': unified_timestamp(replies
.get('ctime_text')),
1567 'author_is_uploader': bool(traverse_obj(replies
, ('member', 'type'))),
1569 if replies
.get('count'):
1570 yield from self
._get
_comments
_reply
(replies
.get('rpid'), display_id
=video_id
)
1572 if traverse_obj(comment_api_raw_data
, ('data', 'cursor', 'is_end')):
1575 def _real_extract(self
, url
):
1576 season_id
, ep_id
, aid
= self
._match
_valid
_url
(url
).group('season_id', 'ep_id', 'aid')
1577 video_id
= ep_id
or aid
1581 intro_ending_json
= self
._call
_api
(
1582 f
'/web/v2/ogv/play/episode?episode_id={ep_id}&platform=web',
1583 video_id
, fatal
=False) or {}
1584 if intro_ending_json
.get('skip'):
1585 # FIXME: start and end times seem to be off by a few seconds even when correct per ogv.*.js
1586 # ref: https://p.bstarstatic.com/fe-static/bstar-web-new/assets/ogv.2b147442.js
1588 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_start_time')), 1000),
1589 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'opening_end_time')), 1000),
1592 'start_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_start_time')), 1000),
1593 'end_time': float_or_none(traverse_obj(intro_ending_json
, ('skip', 'ending_end_time')), 1000),
1599 **self
._extract
_video
_metadata
(url
, video_id
, season_id
),
1600 'formats': self
._get
_formats
(ep_id
=ep_id
, aid
=aid
),
1601 'subtitles': self
.extract_subtitles(ep_id
=ep_id
, aid
=aid
),
1602 'chapters': chapters
,
1603 '__post_extractor': self
.extract_comments(video_id
, ep_id
)
1607 class BiliIntlSeriesIE(BiliIntlBaseIE
):
1608 IE_NAME
= 'biliIntl:series'
1609 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-zA-Z]{2}/)?(?:play|media)/(?P<id>\d+)/?(?:[?#]|$)'
1611 'url': 'https://www.bilibili.tv/en/play/34613',
1612 'playlist_mincount': 15,
1615 'title': 'TONIKAWA: Over the Moon For You',
1616 'description': 'md5:297b5a17155eb645e14a14b385ab547e',
1617 'categories': ['Slice of life', 'Comedy', 'Romance'],
1618 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
1622 'skip_download': True,
1625 'url': 'https://www.bilibili.tv/en/media/1048837',
1628 'title': 'SPY×FAMILY',
1629 'description': 'md5:b4434eb1a9a97ad2bccb779514b89f17',
1630 'categories': ['Adventure', 'Action', 'Comedy'],
1631 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.jpg$',
1634 'playlist_mincount': 25,
1636 'url': 'https://www.biliintl.com/en/play/34613',
1637 'only_matching': True,
1639 'url': 'https://www.biliintl.com/EN/play/34613',
1640 'only_matching': True,
1643 def _entries(self
, series_id
):
1644 series_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web', series_id
)
1645 for episode
in traverse_obj(series_json
, ('sections', ..., 'episodes', ...), expected_type
=dict):
1646 episode_id
= str(episode
['episode_id'])
1647 yield self
.url_result(smuggle_url(
1648 BiliIntlIE
._make
_url
(episode_id
, series_id
),
1649 self
._parse
_video
_metadata
(episode
)
1650 ), BiliIntlIE
, episode_id
)
1652 def _real_extract(self
, url
):
1653 series_id
= self
._match
_id
(url
)
1654 series_info
= self
._call
_api
(f
'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id
).get('season') or {}
1655 return self
.playlist_result(
1656 self
._entries
(series_id
), series_id
, series_info
.get('title'), series_info
.get('description'),
1657 categories
=traverse_obj(series_info
, ('styles', ..., 'title'), expected_type
=str_or_none
),
1658 thumbnail
=url_or_none(series_info
.get('horizontal_cover')), view_count
=parse_count(series_info
.get('view')))
1661 class BiliLiveIE(InfoExtractor
):
1662 _VALID_URL
= r
'https?://live\.bilibili\.com/(?:blanc/)?(?P<id>\d+)'
1665 'url': 'https://live.bilibili.com/196',
1668 'description': "周六杂谈回,其他时候随机游戏。 | \n录播:@下播型泛式录播组。 | \n直播通知群(全员禁言):666906670,902092584,59971⑧481 (功能一样,别多加)",
1670 'title': "太空狼人杀联动,不被爆杀就算赢",
1671 'thumbnail': "https://i0.hdslb.com/bfs/live/new_room_cover/e607bc1529057ef4b332e1026e62cf46984c314d.jpg",
1672 'timestamp': 1650802769,
1676 'url': 'https://live.bilibili.com/196?broadcast_type=0&is_room_feed=1?spm_id_from=333.999.space_home.strengthen_live_card.click',
1677 'only_matching': True
1679 'url': 'https://live.bilibili.com/blanc/196',
1680 'only_matching': True
1684 80: {'format_id': 'low', 'format_note': '流畅'}
,
1685 150: {'format_id': 'high_res', 'format_note': '高清'}
,
1686 250: {'format_id': 'ultra_high_res', 'format_note': '超清'}
,
1687 400: {'format_id': 'blue_ray', 'format_note': '蓝光'}
,
1688 10000: {'format_id': 'source', 'format_note': '原画'}
,
1689 20000: {'format_id': '4K', 'format_note': '4K'}
,
1690 30000: {'format_id': 'dolby', 'format_note': '杜比'}
,
1693 _quality
= staticmethod(qualities(list(_FORMATS
)))
1695 def _call_api(self
, path
, room_id
, query
):
1696 api_result
= self
._download
_json
(f
'https://api.live.bilibili.com/{path}', room_id
, query
=query
)
1697 if api_result
.get('code') != 0:
1698 raise ExtractorError(api_result
.get('message') or 'Unable to download JSON metadata')
1699 return api_result
.get('data') or {}
1701 def _parse_formats(self
, qn
, fmt
):
1702 for codec
in fmt
.get('codec') or []:
1703 if codec
.get('current_qn') != qn
:
1705 for url_info
in codec
['url_info']:
1707 'url': f
'{url_info["host"]}{codec["base_url"]}{url_info["extra"]}',
1708 'ext': fmt
.get('format_name'),
1709 'vcodec': codec
.get('codec_name'),
1710 'quality': self
._quality
(qn
),
1711 **self
._FORMATS
[qn
],
1714 def _real_extract(self
, url
):
1715 room_id
= self
._match
_id
(url
)
1716 room_data
= self
._call
_api
('room/v1/Room/get_info', room_id
, {'id': room_id}
)
1717 if room_data
.get('live_status') == 0:
1718 raise ExtractorError('Streamer is not live', expected
=True)
1721 for qn
in self
._FORMATS
.keys():
1722 stream_data
= self
._call
_api
('xlive/web-room/v2/index/getRoomPlayInfo', room_id
, {
1732 for fmt
in traverse_obj(stream_data
, ('playurl_info', 'playurl', 'stream', ..., 'format', ...)) or []:
1733 formats
.extend(self
._parse
_formats
(qn
, fmt
))
1737 'title': room_data
.get('title'),
1738 'description': room_data
.get('description'),
1739 'thumbnail': room_data
.get('user_cover'),
1740 'timestamp': stream_data
.get('live_time'),