10 from .common
import InfoExtractor
, SearchInfoExtractor
11 from ..compat
import (
14 compat_urllib_parse_urlparse
25 srt_subtitles_timecode
,
36 class BiliBiliIE(InfoExtractor
):
39 (?:(?:www|bangumi)\.)?
44 anime/(?P<anime_id>\d+)/play\#
46 (s/)?video/[bB][vV](?P<id_bv>[^/?#&]+)
48 (?:/?\?p=(?P<page>\d+))?
52 'url': 'http://www.bilibili.com/video/av1074402/',
53 'md5': '5f7d29e1a2872f3df0cf76b1f87d3788',
55 'id': '1074402_part1',
58 'uploader_id': '156160',
60 'upload_date': '20140420',
61 'description': 'md5:ce18c2a2d2193f0df2917d270f2e5923',
62 'timestamp': 1398012678,
65 # Tested in BiliBiliBangumiIE
66 'url': 'http://bangumi.bilibili.com/anime/1869/play#40062',
67 'only_matching': True,
70 'url': 'http://www.bilibili.tv/video/av1074402/',
71 'only_matching': True,
73 'url': 'http://bangumi.bilibili.com/anime/5802/play#100643',
74 'md5': '3f721ad1e75030cc06faf73587cfec57',
78 'title': 'CHAOS;CHILD',
79 'description': '如果你是神明,并且能够让妄想成为现实。那你会进行怎么样的妄想?是淫靡的世界?独裁社会?毁灭性的制裁?还是……2015年,涩谷。从6年前发生的大灾害“涩谷地震”之后复兴了的这个街区里新设立的私立高中...',
81 'skip': 'Geo-restricted to China',
83 'url': 'http://www.bilibili.com/video/av8903802/',
85 'id': '8903802_part1',
87 'title': '阿滴英文|英文歌分享#6 "Closer',
88 'upload_date': '20170301',
89 'description': '滴妹今天唱Closer給你聽! 有史以来,被推最多次也是最久的歌曲,其实歌词跟我原本想像差蛮多的,不过还是好听! 微博@阿滴英文',
90 'timestamp': 1488382634,
91 'uploader_id': '65880958',
95 'skip_download': True,
98 # new BV video id format
99 'url': 'https://www.bilibili.com/video/BV1JE411F741',
100 'only_matching': True,
103 'url': 'https://www.bilibili.com/video/BV1bK411W797',
105 'id': 'BV1bK411W797',
106 'title': '物语中的人物是如何吐槽自己的OP的'
108 'playlist_count': 17,
111 _APP_KEY
= 'iVGUTjsxvpLeuDCf'
112 _BILIBILI_KEY
= 'aHRmhWMLkdeMuILqORnYZocwMBpMEOdt'
def _report_error(self, result):
    """Raise an ``ExtractorError`` describing a failed API response.

    ``result`` is checked for a ``'message'`` key first and a ``'code'``
    key second; the first one present is formatted together with
    ``self.IE_NAME`` and raised as an expected error. When neither key is
    present a generic error is raised instead. This method never returns
    normally.
    """
    # Ordered (key, message template) pairs: 'message' wins over 'code'.
    known_fields = (
        ('message', '%s said: %s'),
        ('code', '%s returns error %d'),
    )
    for field, template in known_fields:
        if field in result:
            raise ExtractorError(template % (self.IE_NAME, result[field]), expected=True)
    # The payload carried neither a message nor a code.
    raise ExtractorError('Can\'t extract Bangumi episode ID')
122 def _real_extract(self
, url
):
123 url
, smuggled_data
= unsmuggle_url(url
, {})
125 mobj
= self
._match
_valid
_url
(url
)
126 video_id
= mobj
.group('id_bv') or mobj
.group('id')
128 av_id
, bv_id
= self
._get
_video
_id
_set
(video_id
, mobj
.group('id_bv') is not None)
132 anime_id
= mobj
.group('anime_id')
133 page_id
= mobj
.group('page')
134 webpage
= self
._download
_webpage
(url
, video_id
)
136 # Bilibili anthologies are similar to playlists but all videos share the same video ID as the anthology itself.
137 # If the video has no page argument, check to see if it's an anthology
139 if not self
.get_param('noplaylist'):
140 r
= self
._extract
_anthology
_entries
(bv_id
, video_id
, webpage
)
142 self
.to_screen('Downloading anthology %s - add --no-playlist to just download video' % video_id
)
145 self
.to_screen('Downloading just video %s because of --no-playlist' % video_id
)
147 if 'anime/' not in url
:
148 cid
= self
._search
_regex
(
149 r
'\bcid(?:["\']:|
=)(\d
+),["\']page(?:["\']:|
=)' + str(page_id), webpage, 'cid
',
151 ) or self._search_regex(
152 r'\bcid
(?
:["\']:|=)(\d+)', webpage, 'cid',
154 ) or compat_parse_qs(self._search_regex(
155 [r'EmbedPlayer\([^)]+,\s*"([^
"]+)"\
)',
156 r'EmbedPlayer\
([^
)]+,\s
*\\"([^"]+)\\"\)',
157 r'<iframe[^>]+src="https
://secure\
.bilibili\
.com
/secure
,([^
"]+)"'],
158 webpage, 'player parameters
'))['cid
'][0]
160 if 'no_bangumi_tip
' not in smuggled_data:
161 self.to_screen('Downloading episode
%s. To download all videos
in anime
%s, re
-run yt
-dlp
with %s' % (
162 video_id, anime_id, compat_urlparse.urljoin(url, '//bangumi
.bilibili
.com
/anime
/%s' % anime_id)))
164 'Content
-Type
': 'application
/x
-www
-form
-urlencoded
; charset
=UTF
-8',
167 headers.update(self.geo_verification_headers())
169 js = self._download_json(
170 'http
://bangumi
.bilibili
.com
/web_api
/get_source
', video_id,
171 data=urlencode_postdata({'episode_id': video_id}),
173 if 'result
' not in js:
174 self._report_error(js)
175 cid = js['result
']['cid
']
178 'Accept
': 'application
/json
',
181 headers.update(self.geo_verification_headers())
183 video_info = self._parse_json(
184 self._search_regex(r'window
.__playinfo
__\s
*=\s
*({.+?}
)</script
>', webpage, 'video info
', default=None) or '{}',
185 video_id, fatal=False)
186 video_info = video_info.get('data
') or {}
188 durl = traverse_obj(video_info, ('dash
', 'video
'))
189 audios = traverse_obj(video_info, ('dash
', 'audio
')) or []
192 RENDITIONS = ('qn
=80&quality
=80&type=', 'quality
=2&type=mp4
')
193 for num, rendition in enumerate(RENDITIONS, start=1):
194 payload = 'appkey
=%s&cid
=%s&otype
=json
&%s' % (self._APP_KEY, cid, rendition)
195 sign = hashlib.md5((payload + self._BILIBILI_KEY).encode('utf
-8')).hexdigest()
197 video_info = self._download_json(
198 'http
://interface
.bilibili
.com
/v2
/playurl?
%s&sign
=%s' % (payload, sign),
199 video_id, note='Downloading video info page
',
200 headers=headers, fatal=num == len(RENDITIONS))
204 if not durl and 'durl
' not in video_info:
205 if num < len(RENDITIONS):
207 self._report_error(video_info)
210 for idx, durl in enumerate(durl or video_info['durl
']):
212 'url
': durl.get('baseUrl
') or durl.get('base_url
') or durl.get('url
'),
213 'ext
': mimetype2ext(durl.get('mimeType
') or durl.get('mime_type
')),
214 'fps
': int_or_none(durl.get('frameRate
') or durl.get('frame_rate
')),
215 'width
': int_or_none(durl.get('width
')),
216 'height
': int_or_none(durl.get('height
')),
217 'vcodec
': durl.get('codecs
'),
218 'acodec
': 'none
' if audios else None,
219 'tbr
': float_or_none(durl.get('bandwidth
'), scale=1000),
220 'filesize
': int_or_none(durl.get('size
')),
222 for backup_url in traverse_obj(durl, 'backup_url
', expected_type=list) or []:
225 'quality
': -2 if 'hd
.mp4
' in backup_url else -3,
230 'url
': audio.get('baseUrl
') or audio.get('base_url
') or audio.get('url
'),
231 'ext
': mimetype2ext(audio.get('mimeType
') or audio.get('mime_type
')),
232 'fps
': int_or_none(audio.get('frameRate
') or audio.get('frame_rate
')),
233 'width
': int_or_none(audio.get('width
')),
234 'height
': int_or_none(audio.get('height
')),
235 'acodec
': audio.get('codecs
'),
237 'tbr
': float_or_none(audio.get('bandwidth
'), scale=1000),
238 'filesize
': int_or_none(audio.get('size
'))
240 for backup_url in traverse_obj(audio, 'backup_url
', expected_type=list) or []:
243 # backup URLs have lower priorities
249 'duration
': float_or_none(durl.get('length
'), 1000),
257 self._sort_formats(formats)
259 title = self._html_search_regex((
260 r'<h1
[^
>]+title
=(["\'])(?P<content>[^"\']+)',
261 r'(?s
)<h1
[^
>]*>(?P
<content
>.+?
)</h1
>',
262 self._meta_regex('title
')
263 ), webpage, 'title
', group='content
', fatal=False)
265 # Get part title for anthologies
266 if page_id is not None:
267 # TODO: The json is already downloaded by _extract_anthology_entries. Don't redownload
for each video
.
268 part_info
= traverse_obj(self
._download
_json
(
269 f
'https://api.bilibili.com/x/player/pagelist?bvid={bv_id}&jsonp=jsonp',
270 video_id
, note
='Extracting videos in anthology'), 'data', expected_type
=list)
271 title
= title
if len(part_info
) == 1 else traverse_obj(part_info
, (int(page_id
) - 1, 'part')) or title
273 description
= self
._html
_search
_meta
('description', webpage
)
274 timestamp
= unified_timestamp(self
._html
_search
_regex
(
275 r
'<time[^>]+datetime="([^"]+)"', webpage
, 'upload time',
276 default
=None) or self
._html
_search
_meta
(
277 'uploadDate', webpage
, 'timestamp', default
=None))
278 thumbnail
= self
._html
_search
_meta
(['og:image', 'thumbnailUrl'], webpage
)
280 # TODO 'view_count' requires deobfuscating Javascript
282 'id': f
'{video_id}_part{page_id or 1}',
285 'description': description
,
286 'timestamp': timestamp
,
287 'thumbnail': thumbnail
,
288 'duration': float_or_none(video_info
.get('timelength'), scale
=1000),
291 uploader_mobj
= re
.search(
292 r
'<a[^>]+href="(?:https?:)?//space\.bilibili\.com/(?P<id>\d+)"[^>]*>\s*(?P<name>[^<]+?)\s*<',
296 'uploader': uploader_mobj
.group('name').strip(),
297 'uploader_id': uploader_mobj
.group('id'),
300 if not info
.get('uploader'):
301 info
['uploader'] = self
._html
_search
_meta
(
302 'author', webpage
, 'uploader', default
=None)
305 'tags': traverse_obj(self
._download
_json
(
306 f
'https://api.bilibili.com/x/tag/archive/tags?aid={video_id}',
307 video_id
, fatal
=False, note
='Downloading tags'), ('data', ..., 'tag_name')),
310 info
['subtitles'] = {
313 'url': f
'https://comment.bilibili.com/{cid}.xml',
318 # Requires https://github.com/m13253/danmaku2ass which is licenced under GPL3
319 # See https://github.com/animelover1984/youtube-dl
321 raw_danmaku = self._download_webpage(
322 f'https://comment.bilibili.com/{cid}.xml', video_id, fatal=False, note='Downloading danmaku comments')
323 danmaku = NiconicoIE.CreateDanmaku(raw_danmaku, commentType='Bilibili', x=1024, y=576)
324 entries[0]['subtitles'] = {
332 top_level_info
['__post_extractor'] = self
.extract_comments(video_id
)
334 for entry
in entries
:
337 if len(entries
) == 1:
338 entries
[0].update(top_level_info
)
341 for idx
, entry
in enumerate(entries
):
342 entry
['id'] = '%s_part%d' % (video_id
, (idx
+ 1))
348 'description': description
,
349 **info
, **top_level_info
352 def _extract_anthology_entries(self
, bv_id
, video_id
, webpage
):
353 title
= self
._html
_search
_regex
(
354 (r
'<h1[^>]+\btitle=(["\'])(?P
<title
>(?
:(?
!\
1).)+)\
1',
355 r'(?s
)<h1
[^
>]*>(?P
<title
>.+?
)</h1
>',
356 r'<title
>(?P
<title
>.+?
)</title
>'), webpage, 'title
',
358 json_data = self._download_json(
359 f'https
://api
.bilibili
.com
/x
/player
/pagelist?bvid
={bv_id}
&jsonp
=jsonp
',
360 video_id, note='Extracting videos
in anthology
')
362 if json_data['data
']:
363 return self.playlist_from_matches(
364 json_data['data
'], bv_id, title, ie=BiliBiliIE.ie_key(),
365 getter=lambda entry: 'https
://www
.bilibili
.com
/video
/%s?p
=%d' % (bv_id, entry['page
']))
367 def _get_video_id_set(self, id, is_bv):
368 query = {'bvid': id} if is_bv else {'aid': id}
369 response = self._download_json(
370 "http://api.bilibili.cn/x/web-interface/view",
372 note='Grabbing original ID via API
')
374 if response['code
'] == -400:
375 raise ExtractorError('Video ID does
not exist
', expected=True, video_id=id)
376 elif response['code
'] != 0:
377 raise ExtractorError(f'Unknown error occurred during API
check (code {response["code"]}
)',
378 expected=True, video_id=id)
379 return response['data
']['aid
'], response['data
']['bvid
']
381 def _get_comments(self, video_id, commentPageNumber=0):
382 for idx in itertools.count(1):
383 replies = traverse_obj(
385 f'https
://api
.bilibili
.com
/x
/v2
/reply?pn
={idx}
&oid
={video_id}
&type=1&jsonp
=jsonp
&sort
=2&_
=1567227301685',
386 video_id, note=f'Extracting comments
from page {idx}
', fatal=False),
390 for children in map(self._get_all_children, replies):
393 def _get_all_children(self, reply):
395 'author
': traverse_obj(reply, ('member
', 'uname
')),
396 'author_id
': traverse_obj(reply, ('member
', 'mid
')),
397 'id': reply.get('rpid
'),
398 'text
': traverse_obj(reply, ('content
', 'message
')),
399 'timestamp
': reply.get('ctime
'),
400 'parent
': reply.get('parent
') or 'root
',
402 for children in map(self._get_all_children, reply.get('replies
') or []):
406 class BiliBiliBangumiIE(InfoExtractor):
407 _VALID_URL = r'https?
://bangumi\
.bilibili\
.com
/anime
/(?P
<id>\d
+)'
409 IE_NAME = 'bangumi
.bilibili
.com
'
410 IE_DESC = 'BiliBili番剧
'
413 'url
': 'http
://bangumi
.bilibili
.com
/anime
/1869',
417 'description
': 'md5
:6a9622b911565794c11f25f81d6a97d2
',
419 'playlist_count
': 26,
421 'url
': 'http
://bangumi
.bilibili
.com
/anime
/1869',
425 'description
': 'md5
:6a9622b911565794c11f25f81d6a97d2
',
428 'md5
': '91da8621454dd58316851c27c68b0c13
',
433 'description
': '故事发生在日本的江户时代。风是一个小酒馆的打工女。一日,酒馆里来了一群恶霸,虽然他们的举动令风十分不满,但是毕竟风只是一届女流,无法对他们采取什么行动,只能在心里嘟哝。这时,酒家里又进来了个“不良份子
...',
434 'timestamp
': 1414538739,
435 'upload_date
': '20141028',
436 'episode
': '疾风怒涛 Tempestuous Temperaments
',
441 'playlist_items
': '1',
def suitable(cls, url):
    """Tell whether this extractor should handle ``url``.

    Any URL that ``BiliBiliIE`` reports as suitable is declined here;
    every other URL is passed to the parent class's ``suitable`` check.
    """
    if BiliBiliIE.suitable(url):
        return False
    return super(BiliBiliBangumiIE, cls).suitable(url)
449 def _real_extract(self, url):
450 bangumi_id = self._match_id(url)
452 # Sometimes this API returns a JSONP response
453 season_info = self._download_json(
454 'http
://bangumi
.bilibili
.com
/jsonp
/seasoninfo
/%s.ver
' % bangumi_id,
455 bangumi_id, transform_source=strip_jsonp)['result
']
458 '_type
': 'url_transparent
',
459 'url
': smuggle_url(episode['webplay_url
'], {'no_bangumi_tip': 1}),
460 'ie_key
': BiliBiliIE.ie_key(),
461 'timestamp
': parse_iso8601(episode.get('update_time
'), delimiter=' '),
462 'episode
': episode.get('index_title
'),
463 'episode_number
': int_or_none(episode.get('index
')),
464 } for episode in season_info['episodes
']]
466 entries = sorted(entries, key=lambda entry: entry.get('episode_number
'))
468 return self.playlist_result(
470 season_info.get('bangumi_title
'), season_info.get('evaluate
'))
473 class BilibiliChannelIE(InfoExtractor):
474 _VALID_URL = r'https?
://space
.bilibili\
.com
/(?P
<id>\d
+)'
475 _API_URL = "https://api.bilibili.com/x/space/arc/search?mid=%s&pn=%d&jsonp=jsonp"
477 'url
': 'https
://space
.bilibili
.com
/3985676/video
',
479 'playlist_mincount
': 112,
482 def _entries(self, list_id):
483 count, max_count = 0, None
485 for page_num in itertools.count(1):
486 data = self._download_json(
487 self._API_URL % (list_id, page_num), list_id, note=f'Downloading page {page_num}
')['data
']
489 max_count = max_count or traverse_obj(data, ('page
', 'count
'))
491 entries = traverse_obj(data, ('list', 'vlist
'))
494 for entry in entries:
495 yield self.url_result(
496 'https
://www
.bilibili
.com
/video
/%s' % entry['bvid
'],
497 BiliBiliIE.ie_key(), entry['bvid
'])
499 count += len(entries)
500 if max_count and count >= max_count:
def _real_extract(self, url):
    """Return a playlist whose entries come from ``_entries`` for the matched ID."""
    channel_id = self._match_id(url)
    channel_videos = self._entries(channel_id)
    # The matched ID doubles as the playlist ID.
    return self.playlist_result(channel_videos, channel_id)
508 class BilibiliCategoryIE(InfoExtractor):
509 IE_NAME = 'Bilibili category extractor
'
510 _MAX_RESULTS = 1000000
511 _VALID_URL = r'https?
://www\
.bilibili\
.com
/v
/[a
-zA
-Z
]+\
/[a
-zA
-Z
]+'
513 'url
': 'https
://www
.bilibili
.com
/v
/kichiku
/mad
',
515 'id': 'kichiku
: mad
',
516 'title
': 'kichiku
: mad
'
518 'playlist_mincount
': 45,
524 def _fetch_page(self, api_url, num_pages, query, page_num):
525 parsed_json = self._download_json(
526 api_url, query, query={'Search_key': query, 'pn': page_num},
527 note='Extracting results
from page
%s of
%s' % (page_num, num_pages))
529 video_list = traverse_obj(parsed_json, ('data
', 'archives
'), expected_type=list)
531 raise ExtractorError('Failed to retrieve video
list for page
%d' % page_num)
533 for video in video_list:
534 yield self.url_result(
535 'https
://www
.bilibili
.com
/video
/%s' % video['bvid
'], 'BiliBili
', video['bvid
'])
537 def _entries(self, category, subcategory, query):
538 # map of categories : subcategories : RIDs
542 'manual_vocaloid
': 126,
549 if category not in rid_map:
550 raise ExtractorError(
551 f'The category {category} isn
\'t supported
. Supported categories
: {list(rid_map.keys())}
')
552 if subcategory not in rid_map[category]:
553 raise ExtractorError(
554 f'The subcategory {subcategory} isn
\'t supported
for this category
. Supported subcategories
: {list(rid_map[category].keys())}
')
555 rid_value = rid_map[category][subcategory]
557 api_url = 'https
://api
.bilibili
.com
/x
/web
-interface
/newlist?rid
=%d&type=1&ps
=20&jsonp
=jsonp
' % rid_value
558 page_json = self._download_json(api_url, query, query={'Search_key': query, 'pn': '1'})
559 page_data = traverse_obj(page_json, ('data
', 'page
'), expected_type=dict)
560 count, size = int_or_none(page_data.get('count
')), int_or_none(page_data.get('size
'))
561 if count is None or not size:
562 raise ExtractorError('Failed to calculate either page count
or size
')
564 num_pages = math.ceil(count / size)
566 return OnDemandPagedList(functools.partial(
567 self._fetch_page, api_url, num_pages, query), size)
def _real_extract(self, url):
    """Return a playlist for the category/subcategory named in the URL path.

    The third and fourth ``/``-separated pieces of the URL path are taken as
    the category and subcategory. Their ``"category: subcategory"`` form is
    used as the query passed to ``_entries`` and as both playlist ID and
    playlist title.
    """
    path_pieces = compat_urllib_parse_urlparse(url).path.split('/')
    # path_pieces[0] is the empty string before the leading slash.
    category, subcategory = path_pieces[2:4]
    query = f'{category}: {subcategory}'
    entries = self._entries(category, subcategory, query)
    return self.playlist_result(entries, query, query)
577 class BiliBiliSearchIE(SearchInfoExtractor):
578 IE_DESC = 'Bilibili video search
'
579 _MAX_RESULTS = 100000
580 _SEARCH_KEY = 'bilisearch
'
582 def _search_results(self, query):
583 for page_num in itertools.count(1):
584 videos = self._download_json(
585 'https
://api
.bilibili
.com
/x
/web
-interface
/search
/type', query,
586 note=f'Extracting results
from page {page_num}
', query={
594 '__refresh__
': 'true
',
595 'search_type
': 'video
',
598 })['data
'].get('result
') or []
600 yield self.url_result(video['arcurl
'], 'BiliBili
', str(video['aid
']))
603 class BilibiliAudioBaseIE(InfoExtractor):
604 def _call_api(self, path, sid, query=None):
607 return self._download_json(
608 'https
://www
.bilibili
.com
/audio
/music
-service
-c
/web
/' + path,
609 sid, query=query)['data
']
612 class BilibiliAudioIE(BilibiliAudioBaseIE):
613 _VALID_URL = r'https?
://(?
:www\
.)?bilibili\
.com
/audio
/au(?P
<id>\d
+)'
615 'url
': 'https
://www
.bilibili
.com
/audio
/au1003142
',
616 'md5
': 'fec4987014ec94ef9e666d4d158ad03b
',
620 'title
': '【tsukimi】YELLOW
/ 神山羊
',
622 'comment_count
': int,
623 'description
': 'YELLOW的mp3版!
',
630 'thumbnail
': r're
:^https?
://.+\
.jpg
',
631 'timestamp
': 1564836614,
632 'upload_date
': '20190803',
633 'uploader
': 'tsukimi
-つきみぐー
',
638 def _real_extract(self, url):
639 au_id = self._match_id(url)
641 play_data = self._call_api('url
', au_id)
643 'url
': play_data['cdns
'][0],
644 'filesize
': int_or_none(play_data.get('size
')),
648 song = self._call_api('song
/info
', au_id)
649 title = song['title
']
650 statistic = song.get('statistic
') or {}
653 lyric = song.get('lyric
')
665 'artist
': song.get('author
'),
666 'comment_count
': int_or_none(statistic.get('comment
')),
667 'description
': song.get('intro
'),
668 'duration
': int_or_none(song.get('duration
')),
669 'subtitles
': subtitles,
670 'thumbnail
': song.get('cover
'),
671 'timestamp
': int_or_none(song.get('passtime
')),
672 'uploader
': song.get('uname
'),
673 'view_count
': int_or_none(statistic.get('play
')),
677 class BilibiliAudioAlbumIE(BilibiliAudioBaseIE):
678 _VALID_URL = r'https?
://(?
:www\
.)?bilibili\
.com
/audio
/am(?P
<id>\d
+)'
680 'url
': 'https
://www
.bilibili
.com
/audio
/am10624
',
683 'title
': '每日新曲推荐(每日
11:00更新)
',
684 'description
': '每天
11:00更新,为你推送最新音乐
',
686 'playlist_count
': 19,
689 def _real_extract(self, url):
690 am_id = self._match_id(url)
692 songs = self._call_api(
693 'song
/of
-menu
', am_id, {'sid': am_id, 'pn': 1, 'ps': 100})['data
']
697 sid = str_or_none(song.get('id'))
700 entries.append(self.url_result(
701 'https
://www
.bilibili
.com
/audio
/au
' + sid,
702 BilibiliAudioIE.ie_key(), sid))
705 album_data = self._call_api('menu
/info
', am_id) or {}
706 album_title = album_data.get('title
')
708 for entry in entries:
709 entry['album
'] = album_title
710 return self.playlist_result(
711 entries, am_id, album_title, album_data.get('intro
'))
713 return self.playlist_result(entries, am_id)
716 class BiliBiliPlayerIE(InfoExtractor):
717 _VALID_URL = r'https?
://player\
.bilibili\
.com
/player\
.html
\?.*?
\baid
=(?P
<id>\d
+)'
719 'url
': 'http
://player
.bilibili
.com
/player
.html?aid
=92494333&cid
=157926707&page
=1',
720 'only_matching
': True,
def _real_extract(self, url):
    """Delegate a player URL to ``BiliBiliIE``.

    The ID matched from ``url`` is placed into a
    ``http://www.bilibili.tv/video/av<ID>/`` address, which is returned as
    a URL result keyed to ``BiliBiliIE``.
    """
    video_id = self._match_id(url)
    video_page_url = 'http://www.bilibili.tv/video/av%s/' % video_id
    return self.url_result(
        video_page_url, ie=BiliBiliIE.ie_key(), video_id=video_id)
730 class BiliIntlBaseIE(InfoExtractor):
731 _API_URL = 'https
://api
.bilibili
.tv
/intl
/gateway
'
732 _NETRC_MACHINE = 'biliintl
'
734 def _call_api(self, endpoint, *args, **kwargs):
735 json = self._download_json(self._API_URL + endpoint, *args, **kwargs)
737 if json['code
'] in (10004004, 10004005, 10023006):
738 self.raise_login_required()
739 elif json['code
'] == 10004001:
740 self.raise_geo_restricted()
742 if json.get('message
') and str(json['code
']) != json['message
']:
743 errmsg = f'{kwargs.get("errnote", "Unable to download JSON metadata")}
: {self.IE_NAME} said
: {json["message"]}
'
745 errmsg = kwargs.get('errnote
', 'Unable to download JSON metadata
')
746 if kwargs.get('fatal
'):
747 raise ExtractorError(errmsg)
749 self.report_warning(errmsg)
750 return json.get('data
')
752 def json2srt(self, json):
754 f'{i + 1}
\n{srt_subtitles_timecode(line["from"])}
--> {srt_subtitles_timecode(line["to"])}
\n{line["content"]}
'
755 for i, line in enumerate(json['body
']) if line.get('content
'))
758 def _get_subtitles(self, ep_id):
759 sub_json = self._call_api(f'/web
/v2
/subtitle?episode_id
={ep_id}
&platform
=web
', ep_id)
761 for sub in sub_json.get('subtitles
') or []:
762 sub_url = sub.get('url
')
765 sub_data = self._download_json(
766 sub_url, ep_id, errnote='Unable to download subtitles
', fatal=False,
767 note='Downloading subtitles
%s' % f' for {sub["lang"]}
' if sub.get('lang
') else '')
770 subtitles.setdefault(sub.get('lang_key
', 'en
'), []).append({
772 'data
': self.json2srt(sub_data)
776 def _get_formats(self, ep_id):
777 video_json = self._call_api(f'/web
/playurl?ep_id
={ep_id}
&platform
=web
', ep_id,
778 note='Downloading video formats
', errnote='Unable to download video formats
')
779 video_json = video_json['playurl
']
781 for vid in video_json.get('video
') or []:
782 video_res = vid.get('video_resource
') or {}
783 video_info = vid.get('stream_info
') or {}
784 if not video_res.get('url
'):
787 'url
': video_res['url
'],
789 'format_note
': video_info.get('desc_words
'),
790 'width
': video_res.get('width
'),
791 'height
': video_res.get('height
'),
792 'vbr
': video_res.get('bandwidth
'),
794 'vcodec
': video_res.get('codecs
'),
795 'filesize
': video_res.get('size
'),
797 for aud in video_json.get('audio_resource
') or []:
798 if not aud.get('url
'):
803 'abr
': aud.get('bandwidth
'),
804 'acodec
': aud.get('codecs
'),
806 'filesize
': aud.get('size
'),
809 self._sort_formats(formats)
812 def _extract_ep_info(self, episode_data, ep_id):
815 'title
': episode_data.get('title_display
') or episode_data['title
'],
816 'thumbnail
': episode_data.get('cover
'),
817 'episode_number
': int_or_none(self._search_regex(
818 r'^
E(\d
+)(?
:$|
- )', episode_data.get('title_display
'), 'episode number
', default=None)),
819 'formats
': self._get_formats(ep_id),
820 'subtitles
': self._get_subtitles(ep_id),
821 'extractor_key
': BiliIntlIE.ie_key(),
824 def _perform_login(self, username, password):
826 from Cryptodome.PublicKey import RSA
827 from Cryptodome.Cipher import PKCS1_v1_5
830 from Crypto.PublicKey import RSA
831 from Crypto.Cipher import PKCS1_v1_5
833 raise ExtractorError('pycryptodomex
not found
. Please install
', expected=True)
835 key_data = self._download_json(
836 'https
://passport
.bilibili
.tv
/x
/intl
/passport
-login
/web
/key?lang
=en
-US
', None,
837 note='Downloading login key
', errnote='Unable to download login key
')['data
']
839 public_key = RSA.importKey(key_data['key
'])
840 password_hash = PKCS1_v1_5.new(public_key).encrypt((key_data['hash'] + password).encode('utf
-8'))
841 login_post = self._download_json(
842 'https
://passport
.bilibili
.tv
/x
/intl
/passport
-login
/web
/login
/password?lang
=en
-US
', None, data=urlencode_postdata({
843 'username
': username,
844 'password
': base64.b64encode(password_hash).decode('ascii
'),
848 }), note='Logging
in', errnote='Unable to log
in')
849 if login_post.get('code
'):
850 if login_post.get('message
'):
851 raise ExtractorError(f'Unable to log
in: {self.IE_NAME} said
: {login_post["message"]}
', expected=True)
853 raise ExtractorError('Unable to log
in')
856 class BiliIntlIE(BiliIntlBaseIE):
857 _VALID_URL = r'https?
://(?
:www\
.)?
bili(?
:bili\
.tv|intl\
.com
)/(?
:[a
-z
]{2}
/)?play
/(?P
<season_id
>\d
+)/(?P
<id>\d
+)'
860 'url
': 'https
://www
.bilibili
.tv
/en
/play
/34613/341736',
864 'title
': 'E2
- The First Night
',
865 'thumbnail
': r're
:^https
://pic\
.bstarstatic\
.com
/ogv
/.+\
.png$
',
870 'url
': 'https
://www
.bilibili
.tv
/en
/play
/1033760/11005006',
874 'title
': 'E3
- Who?
',
875 'thumbnail
': r're
:^https
://pic\
.bstarstatic\
.com
/ogv
/.+\
.png$
',
879 # Subtitle with empty content
880 'url
': 'https
://www
.bilibili
.tv
/en
/play
/1005144/10131790',
884 'title
': 'E140
- Two Heartbeats
: Kabuto
\'s Trap
',
885 'thumbnail
': r're
:^https
://pic\
.bstarstatic\
.com
/ogv
/.+\
.png$
',
886 'episode_number
': 140,
888 'skip
': 'According to the copyright owner
\'s request
, you may only watch the video after you log
in.'
890 'url
': 'https
://www
.biliintl
.com
/en
/play
/34613/341736',
891 'only_matching
': True,
894 def _real_extract(self, url):
895 season_id, video_id = self._match_valid_url(url).groups()
896 webpage = self._download_webpage(url, video_id)
898 initial_data = self._parse_json(self._search_regex(
899 r'window\
.__INITIAL
_DATA
__\s
*=\s
*({.+?}
);', webpage,
900 'preload state
', default='{}'), video_id, fatal=False) or {}
901 episode_data
= traverse_obj(initial_data
, ('OgvVideo', 'epDetail'), expected_type
=dict)
904 # Non-Bstation layout, read through episode list
905 season_json
= self
._call
_api
(f
'/web/v2/ogv/play/episodes?season_id={season_id}&platform=web', video_id
)
907 episode
for episode
in traverse_obj(season_json
, ('sections', ..., 'episodes', ...), expected_type
=dict)
908 if str(episode
.get('episode_id')) == video_id
)
909 return self
._extract
_ep
_info
(episode_data
, video_id
)
912 class BiliIntlSeriesIE(BiliIntlBaseIE
):
913 _VALID_URL
= r
'https?://(?:www\.)?bili(?:bili\.tv|intl\.com)/(?:[a-z]{2}/)?play/(?P<id>\d+)$'
915 'url': 'https://www.bilibili.tv/en/play/34613',
916 'playlist_mincount': 15,
919 'title': 'Fly Me to the Moon',
920 'description': 'md5:a861ee1c4dc0acfad85f557cc42ac627',
921 'categories': ['Romance', 'Comedy', 'Slice of life'],
922 'thumbnail': r
're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
926 'skip_download': True,
929 'url': 'https://www.biliintl.com/en/play/34613',
930 'only_matching': True,
def _entries(self, series_id):
    """Yield one info dict per episode listed for ``series_id``.

    The episode list is requested through ``_call_api``; every dict found
    under ``sections -> * -> episodes -> *`` is handed to
    ``_extract_ep_info`` along with its stringified ``episode_id``.
    """
    endpoint = f'/web/v2/ogv/play/episodes?season_id={series_id}&platform=web'
    series_json = self._call_api(endpoint, series_id)
    episodes = traverse_obj(
        series_json, ('sections', ..., 'episodes', ...),
        expected_type=dict, default=[])
    for episode in episodes:
        yield self._extract_ep_info(episode, str(episode.get('episode_id')))
def _real_extract(self, url):
    """Return a playlist covering every episode of a season.

    The season ID is matched from ``url``. Title, description, categories,
    thumbnail and view count are read from the ``season_info`` endpoint;
    the entries themselves come from ``_entries``. Missing metadata simply
    results in ``None``/empty values rather than an error.
    """
    series_id = self._match_id(url)
    # _call_api returns the response's 'data' member, which may be None
    # (for instance when it only warns about an API error), so fall back to
    # an empty dict before calling .get() on it.
    season_data = self._call_api(
        f'/web/v2/ogv/play/season_info?season_id={series_id}&platform=web', series_id) or {}
    series_info = season_data.get('season') or {}
    return self.playlist_result(
        self._entries(series_id), series_id, series_info.get('title'), series_info.get('description'),
        categories=traverse_obj(series_info, ('styles', ..., 'title'), expected_type=str_or_none),
        thumbnail=url_or_none(series_info.get('horizontal_cover')), view_count=parse_count(series_info.get('view')))