]> jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/bilibili.py
[ie/crunchyroll] Fix stream extraction (#10005)
[yt-dlp.git] / yt_dlp / extractor / bilibili.py
index 4ed9e2af7299869d3fe248fe7735b86b63f2bf95..b38c90b1d1041cce4710fd2ed5e4783fea26b534 100644 (file)
@@ -93,11 +93,11 @@ def extract_formats(self, play_info):
 
         return formats
 
-    def _download_playinfo(self, video_id, cid):
+    def _download_playinfo(self, video_id, cid, headers=None):
         return self._download_json(
             'https://api.bilibili.com/x/player/playurl', video_id,
             query={'bvid': video_id, 'cid': cid, 'fnval': 4048},
-            note=f'Downloading video formats for cid {cid}')['data']
+            note=f'Downloading video formats for cid {cid}', headers=headers)['data']
 
     def json2srt(self, json_data):
         srt_data = ''
@@ -493,7 +493,8 @@ class BiliBiliIE(BilibiliBaseIE):
 
     def _real_extract(self, url):
         video_id = self._match_id(url)
-        webpage, urlh = self._download_webpage_handle(url, video_id)
+        headers = self.geo_verification_headers()
+        webpage, urlh = self._download_webpage_handle(url, video_id, headers=headers)
         if not self._match_valid_url(urlh.url):
             return self.url_result(urlh.url)
 
@@ -531,7 +532,7 @@ def _real_extract(self, url):
             self._download_json(
                 'https://api.bilibili.com/x/player/pagelist', video_id,
                 fatal=False, query={'bvid': video_id, 'jsonp': 'jsonp'},
-                note='Extracting videos in anthology'),
+                note='Extracting videos in anthology', headers=headers),
             'data', expected_type=list) or []
         is_anthology = len(page_list_json) > 1
 
@@ -552,7 +553,7 @@ def _real_extract(self, url):
 
         festival_info = {}
         if is_festival:
-            play_info = self._download_playinfo(video_id, cid)
+            play_info = self._download_playinfo(video_id, cid, headers=headers)
 
             festival_info = traverse_obj(initial_state, {
                 'uploader': ('videoInfo', 'upName'),
@@ -666,14 +667,15 @@ class BiliBiliBangumiIE(BilibiliBaseIE):
 
     def _real_extract(self, url):
         episode_id = self._match_id(url)
-        webpage = self._download_webpage(url, episode_id)
+        headers = self.geo_verification_headers()
+        webpage = self._download_webpage(url, episode_id, headers=headers)
 
         if '您所在的地区无法观看本片' in webpage:
             raise GeoRestrictedError('This video is restricted')
         elif '正在观看预览,大会员免费看全片' in webpage:
             self.raise_login_required('This video is for premium members only')
 
-        headers = {'Referer': url, **self.geo_verification_headers()}
+        headers['Referer'] = url
         play_info = self._download_json(
             'https://api.bilibili.com/pgc/player/web/v2/playurl', episode_id,
             'Extracting episode', query={'fnval': '4048', 'ep_id': episode_id},
@@ -724,7 +726,7 @@ def _real_extract(self, url):
             'duration': float_or_none(play_info.get('timelength'), scale=1000),
             'subtitles': self.extract_subtitles(episode_id, episode_info.get('cid'), aid=aid),
             '__post_extractor': self.extract_comments(aid),
-            'http_headers': headers,
+            'http_headers': {'Referer': url},
         }
 
 
@@ -1043,15 +1045,17 @@ def fetch_page(page_idx):
 
             try:
                 response = self._download_json('https://api.bilibili.com/x/space/wbi/arc/search',
-                                               playlist_id, note=f'Downloading page {page_idx}', query=query)
+                                               playlist_id, note=f'Downloading page {page_idx}', query=query,
+                                               headers={'referer': url})
             except ExtractorError as e:
                 if isinstance(e.cause, HTTPError) and e.cause.status == 412:
                     raise ExtractorError(
                         'Request is blocked by server (412), please add cookies, wait and try later.', expected=True)
                 raise
-            if response['code'] == -401:
+            if response['code'] in (-352, -401):
                 raise ExtractorError(
-                    'Request is blocked by server (401), please add cookies, wait and try later.', expected=True)
+                    f'Request is blocked by server ({-response["code"]}), '
+                    'please add cookies, wait and try later.', expected=True)
             return response['data']
 
         def get_metadata(page_data):
@@ -1305,6 +1309,26 @@ class BilibiliPlaylistIE(BilibiliSpaceListBaseIE):
             'upload_date': '20211127',
         },
         'playlist_mincount': 513,
+    }, {
+        'url': 'https://www.bilibili.com/list/1958703906?sid=547718&oid=687146339&bvid=BV1DU4y1r7tz',
+        'info_dict': {
+            'id': 'BV1DU4y1r7tz',
+            'ext': 'mp4',
+            'title': '【直播回放】8.20晚9:30 3d发布喵 2022年8月20日21点场',
+            'upload_date': '20220820',
+            'description': '',
+            'timestamp': 1661016330,
+            'uploader_id': '1958703906',
+            'uploader': '靡烟miya',
+            'thumbnail': r're:^https?://.*\.(jpg|jpeg|png)$',
+            'duration': 9552.903,
+            'tags': list,
+            'comment_count': int,
+            'view_count': int,
+            'like_count': int,
+            '_old_archive_ids': ['bilibili 687146339_part1'],
+        },
+        'params': {'noplaylist': True},
     }, {
         'url': 'https://www.bilibili.com/medialist/play/1958703906?business=space_series&business_id=547718&desc=1',
         'info_dict': {
@@ -1356,6 +1380,11 @@ def _extract_medialist(self, query, list_id):
 
     def _real_extract(self, url):
         list_id = self._match_id(url)
+
+        bvid = traverse_obj(parse_qs(url), ('bvid', 0))
+        if not self._yes_playlist(list_id, bvid):
+            return self.url_result(f'https://www.bilibili.com/video/{bvid}', BiliBiliIE)
+
         webpage = self._download_webpage(url, list_id)
         initial_state = self._search_json(r'window\.__INITIAL_STATE__\s*=', webpage, 'initial state', list_id)
         if traverse_obj(initial_state, ('error', 'code', {int_or_none})) != 200:
@@ -1940,6 +1969,7 @@ class BiliIntlIE(BiliIntlBaseIE):
         'only_matching': True,
     }]
 
+    @staticmethod
     def _make_url(video_id, series_id=None):
         if series_id:
             return f'https://www.bilibili.tv/en/play/{series_id}/{video_id}'
@@ -1971,7 +2001,7 @@ def _extract_video_metadata(self, url, video_id, season_id):
                 'title': get_element_by_class(
                     'bstar-meta__title', webpage) or self._html_search_meta('og:title', webpage),
                 'description': get_element_by_class(
-                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description'),
+                    'bstar-meta__desc', webpage) or self._html_search_meta('og:description', webpage),
             }, self._search_json_ld(webpage, video_id, default={}))
 
     def _get_comments_reply(self, root_id, next_id=0, display_id=None):