jfr.im git - yt-dlp.git/commitdiff
[extractor/biliIntl] Add fallback to `video_data` (#5971)
author HobbyistDev <redacted>
Fri, 6 Jan 2023 06:22:25 +0000 (15:22 +0900)
committer GitHub <redacted>
Fri, 6 Jan 2023 06:22:25 +0000 (11:52 +0530)
Authored by: HobbyistDev

yt_dlp/extractor/bilibili.py

index dbe212b387137f56c0e78f860a108c2e67697243..d4b05248f37f1259aa9b520d094cbe5d55bd83f8 100644 (file)
@@ -16,6 +16,7 @@
     format_field,
     int_or_none,
     make_archive_id,
+    merge_dicts,
     mimetype2ext,
     parse_count,
     parse_qs,
@@ -934,6 +935,10 @@ class BiliIntlIE(BiliIntlBaseIE):
             'title': 'E2 - The First Night',
             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
             'episode_number': 2,
+            'upload_date': '20201009',
+            'episode': 'Episode 2',
+            'timestamp': 1602259500,
+            'description': 'md5:297b5a17155eb645e14a14b385ab547e',
         }
     }, {
         # Non-Bstation page
@@ -944,6 +949,10 @@ class BiliIntlIE(BiliIntlBaseIE):
             'title': 'E3 - Who?',
             'thumbnail': r're:^https://pic\.bstarstatic\.com/ogv/.+\.png$',
             'episode_number': 3,
+            'description': 'md5:e1a775e71a35c43f141484715470ad09',
+            'episode': 'Episode 3',
+            'upload_date': '20211219',
+            'timestamp': 1639928700,
         }
     }, {
         # Subtitle with empty content
@@ -956,6 +965,17 @@ class BiliIntlIE(BiliIntlBaseIE):
             'episode_number': 140,
         },
         'skip': 'According to the copyright owner\'s request, you may only watch the video after you log in.'
+    }, {
+        'url': 'https://www.bilibili.tv/en/video/2041863208',
+        'info_dict': {
+            'id': '2041863208',
+            'ext': 'mp4',
+            'timestamp': 1670874843,
+            'description': 'Scheduled for April 2023.\nStudio: ufotable',
+            'thumbnail': r're:https?://pic[-\.]bstarstatic.+/ugc/.+\.jpg$',
+            'upload_date': '20221212',
+            'title': 'Kimetsu no Yaiba Season 3 Official Trailer - Bstation',
+        }
     }, {
         'url': 'https://www.biliintl.com/en/play/34613/341736',
         'only_matching': True,
@@ -989,7 +1009,7 @@ def _extract_video_metadata(self, url, video_id, season_id):
             self._search_json(r'window\.__INITIAL_(?:DATA|STATE)__\s*=', webpage, 'preload state', video_id, default={})
             or self._search_nuxt_data(webpage, video_id, '__initialState', fatal=False, traverse=None))
         video_data = traverse_obj(
-            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict)
+            initial_data, ('OgvVideo', 'epDetail'), ('UgcVideo', 'videoData'), ('ugc', 'archive'), expected_type=dict) or {}
 
         if season_id and not video_data:
             # Non-Bstation layout, read through episode list
@@ -998,7 +1018,12 @@ def _extract_video_metadata(self, url, video_id, season_id):
                 'sections', ..., 'episodes', lambda _, v: str(v['episode_id']) == video_id
             ), expected_type=dict, get_all=False)
 
-        return self._parse_video_metadata(video_data)
+        # XXX: webpage metadata may not be accurate; it is only used to avoid crashing when video_data is not found
+        return merge_dicts(
+            self._parse_video_metadata(video_data), self._search_json_ld(webpage, video_id), {
+                'title': self._html_search_meta('og:title', webpage),
+                'description': self._html_search_meta('og:description', webpage)
+            })
 
     def _real_extract(self, url):
         season_id, ep_id, aid = self._match_valid_url(url).group('season_id', 'ep_id', 'aid')