[ntvcojp] Extract NUXT data (#1915)

author The Hatsune Daishi <redacted>

Tue, 7 Dec 2021 17:03:48 +0000 (02:03 +0900)

committer GitHub <redacted>

Tue, 7 Dec 2021 17:03:48 +0000 (22:33 +0530)
author The Hatsune Daishi <redacted>
Tue, 7 Dec 2021 17:03:48 +0000 (02:03 +0900)
committer GitHub <redacted>
Tue, 7 Dec 2021 17:03:48 +0000 (22:33 +0530)
diff --git a/yt_dlp/extractor/ntvcojp.py b/yt_dlp/extractor/ntvcojp.py

index 0c8221b223cb1edbf12867ec245f6f7798fa90f2..c9af91188b6a6ed6b6762147ce33f6dafa17fb5c 100644 (file)
--- a/yt_dlp/extractor/ntvcojp.py
+++ b/yt_dlp/extractor/ntvcojp.py
@@ -3,8 +3,9 @@
  
  from .common import InfoExtractor
  from ..utils import (
-    js_to_json,
+    ExtractorError,
      smuggle_url,
+    traverse_obj,
  )
  
  
@@ -19,7 +20,7 @@ class NTVCoJpCUIE(InfoExtractor):
              'ext': 'mp4',
              'title': '桜エビと炒り卵がポイント！ 「中華風 エビチリおにぎり」──『美虎』五十嵐美幸',
              'upload_date': '20181213',
-            'description': 'md5:211b52f4fd60f3e0e72b68b0c6ba52a9',
+            'description': 'md5:1985b51a9abc285df0104d982a325f2a',
              'uploader_id': '3855502814001',
              'timestamp': 1544669941,
          },
@@ -28,22 +29,30 @@ class NTVCoJpCUIE(InfoExtractor):
              'skip_download': True,
          },
      }
+
      BRIGHTCOVE_URL_TEMPLATE = 'http://players.brightcove.net/%s/default_default/index.html?videoId=%s'
  
      def _real_extract(self, url):
          display_id = self._match_id(url)
          webpage = self._download_webpage(url, display_id)
-        player_config = self._parse_json(self._search_regex(
-            r'(?s)PLAYER_CONFIG\s*=\s*({.+?})',
-            webpage, 'player config'), display_id, js_to_json)
-        video_id = player_config['videoId']
-        account_id = player_config.get('account') or '3855502814001'
+        player_config = self._search_nuxt_data(webpage, display_id)
+        video_id = traverse_obj(player_config, ('movie', 'video_id'))
+        if not video_id:
+            raise ExtractorError('Failed to extract video ID for Brightcove')
+        account_id = traverse_obj(player_config, ('player', 'account')) or '3855502814001'
+        title = traverse_obj(player_config, ('movie', 'name'))
+        if not title:
+            og_title = self._og_search_title(webpage, fatal=False) or traverse_obj(player_config, ('player', 'title'))
+            if og_title:
+                title = og_title.split('(', 1)[0].strip()
+        description = (traverse_obj(player_config, ('movie', 'description'))
+                       or self._html_search_meta(['description', 'og:description'], webpage))
          return {
              '_type': 'url_transparent',
              'id': video_id,
              'display_id': display_id,
-            'title': self._search_regex(r'<h1[^>]+class="title"[^>]*>([^<]+)', webpage, 'title').strip(),
-            'description': self._html_search_meta(['description', 'og:description'], webpage),
+            'title': title,
+            'description': description,
              'url': smuggle_url(self.BRIGHTCOVE_URL_TEMPLATE % (account_id, video_id), {'geo_countries': ['JP']}),
              'ie_key': 'BrightcoveNew',
          }
author	The Hatsune Daishi <redacted>
	Tue, 7 Dec 2021 17:03:48 +0000 (02:03 +0900)
committer	GitHub <redacted>
	Tue, 7 Dec 2021 17:03:48 +0000 (22:33 +0530)