[youtube, cleanup] Minor refactoring

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 031aa35a14e4ac152075081cfe664fcfe55f807b..4ee09ad9a25abb94656d64b84814bc0e13c28733 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -384,6 +384,9 @@ def _initialize_pref(self):
      def _real_initialize(self):
          self._initialize_pref()
          self._initialize_consent()
+        self._check_login_required()
+
+    def _check_login_required(self):
          if (self._LOGIN_REQUIRED
                  and self.get_param('cookiefile') is None
                  and self.get_param('cookiesfrombrowser') is None):
@@ -563,6 +566,18 @@ def generate_api_headers(
              headers['X-Origin'] = origin
          return {h: v for h, v in headers.items() if v is not None}
  
+    def _download_ytcfg(self, client, video_id):
+        """Download the page for `client` and return the ytcfg extracted from it
+
+        Only 'web', 'web_music' and 'web_embedded' have a page URL here; any
+        other client returns {} without making a request. The download is
+        non-fatal, and {} is also returned when extract_ytcfg yields nothing.
+        """
+        url = {
+            'web': 'https://www.youtube.com',
+            'web_music': 'https://music.youtube.com',
+            'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
+        }.get(client)
+        if not url:
+            return {}
+        webpage = self._download_webpage(
+            url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config')
+        return self.extract_ytcfg(video_id, webpage) or {}
+
      @staticmethod
      def _build_api_continuation_query(continuation, ctp=None):
          query = {
@@ -728,6 +743,7 @@ def extract_relative_time(relative_time_text):
                  return None
  
      def _extract_time_text(self, renderer, *path_list):
+        """@returns (timestamp, time_text)"""
          text = self._get_text(renderer, *path_list) or ''
          dt = self.extract_relative_time(text)
          timestamp = None
@@ -2959,16 +2975,6 @@ def _get_requested_clients(self, url, smuggled_data):
  
          return orderedSet(requested_clients)
  
-    def _extract_player_ytcfg(self, client, video_id):
-        url = {
-            'web_music': 'https://music.youtube.com',
-            'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1'
-        }.get(client)
-        if not url:
-            return {}
-        webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip())
-        return self.extract_ytcfg(video_id, webpage) or {}
-
      def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
          initial_pr = None
          if webpage:
@@ -3005,8 +3011,8 @@ def append_client(*client_names):
          while clients:
              client, base_client, variant = _split_innertube_client(clients.pop())
              player_ytcfg = master_ytcfg if client == 'web' else {}
-            if 'configs' not in self._configuration_arg('player_skip'):
-                player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg
+            if 'configs' not in self._configuration_arg('player_skip') and client != 'web':
+                player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg
  
              player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage)
              require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER')
@@ -4347,6 +4353,10 @@ def _reload_with_unavailable_videos(self, item_id, data, ytcfg):
              check_get_keys='contents', fatal=False, ytcfg=ytcfg,
              note='Downloading API JSON with unavailable videos')
  
+    @property
+    def skip_webpage(self):
+        """Whether 'webpage' is listed in the `skip` extractor argument read under YoutubeTabIE's ie_key"""
+        return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key())
+
      def _extract_webpage(self, url, item_id, fatal=True):
          retries = self.get_param('extractor_retries', 3)
          count = -1
@@ -4393,9 +4403,21 @@ def _extract_webpage(self, url, item_id, fatal=True):
  
          return webpage, data
  
+    def _report_playlist_authcheck(self, ytcfg, fatal=True):
+        """Use if failed to extract ytcfg (and data) from initial webpage
+
+        Does nothing unless `ytcfg` is empty and self.is_authenticated is truthy.
+        In that case an expected ExtractorError is raised when `fatal` is set and
+        'authcheck' is not in the `skip` extractor argument; otherwise the
+        message is reported as a warning, shown only once.
+        """
+        if not ytcfg and self.is_authenticated:
+            msg = 'Playlists that require authentication may not extract correctly without a successful webpage download'
+            # The `skip` argument is always read under YoutubeTabIE's ie_key,
+            # regardless of which extractor class this runs in
+            if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal:
+                raise ExtractorError(
+                    f'{msg}. If you are not downloading private content, or '
+                    'your cookies are only for the first account and channel,'
+                    ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
+                    expected=True)
+            self.report_warning(msg, only_once=True)
+
      def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'):
          data = None
-        if 'webpage' not in self._configuration_arg('skip'):
+        if not self.skip_webpage:
              webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal)
              ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage)
              # Reject webpage data if redirected to home page without explicitly requesting
@@ -4409,14 +4431,7 @@ def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=Fals
                      raise ExtractorError(msg, expected=True)
                  self.report_warning(msg, only_once=True)
          if not data:
-            if not ytcfg and self.is_authenticated:
-                msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.'
-                if 'authcheck' not in self._configuration_arg('skip') and fatal:
-                    raise ExtractorError(
-                        msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,'
-                              ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check',
-                        expected=True)
-                self.report_warning(msg, only_once=True)
+            self._report_playlist_authcheck(ytcfg, fatal=fatal)
              data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client)
          return data, ytcfg
  
@@ -4454,14 +4469,20 @@ def _search_results(self, query, params=NO_DEFAULT, default_client='web'):
              ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'),
              ('continuationContents', ),
          )
+        display_id = f'query "{query}"'
          check_get_keys = tuple(set(keys[0] for keys in content_keys))
+        ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {}
+        self._report_playlist_authcheck(ytcfg, fatal=False)
  
          continuation_list = [None]
+        search = None
          for page_num in itertools.count(1):
              data.update(continuation_list[0] or {})
+            headers = self.generate_api_headers(
+                ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client)
              search = self._extract_response(
-                item_id='query "%s" page %s' % (query, page_num), ep='search', query=data,
-                default_client=default_client, check_get_keys=check_get_keys)
+                item_id=f'{display_id} page {page_num}', ep='search', query=data,
+                default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers)
              slr_contents = traverse_obj(search, *content_keys)
              yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list)
              if not continuation_list[0]:
@@ -5634,7 +5655,9 @@ class YoutubeFeedsInfoExtractor(InfoExtractor):
      Subclasses must define the _FEED_NAME property.
      """
      _LOGIN_REQUIRED = True
-    _TESTS = []
+
+    def _real_initialize(self):
+        """Run the login-required check defined on YoutubeBaseInfoExtractor"""
+        # This class derives from InfoExtractor, so the method is called
+        # through YoutubeBaseInfoExtractor with this instance passed as self
+        YoutubeBaseInfoExtractor._check_login_required(self)
  
      @property
      def IE_NAME(self):