[Youtube] Fix private feeds/playlists on multi-channel accounts (#143)

author Matthew <redacted>

Fri, 5 Mar 2021 13:59:14 +0000 (13:59 +0000)

committer GitHub <redacted>

Fri, 5 Mar 2021 13:59:14 +0000 (19:29 +0530)
author Matthew <redacted>
Fri, 5 Mar 2021 13:59:14 +0000 (13:59 +0000)
committer GitHub <redacted>
Fri, 5 Mar 2021 13:59:14 +0000 (19:29 +0530)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index f9323d2925d80415966980fa47ebd22a1f712417..3a56f2a421514ae233186bc878e0624ad26d4121 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -301,7 +301,6 @@ def _call_api(self, ep, query, video_id, fatal=True, headers=None,
          auth = self._generate_sapisidhash_header()
          if auth is not None:
              headers.update({'Authorization': auth, 'X-Origin': 'https://www.youtube.com'})
-
          return self._download_json(
              'https://www.youtube.com/youtubei/v1/%s' % ep,
              video_id=video_id, fatal=fatal, note=note, errnote=errnote,
@@ -2704,7 +2703,7 @@ def _extract_continuation(cls, renderer):
              ctp = continuation_ep.get('clickTrackingParams')
              return YoutubeTabIE._build_continuation_query(continuation, ctp)
  
-    def _entries(self, tab, identity_token, item_id):
+    def _entries(self, tab, item_id, identity_token, account_syncid):
  
          def extract_entries(parent_renderer):  # this needs to called again for continuation to work with feeds
              contents = try_get(parent_renderer, lambda x: x['contents'], list) or []
@@ -2764,6 +2763,10 @@ def extract_entries(parent_renderer):  # this needs to called again for continua
          if identity_token:
              headers['x-youtube-identity-token'] = identity_token
  
+        if account_syncid:
+            headers['X-Goog-PageId'] = account_syncid
+            headers['X-Goog-AuthUser'] = 0
+
          for page_num in itertools.count(1):
              if not continuation:
                  break
@@ -2883,7 +2886,7 @@ def _extract_uploader(data):
                          try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], compat_str))
          return {k: v for k, v in uploader.items() if v is not None}
  
-    def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
+    def _extract_from_tabs(self, item_id, webpage, data, tabs):
          playlist_id = title = description = channel_url = channel_name = channel_id = None
          thumbnails_list = tags = []
  
@@ -2947,7 +2950,10 @@ def _extract_from_tabs(self, item_id, webpage, data, tabs, identity_token):
              'channel_id': metadata['uploader_id'],
              'channel_url': metadata['uploader_url']})
          return self.playlist_result(
-            self._entries(selected_tab, identity_token, playlist_id),
+            self._entries(
+                selected_tab, playlist_id,
+                self._extract_identity_token(webpage, item_id),
+                self._extract_account_syncid(data)),
              **metadata)
  
      def _extract_mix_playlist(self, playlist, playlist_id):
@@ -3026,6 +3032,17 @@ def _extract_identity_token(self, webpage, item_id):
              r'\bID_TOKEN["\']\s*:\s*["\'](.+?)["\']', webpage,
              'identity token', default=None)
  
+    @staticmethod
+    def _extract_account_syncid(data):
+        """Return the channel syncId needed to download private playlists of secondary channels, else None"""
+        sync_ids = (
+            try_get(data, lambda x: x['responseContext']['mainAppWebResponseContext']['datasyncId'], compat_str)
+            or '').split("||")
+        if len(sync_ids) >= 2 and sync_ids[1]:
+            # datasyncId has the form "channel_syncid||user_syncid" for a secondary channel
+            # and just "user_syncid||" for the primary channel; only the channel_syncid is wanted
+            return sync_ids[0]
+
      def _extract_webpage(self, url, item_id):
          retries = self._downloader.params.get('extractor_retries', 3)
          count = -1
@@ -3085,8 +3102,7 @@ def _real_extract(self, url):
          tabs = try_get(
              data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
          if tabs:
-            identity_token = self._extract_identity_token(webpage, item_id)
-            return self._extract_from_tabs(item_id, webpage, data, tabs, identity_token)
+            return self._extract_from_tabs(item_id, webpage, data, tabs)
  
          playlist = try_get(
              data, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict)
author	Matthew <redacted>
	Fri, 5 Mar 2021 13:59:14 +0000 (13:59 +0000)
committer	GitHub <redacted>
	Fri, 5 Mar 2021 13:59:14 +0000 (19:29 +0530)