jfr.im git - yt-dlp.git/commitdiff
[youtube:tab] Show unavailable videos in playlists (#242)
author coletdjnz <redacted>
Fri, 16 Apr 2021 22:39:08 +0000 (22:39 +0000)
committer GitHub <redacted>
Fri, 16 Apr 2021 22:39:08 +0000 (04:09 +0530)
Closes #231

Authored by: colethedj

yt_dlp/extractor/youtube.py

index 2d8aa8b7c22fbbbada3dafb1fb9d7958c98e3a1b..547f5b171dd9e55a36e348cf5d2f7c9fd2292116 100644 (file)
@@ -29,6 +29,7 @@
     clean_html,
     dict_get,
     datetime_from_str,
+    error_to_compat_str,
     ExtractorError,
     format_field,
     float_or_none,
@@ -2628,6 +2629,19 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
             'uploader_id': 'UCXw-G3eDE9trcvY2sBMM_aA',
         },
         'playlist_mincount': 21,
+    }, {
+        'note': 'Playlist with "show unavailable videos" button',
+        'url': 'https://www.youtube.com/playlist?list=UUTYLiWFZy8xtPwxFwX9rV7Q',
+        'info_dict': {
+            'title': 'Uploads from Phim Siêu Nhân Nhật Bản',
+            'id': 'UUTYLiWFZy8xtPwxFwX9rV7Q',
+            'uploader': 'Phim Siêu Nhân Nhật Bản',
+            'uploader_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
+        },
+        'playlist_mincount': 1400,
+        'expected_warnings': [
+            'YouTube said: INFO - Unavailable videos are hidden',
+        ]
     }, {
         # https://github.com/ytdl-org/youtube-dl/issues/21844
         'url': 'https://www.youtube.com/playlist?list=PLzH6n4zXuckpfMu_4Ff8E7Z1behQks5ba',
@@ -3293,8 +3307,49 @@ def _real_extract_alerts():
         if errors:
             raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected)
 
+    def _reload_with_unavailable_videos(self, item_id, data, webpage):  # returns the reloaded API response, or None when no reload happens
+        """
+        Get playlist with unavailable videos if the 'show unavailable videos' button exists.
+        """
+        sidebar_renderer = try_get(
+            data, lambda x: x['sidebar']['playlistSidebarRenderer']['items'], list) or []  # falls back to an empty list, so the loop below is skipped
+        for item in sidebar_renderer:
+            if not isinstance(item, dict):
+                continue
+            renderer = item.get('playlistSidebarPrimaryInfoRenderer')
+            menu_renderer = try_get(
+                renderer, lambda x: x['menu']['menuRenderer']['items'], list) or []  # same empty-list fallback for the menu entries
+            for menu_item in menu_renderer:
+                if not isinstance(menu_item, dict):
+                    continue
+                nav_item_renderer = menu_item.get('menuNavigationItemRenderer')
+                text = try_get(
+                    nav_item_renderer, lambda x: x['text']['simpleText'], compat_str)
+                if not text or text.lower() != 'show unavailable videos':  # the button is identified by its label, compared case-insensitively
+                    continue
+                browse_endpoint = try_get(
+                    nav_item_renderer, lambda x: x['navigationEndpoint']['browseEndpoint'], dict) or {}
+                browse_id = browse_endpoint.get('browseId')
+                params = browse_endpoint.get('params')
+                if not browse_id or not params:  # button found but its endpoint lacks browseId/params
+                    return  # give up (None) without inspecting further menu items
+                ytcfg = self._extract_ytcfg(item_id, webpage)
+                headers = self._generate_api_headers(
+                    ytcfg, account_syncid=self._extract_account_syncid(ytcfg),
+                    identity_token=self._extract_identity_token(webpage, item_id=item_id),
+                    visitor_data=try_get(
+                        self._extract_context(ytcfg), lambda x: x['client']['visitorData'], compat_str))
+                query = {  # replays the button's browse endpoint through the API
+                    'params': params,
+                    'browseId': browse_id
+                }
+                return self._extract_response(  # only the first matching button is used
+                    item_id=item_id, headers=headers, query=query,
+                    check_get_keys='contents', fatal=False,  # non-fatal: on failure _extract_response warns and returns None
+                    note='Downloading API JSON with unavailable videos')
+
     def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
-                          ytcfg=None, check_get_keys=None, ep='browse'):
+                          ytcfg=None, check_get_keys=None, ep='browse', fatal=True):
         response = None
         last_error = None
         count = -1
@@ -3308,8 +3363,7 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
             try:
                 response = self._call_api(
                     ep=ep, fatal=True, headers=headers,
-                    video_id=item_id,
-                    query=query,
+                    video_id=item_id, query=query,
                     context=self._extract_context(ytcfg),
                     api_key=self._extract_api_key(ytcfg),
                     note='%s%s' % (note, ' (retry #%d)' % count if count else ''))
@@ -3320,7 +3374,12 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
                     last_error = 'HTTP Error %s' % e.cause.code
                     if count < retries:
                         continue
-                raise
+                if fatal:
+                    raise
+                else:
+                    self.report_warning(error_to_compat_str(e))
+                    return
+
             else:
                 # Youtube may send alerts if there was an issue with the continuation page
                 self._extract_alerts(response, expected=False)
@@ -3330,7 +3389,11 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
                 # See: https://github.com/ytdl-org/youtube-dl/issues/28194
                 last_error = 'Incomplete data received'
                 if count >= retries:
-                    self._downloader.report_error(last_error)
+                    if fatal:
+                        raise ExtractorError(last_error)
+                    else:
+                        self.report_warning(last_error)
+                        return
         return response
 
     def _extract_webpage(self, url, item_id):
@@ -3389,6 +3452,9 @@ def _real_extract(self, url):
 
         webpage, data = self._extract_webpage(url, item_id)
 
+        # YouTube sometimes provides a button to reload playlist with unavailable videos.
+        data = self._reload_with_unavailable_videos(item_id, data, webpage) or data
+
         tabs = try_get(
             data, lambda x: x['contents']['twoColumnBrowseResultsRenderer']['tabs'], list)
         if tabs: