]> jfr.im git - yt-dlp.git/blobdiff - youtube_dlc/extractor/youtube.py
Readme changes
[yt-dlp.git] / youtube_dlc / extractor / youtube.py
index d23c503ad29ff02458f81442968287b7fe709d96..e0f211b741f712b1e1d9b394a54233385e67dc50 100644 (file)
@@ -64,7 +64,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
     _TFA_URL = 'https://accounts.google.com/_/signin/challenge?hl=en&TL={0}'
 
     _RESERVED_NAMES = (
-        r'course|embed|channel|c|user|playlist|watch|w|results|storefront|'
+        r'course|embed|channel|c|user|playlist|watch|w|results|storefront|oops|'
         r'shared|index|account|reporthistory|t/terms|about|upload|signin|logout|'
         r'feed/(watch_later|history|subscriptions|library|trending|recommended)')
 
@@ -506,7 +506,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         '396': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
         '397': {'acodec': 'none', 'vcodec': 'av01.0.05M.08'},
     }
-    _SUBTITLE_FORMATS = ('json3', 'srv1', 'srv2', 'srv3', 'ttml', 'vtt')
+    _SUBTITLE_FORMATS = ('srv1', 'srv2', 'srv3', 'ttml', 'vtt')  # TODO 'json3' raising issues with automatic captions
 
     _GEO_BYPASS = False
 
@@ -3257,7 +3257,7 @@ def _real_extract(self, url):
         video_id = qs.get('v', [None])[0]
         playlist_id = qs.get('list', [None])[0]
 
-        if is_home.group('not_channel').startswith('watch') and not video_id:
+        if is_home.group('not_channel') is not None and is_home.group('not_channel').startswith('watch') and not video_id:
             if playlist_id:
                 self._downloader.report_warning('%s is not a valid Youtube URL. Trying to download playlist %s' % (url, playlist_id))
                 url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
@@ -3469,10 +3469,33 @@ def _entries(self, query, n):
                 list)
             if not slr_contents:
                 break
-            isr_contents = try_get(
-                slr_contents,
-                lambda x: x[0]['itemSectionRenderer']['contents'],
-                list)
+
+            isr_contents = []
+            continuation_token = None
+            # YouTube sometimes adds promoted content to search results,
+            # which shifts the index of the video list and continuation token,
+            # so we scan every entry until both have been found.
+            for index, isr in enumerate(slr_contents):
+                if not isr_contents:
+                    isr_contents = try_get(
+                        slr_contents,
+                        (lambda x: x[index]['itemSectionRenderer']['contents']),
+                        list)
+                    for content in isr_contents:
+                        if content.get('videoRenderer') is not None:
+                            break
+                    else:
+                        isr_contents = []
+
+                if continuation_token is None:
+                    continuation_token = try_get(
+                        slr_contents,
+                        lambda x: x[index]['continuationItemRenderer']['continuationEndpoint']['continuationCommand'][
+                            'token'],
+                        compat_str)
+                if continuation_token is not None and isr_contents:
+                    break
+
             if not isr_contents:
                 break
             for content in isr_contents:
@@ -3506,13 +3529,9 @@ def _entries(self, query, n):
                 }
                 if total == n:
                     return
-            token = try_get(
-                slr_contents,
-                lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
-                compat_str)
-            if not token:
+            if not continuation_token:
                 break
-            data['continuation'] = token
+            data['continuation'] = continuation_token
 
     def _get_n_results(self, query, n):
         """Get a specified number of results for a query"""