jfr.im git - yt-dlp.git/commitdiff
[extractor/youtube] Detect and break on looping comments (#6301)
author: coletdjnz <redacted>
Wed, 1 Mar 2023 07:56:53 +0000 (07:56 +0000)
committer: GitHub <redacted>
Wed, 1 Mar 2023 07:56:53 +0000 (07:56 +0000)
Fixes https://github.com/yt-dlp/yt-dlp/issues/6290

Authored by: coletdjnz

yt_dlp/extractor/youtube.py

index 44e9322937b5759b4dbb1e3246dc356ee3b68701..b02e0153af76d00133e709a6b6d7929635f31bd3 100644 (file)
@@ -3341,6 +3341,13 @@ def extract_thread(contents):
                 comment = self._extract_comment(comment_renderer, parent)
                 if not comment:
                     continue
+                # Sometimes YouTube may break and give us infinite looping comments.
+                # See: https://github.com/yt-dlp/yt-dlp/issues/6290
+                if comment['id'] in tracker['seen_comment_ids']:
+                    self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.')
+                    yield
+                else:
+                    tracker['seen_comment_ids'].add(comment['id'])
 
                 tracker['running_total'] += 1
                 tracker['total_reply_comments' if parent else 'total_parent_comments'] += 1
@@ -3365,7 +3372,8 @@ def extract_thread(contents):
                 est_total=0,
                 current_page_thread=0,
                 total_parent_comments=0,
-                total_reply_comments=0)
+                total_reply_comments=0,
+                seen_comment_ids=set())
 
         # TODO: Deprecated
         # YouTube comments have a max depth of 2