jfr.im git - yt-dlp.git/commitdiff
[extractor/youtube] Ignore incomplete data error for comment replies (#5490)
authorMatthew <redacted>
Thu, 10 Nov 2022 06:35:22 +0000 (19:35 +1300)
committerGitHub <redacted>
Thu, 10 Nov 2022 06:35:22 +0000 (06:35 +0000)
The incomplete data error is ignored only for comment reply threads, and only when --ignore-errors is used.
Closes https://github.com/yt-dlp/yt-dlp/issues/4669
Authored by: coletdjnz

yt_dlp/extractor/youtube.py

index 5b7c94c4ec83a63ec9e7309565ec1608847eeae4..5b39f976518b6ab6ee4ecc0836f14b94c0397964 100644 (file)
@@ -3237,11 +3237,21 @@ def extract_thread(contents):
                 note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                     '       ' if parent else '', ' replies' if parent else '',
                     page_num, comment_prog_str)
-
-            response = self._extract_response(
-                item_id=None, query=continuation,
-                ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
-                check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
+            try:
+                response = self._extract_response(
+                    item_id=None, query=continuation,
+                    ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
+                    check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
+            except ExtractorError as e:
+                # Ignore incomplete data error for replies if retries didn't work.
+                # This is to allow any other parent comments and comment threads to be downloaded.
+                # See: https://github.com/yt-dlp/yt-dlp/issues/4669
+                if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
+                    self.report_warning(
+                        'Received incomplete data for a comment reply thread and retrying did not help. '
+                        'Ignoring to let other comments be downloaded.')
+                else:
+                    raise
             is_forced_continuation = False
             continuation_contents = traverse_obj(
                 response, 'onResponseReceivedEndpoints', expected_type=list, default=[])