]> jfr.im git - yt-dlp.git/commitdiff
[ie/youtube] Raise a warning for `Incomplete Data` instead of an error (#8238)
authorcoletdjnz <redacted>
Tue, 3 Oct 2023 06:42:30 +0000 (19:42 +1300)
committerGitHub <redacted>
Tue, 3 Oct 2023 06:42:30 +0000 (06:42 +0000)
Closes https://github.com/yt-dlp/yt-dlp/issues/8206

Adds `raise_incomplete_data` extractor arg to revert this behaviour and raise an error.

Authored by: coletdjnz
Co-authored-by: Simon Sawicki <redacted>
README.md
yt_dlp/extractor/youtube.py

index 7bf44657219d9762b2995c62c53c1fbe742cf14b..a0b69c9a1a16ed6817914e487debd9fe4c18356e 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1809,6 +1809,7 @@ #### youtube
 * `formats`: Change the types of formats to return. `dashy` (convert HTTP to DASH), `duplicate` (identical content but different URLs or protocol; includes `dashy`), `incomplete` (cannot be downloaded completely - live dash and post-live m3u8)
 * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
 * `innertube_key`: Innertube API key to use for all API requests
+* `raise_incomplete_data`: `Incomplete Data Received` raises an error instead of reporting a warning
 
 #### youtubetab (YouTube playlists, channels, feeds, etc.)
 * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
index a39d17cf114b99e38399d525010761d020815b4b..7e13aa77979d8fad502fc24f49d6adf13a99da3e 100644 (file)
@@ -941,7 +941,13 @@ def _parse_time_text(self, text):
     def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                           ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                           default_client='web'):
-        for retry in self.RetryManager():
+        raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
+        # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
+        icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
+        icd_rm = next(icd_retries)
+        main_retries = iter(self.RetryManager())
+        main_rm = next(main_retries)
+        for _ in range(main_rm.retries + icd_rm.retries + 1):
             try:
                 response = self._call_api(
                     ep=ep, fatal=True, headers=headers,
@@ -953,7 +959,8 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
                 if not isinstance(e.cause, network_exceptions):
                     return self._error_or_warning(e, fatal=fatal)
                 elif not isinstance(e.cause, HTTPError):
-                    retry.error = e
+                    main_rm.error = e
+                    next(main_retries)
                     continue
 
                 first_bytes = e.cause.response.read(512)
@@ -965,27 +972,32 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
                     if yt_error:
                         self._report_alerts([('ERROR', yt_error)], fatal=False)
                 # Downloading page may result in intermittent 5xx HTTP error
-                # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                 # We also want to catch all other network exceptions since errors in later pages can be troublesome
                 # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                 if e.cause.status not in (403, 429):
-                    retry.error = e
+                    main_rm.error = e
+                    next(main_retries)
                     continue
                 return self._error_or_warning(e, fatal=fatal)
 
             try:
                 self._extract_and_report_alerts(response, only_once=True)
             except ExtractorError as e:
-                # YouTube servers may return errors we want to retry on in a 200 OK response
+                # YouTube's servers may return errors we want to retry on in a 200 OK response
                 # See: https://github.com/yt-dlp/yt-dlp/issues/839
                 if 'unknown error' in e.msg.lower():
-                    retry.error = e
+                    main_rm.error = e
+                    next(main_retries)
                     continue
                 return self._error_or_warning(e, fatal=fatal)
             # Youtube sometimes sends incomplete data
             # See: https://github.com/ytdl-org/youtube-dl/issues/28194
             if not traverse_obj(response, *variadic(check_get_keys)):
-                retry.error = ExtractorError('Incomplete data received', expected=True)
+                icd_rm.error = ExtractorError('Incomplete data received', expected=True)
+                should_retry = next(icd_retries, None)
+                if not should_retry:
+                    return None
                 continue
 
             return response