[youtube] Fix error reporting of "Incomplete data"

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index ef289e48cef2505e85783e5b5180e77f28d17ac8..12634483e643bac8b4677b124729af7581c9d586 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -809,7 +809,7 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
              # Youtube sometimes sends incomplete data
              # See: https://github.com/ytdl-org/youtube-dl/issues/28194
              if not traverse_obj(response, *variadic(check_get_keys)):
-                retry.error = ExtractorError('Incomplete data received')
+                retry.error = ExtractorError('Incomplete data received', expected=True)
                  continue
  
              return response
@@ -3168,7 +3168,7 @@ def append_client(*client_names):
  
      def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
          itags, stream_ids = {}, []
-        itag_qualities, res_qualities = {}, {}
+        itag_qualities, res_qualities = {}, {0: -1}
          q = qualities([
              # Normally tiny is the smallest video-only formats. But
              # audio-only formats with unknown quality may get tagged as tiny
@@ -3247,9 +3247,9 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, i
                  else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
                  else -1)
              # Some formats may have much smaller duration than others (possibly damaged during encoding)
-            # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
+            # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
              # Make sure to avoid false positives with small duration differences.
-            # Eg: __2ABJjxzNo, ySuUZEjARPY
+            # E.g. __2ABJjxzNo, ySuUZEjARPY
              is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
              if is_damaged:
                  self.report_warning(
@@ -3320,10 +3320,9 @@ def process_manifest_format(f, proto, itag):
                  f['format_id'] = itag
                  itags[itag] = proto
  
-            f['quality'] = next((
-                q(qdict[val])
-                for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
-                if val in qdict), -1)
+            f['quality'] = itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1)
+            if f['quality'] == -1 and f.get('height'):
+                f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
              return True
  
          subtitles = {}
@@ -5834,7 +5833,7 @@ def _real_extract(self, url):
  
  
  class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
-    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
+    IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
      IE_NAME = 'youtube:music:search_url'
      _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
      _TESTS = [{