Revert 39abae23546160fa98ac2b0c91e3d69fa965b573

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 023d8fd8c162e2d56aa154776712210942c76183..adbac8e955d85e767f3d3a4bfb8e0c3e3262a93c 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -902,7 +902,7 @@ def extract_relative_time(relative_time_text):
          e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
          """
  
-        # XXX: this could be moved to a general function in utils.py
+        # XXX: this could be moved to a general function in utils/_utils.py
          # The relative time text strings are roughly the same as what
          # Javascript's Intl.RelativeTimeFormat function generates.
          # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
@@ -941,7 +941,16 @@ def _parse_time_text(self, text):
      def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                            ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                            default_client='web'):
-        for retry in self.RetryManager():
+        raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
+        # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
+        icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
+        icd_rm = next(icd_retries)
+        main_retries = iter(self.RetryManager())
+        main_rm = next(main_retries)
+        # Manual retry loop for multiple RetryManagers
+        # The proper RetryManager MUST be advanced after an error
+        # and its result MUST be checked if the manager is non fatal
+        while True:
              try:
                  response = self._call_api(
                      ep=ep, fatal=True, headers=headers,
@@ -953,7 +962,8 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
                  if not isinstance(e.cause, network_exceptions):
                      return self._error_or_warning(e, fatal=fatal)
                  elif not isinstance(e.cause, HTTPError):
-                    retry.error = e
+                    main_rm.error = e
+                    next(main_retries)
                      continue
  
                  first_bytes = e.cause.response.read(512)
@@ -965,27 +975,32 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
                      if yt_error:
                          self._report_alerts([('ERROR', yt_error)], fatal=False)
                  # Downloading page may result in intermittent 5xx HTTP error
-                # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                  # We also want to catch all other network exceptions since errors in later pages can be troublesome
                  # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
                  if e.cause.status not in (403, 429):
-                    retry.error = e
+                    main_rm.error = e
+                    next(main_retries)
                      continue
                  return self._error_or_warning(e, fatal=fatal)
  
              try:
                  self._extract_and_report_alerts(response, only_once=True)
              except ExtractorError as e:
-                # YouTube servers may return errors we want to retry on in a 200 OK response
+                # YouTube's servers may return errors we want to retry on in a 200 OK response
                  # See: https://github.com/yt-dlp/yt-dlp/issues/839
                  if 'unknown error' in e.msg.lower():
-                    retry.error = e
+                    main_rm.error = e
+                    next(main_retries)
                      continue
                  return self._error_or_warning(e, fatal=fatal)
              # Youtube sometimes sends incomplete data
              # See: https://github.com/ytdl-org/youtube-dl/issues/28194
              if not traverse_obj(response, *variadic(check_get_keys)):
-                retry.error = ExtractorError('Incomplete data received', expected=True)
+                icd_rm.error = ExtractorError('Incomplete data received', expected=True)
+                should_retry = next(icd_retries, None)
+                if not should_retry:
+                    return None
                  continue
  
              return response
@@ -3280,16 +3295,15 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
                                            chapter_time, chapter_title, duration)
              for contents in content_list)), [])
  
-    def _extract_heatmap_from_player_overlay(self, data):
-        content_list = traverse_obj(data, (
-            'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
-            'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))
-        return next(filter(None, (
-            traverse_obj(contents, (..., 'heatMarkerRenderer', {
-                'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
-                'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
-                'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
-            })) for contents in content_list)), None)
+    def _extract_heatmap(self, data):
+        return traverse_obj(data, (
+            'frameworkUpdates', 'entityBatchUpdate', 'mutations',
+            lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
+            'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
+                'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
+                'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
+                'value': ('intensityScoreNormalized', {float_or_none}),
+            })) or None
  
      def _extract_comment(self, comment_renderer, parent=None):
          comment_id = comment_renderer.get('commentId')
@@ -4423,7 +4437,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
                  or self._extract_chapters_from_description(video_description, duration)
                  or None)
  
-            info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
+            info['heatmap'] = self._extract_heatmap(initial_data)
  
          contents = traverse_obj(
              initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
@@ -4546,6 +4560,14 @@ def process_language(container, base_url, lang_code, sub_name, query):
                  self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
          info['upload_date'] = upload_date
  
+        if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'):
+            # Newly uploaded videos' HLS formats are potentially problematic and need to be checked
+            upload_datetime = datetime_from_str(upload_date).replace(tzinfo=datetime.timezone.utc)
+            if upload_datetime >= datetime_from_str('today-1day'):
+                for fmt in info['formats']:
+                    if fmt.get('protocol') == 'm3u8_native':
+                        fmt['__needs_testing'] = True
+
          for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
              v = info.get(s_k)
              if v: