X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/378ae9f9fb8e8c86e6ac89c4c5b815b48ce93620..4ce2f29a50fcfb9920e6f2ffe42192945a2bad7e:/yt_dlp/extractor/youtube.py diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index 023d8fd8c..ac28ed7d2 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -902,7 +902,7 @@ def extract_relative_time(relative_time_text): e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago' """ - # XXX: this could be moved to a general function in utils.py + # XXX: this could be moved to a general function in utils/_utils.py # The relative time text strings are roughly the same as what # Javascript's Intl.RelativeTimeFormat function generates. # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat @@ -941,7 +941,16 @@ def _parse_time_text(self, text): def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, default_client='web'): - for retry in self.RetryManager(): + raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE)) + # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal. + icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete)) + icd_rm = next(icd_retries) + main_retries = iter(self.RetryManager()) + main_rm = next(main_retries) + # Manual retry loop for multiple RetryManagers + # The proper RetryManager MUST be advanced after an error + # and its result MUST be checked if the manager is non fatal + while True: try: response = self._call_api( ep=ep, fatal=True, headers=headers, @@ -953,7 +962,8 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers if not isinstance(e.cause, network_exceptions): return self._error_or_warning(e, fatal=fatal) elif not isinstance(e.cause, HTTPError): - retry.error = e + main_rm.error = e + next(main_retries) continue first_bytes = e.cause.response.read(512) @@ -965,27 +975,32 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers if yt_error: self._report_alerts([('ERROR', yt_error)], fatal=False) # Downloading page may result in intermittent 5xx HTTP error - # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289 + # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289 # We also want to catch all other network exceptions since errors in later pages can be troublesome # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 if e.cause.status not in (403, 429): - retry.error = e + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) try: self._extract_and_report_alerts(response, only_once=True) except ExtractorError as e: - # YouTube servers may return errors we want to retry on in a 200 OK response + # YouTube's servers may return errors we want to retry on in a 200 OK response # See: https://github.com/yt-dlp/yt-dlp/issues/839 if 'unknown error' in e.msg.lower(): - retry.error = e + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) # Youtube sometimes sends incomplete data # See: https://github.com/ytdl-org/youtube-dl/issues/28194 if not traverse_obj(response, *variadic(check_get_keys)): - retry.error = ExtractorError('Incomplete data received', expected=True) + icd_rm.error = ExtractorError('Incomplete data received', expected=True) + should_retry = next(icd_retries, None) + if not should_retry: + return None continue return response @@ -3280,16 +3295,15 @@ def _extract_chapters_from_engagement_panel(self, data, duration): chapter_time, chapter_title, duration) for contents in content_list)), []) - def _extract_heatmap_from_player_overlay(self, data): - content_list = traverse_obj(data, ( - 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar', - 'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list})) - return next(filter(None, ( - traverse_obj(contents, (..., 'heatMarkerRenderer', { - 'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}), - 'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000}, - 'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}), - })) for contents in content_list)), None) + def _extract_heatmap(self, data): + return traverse_obj(data, ( + 'frameworkUpdates', 'entityBatchUpdate', 'mutations', + lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP', + 'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., { + 'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}), + 'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000}, + 'value': ('intensityScoreNormalized', {float_or_none}), + })) or None def _extract_comment(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId') @@ -4423,7 +4437,7 @@ def process_language(container, base_url, lang_code, sub_name, query): or self._extract_chapters_from_description(video_description, duration) or None) - info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data) + info['heatmap'] = self._extract_heatmap(initial_data) contents = traverse_obj( initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),