jfr.im git - yt-dlp.git/blobdiff - yt_dlp/extractor/youtube.py
[extractor/youtube] Extract `heatmap` data (#7100)
[yt-dlp.git] / yt_dlp / extractor / youtube.py
index 654bf5e6b63d2fc7473b13fd7b75dbfc0730ed19..80edcd77dac8094976da18e6bf5d7acc1ebf5106 100644 (file)
@@ -1273,6 +1273,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Philipp Hagemeister',
                 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
                 'uploader_id': '@PhilippHagemeister',
+                'heatmap': 'count:100',
             }
         },
         {
@@ -1426,6 +1427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'FlyingKitty',
                 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
                 'uploader_id': '@FlyingKitty900',
+                'comment_count': int,
             },
         },
         {
@@ -3244,6 +3246,17 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
                                           chapter_time, chapter_title, duration)
             for contents in content_list)), [])
 
+    def _extract_heatmap_from_player_overlay(self, data):  # heatmap entries read from data['playerOverlays'], or None
+        content_list = traverse_obj(data, (  # one 'heatMarkers' value per markersMap entry; NOTE(review): {list} presumably drops non-list values - confirm
+            'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
+            'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))
+        return next(filter(None, (  # first truthy per-entry result; None when no entry yields one
+            traverse_obj(contents, (..., 'heatMarkerRenderer', {
+                'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),  # NOTE(review): scale=1000 presumably ms -> s - confirm
+                'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},  # start + duration, milliseconds -> seconds
+                'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
+            })) for contents in content_list)), None)
+
     def _extract_comment(self, comment_renderer, parent=None):
         comment_id = comment_renderer.get('commentId')
         if not comment_id:
@@ -4313,6 +4326,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
                 or self._extract_chapters_from_description(video_description, duration)
                 or None)
 
+            info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
+
         contents = traverse_obj(
             initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
             expected_type=list, default=[])