]> jfr.im git - yt-dlp.git/commitdiff
[extractor/youtube] Extract `heatmap` data (#7100)
authorAudrey <redacted>
Fri, 26 May 2023 12:24:39 +0000 (08:24 -0400)
committerGitHub <redacted>
Fri, 26 May 2023 12:24:39 +0000 (17:54 +0530)
Closes #3888
Authored by: tntmod54321

yt_dlp/extractor/common.py
yt_dlp/extractor/youtube.py

index 78288f8091c95458479d8241d2b35a4afea6a657..1b1dd560fd85f2dd1a7b077be936123b63369379 100644 (file)
@@ -350,6 +350,10 @@ class InfoExtractor:
                         * "start_time" - The start time of the chapter in seconds
                         * "end_time" - The end time of the chapter in seconds
                         * "title" (optional, string)
+    heatmap:        A list of dictionaries, with the following entries:
+                        * "start_time" - The start time of the data point in seconds
+                        * "end_time" - The end time of the data point in seconds
+                        * "value" - The normalized value of the data point (float between 0 and 1)
     playable_in_embed: Whether this video is allowed to play in embedded
                     players on other sites. Can be True (=always allowed),
                     False (=never allowed), None (=unknown), or a string
index 654bf5e6b63d2fc7473b13fd7b75dbfc0730ed19..80edcd77dac8094976da18e6bf5d7acc1ebf5106 100644 (file)
@@ -1273,6 +1273,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'Philipp Hagemeister',
                 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
                 'uploader_id': '@PhilippHagemeister',
+                'heatmap': 'count:100',
             }
         },
         {
@@ -1426,6 +1427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'uploader': 'FlyingKitty',
                 'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
                 'uploader_id': '@FlyingKitty900',
+                'comment_count': int,
             },
         },
         {
@@ -3244,6 +3246,17 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
                                           chapter_time, chapter_title, duration)
             for contents in content_list)), [])
 
+    def _extract_heatmap_from_player_overlay(self, data):
+        content_list = traverse_obj(data, (
+            'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
+            'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))
+        return next(filter(None, (
+            traverse_obj(contents, (..., 'heatMarkerRenderer', {
+                'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
+                'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
+                'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
+            })) for contents in content_list)), None)
+
     def _extract_comment(self, comment_renderer, parent=None):
         comment_id = comment_renderer.get('commentId')
         if not comment_id:
@@ -4313,6 +4326,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
                 or self._extract_chapters_from_description(video_description, duration)
                 or None)
 
+            info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
+
         contents = traverse_obj(
             initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
             expected_type=list, default=[])