[extractor/youtube] Extract `heatmap` data (#7100)

author Audrey <redacted>

Fri, 26 May 2023 12:24:39 +0000 (08:24 -0400)

committer GitHub <redacted>

Fri, 26 May 2023 12:24:39 +0000 (17:54 +0530)
author Audrey <redacted>
Fri, 26 May 2023 12:24:39 +0000 (08:24 -0400)
committer GitHub <redacted>
Fri, 26 May 2023 12:24:39 +0000 (17:54 +0530)
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index 78288f8091c95458479d8241d2b35a4afea6a657..1b1dd560fd85f2dd1a7b077be936123b63369379 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -350,6 +350,10 @@ class InfoExtractor:
                          * "start_time" - The start time of the chapter in seconds
                          * "end_time" - The end time of the chapter in seconds
                          * "title" (optional, string)
+    heatmap:        A list of dictionaries, with the following entries:
+                        * "start_time" - The start time of the data point in seconds
+                        * "end_time" - The end time of the data point in seconds
+                        * "value" - The normalized value of the data point (float between 0 and 1)
      playable_in_embed: Whether this video is allowed to play in embedded
                      players on other sites. Can be True (=always allowed),
                      False (=never allowed), None (=unknown), or a string
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 654bf5e6b63d2fc7473b13fd7b75dbfc0730ed19..80edcd77dac8094976da18e6bf5d7acc1ebf5106 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -1273,6 +1273,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Philipp Hagemeister',
                  'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
                  'uploader_id': '@PhilippHagemeister',
+                'heatmap': 'count:100',
              }
          },
          {
@@ -1426,6 +1427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'FlyingKitty',
                  'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
                  'uploader_id': '@FlyingKitty900',
+                'comment_count': int,
              },
          },
          {
@@ -3244,6 +3246,17 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
                                            chapter_time, chapter_title, duration)
              for contents in content_list)), [])
  
+    def _extract_heatmap_from_player_overlay(self, data):
+        content_list = traverse_obj(data, (
+            'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar',
+            'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list}))
+        return next(filter(None, (
+            traverse_obj(contents, (..., 'heatMarkerRenderer', {
+                'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}),
+                'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000},
+                'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}),
+            })) for contents in content_list)), None)
+
      def _extract_comment(self, comment_renderer, parent=None):
          comment_id = comment_renderer.get('commentId')
          if not comment_id:
@@ -4313,6 +4326,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
                  or self._extract_chapters_from_description(video_description, duration)
                  or None)
  
+            info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data)
+
          contents = traverse_obj(
              initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
              expected_type=list, default=[])
author	Audrey <redacted>
	Fri, 26 May 2023 12:24:39 +0000 (08:24 -0400)
committer	GitHub <redacted>
	Fri, 26 May 2023 12:24:39 +0000 (17:54 +0530)
yt_dlp/extractor/common.py		patch | blob | blame | history
yt_dlp/extractor/youtube.py		patch | blob | blame | history