[extractor/youtube] Extract concurrent view count for livestreams (#5152)

author Matthew <redacted>

Fri, 7 Oct 2022 07:00:40 +0000 (20:00 +1300)

committer GitHub <redacted>

Fri, 7 Oct 2022 07:00:40 +0000 (07:00 +0000)
author Matthew <redacted>
Fri, 7 Oct 2022 07:00:40 +0000 (20:00 +1300)
committer GitHub <redacted>
Fri, 7 Oct 2022 07:00:40 +0000 (07:00 +0000)
diff --git a/README.md b/README.md

index e0a1ea059bc5adb2c1d7cd50d903d0df419113bd..9b59e096a9ea6a5282400120cbff4845c44cc785 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1226,6 +1226,7 @@ # OUTPUT TEMPLATE
   - `duration` (numeric): Length of the video in seconds
   - `duration_string` (string): Length of the video (HH:mm:ss)
   - `view_count` (numeric): How many users have watched the video on the platform
+ - `concurrent_view_count` (numeric): How many users are currently watching the video on the platform
   - `like_count` (numeric): Number of positive ratings of the video
   - `dislike_count` (numeric): Number of negative ratings of the video
   - `repost_count` (numeric): Number of reposts of the video
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index 944b196a118d6ea57db300ff0c57f4a7a83b2e1b..31a45b37a22ffd3992e7408c3e29185157f261f2 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -284,6 +284,7 @@ class InfoExtractor:
                      captions instead of normal subtitles
      duration:       Length of the video in seconds, as an integer or float.
      view_count:     How many users have watched the video on the platform.
+    concurrent_view_count: How many users are currently watching the video on the platform.
      like_count:     Number of positive ratings of the video
      dislike_count:  Number of negative ratings of the video
      repost_count:   Number of reposts of the video
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 4456110f6c89ae74785db147cee266f89669b847..6f153bb3cf9cfd7d63bfd2250136ef0c76d70db1 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -912,8 +912,7 @@ def _extract_video(self, renderer):
                  traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
                  video_id, default=None, group='duration'))
  
-        view_count = self._get_count(renderer, 'viewCountText')
-
+        view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText')
          uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
          channel_id = traverse_obj(
              renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
@@ -932,6 +931,12 @@ def _extract_video(self, renderer):
          if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
              url = f'https://www.youtube.com/shorts/{video_id}'
  
+        live_status = (
+            'is_upcoming' if scheduled_timestamp is not None
+            else 'was_live' if 'streamed' in time_text.lower()
+            else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
+            else None)
+
          return {
              '_type': 'url',
              'ie_key': YoutubeIE.ie_key(),
@@ -940,17 +945,12 @@ def _extract_video(self, renderer):
              'title': title,
              'description': description,
              'duration': duration,
-            'view_count': view_count,
              'uploader': uploader,
              'channel_id': channel_id,
              'thumbnails': thumbnails,
              'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')
                              if self._configuration_arg('approximate_date', ie_key='youtubetab')
                              else None),
-            'live_status': ('is_upcoming' if scheduled_timestamp is not None
-                            else 'was_live' if 'streamed' in time_text.lower()
-                            else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
-                            else None),
              'release_timestamp': scheduled_timestamp,
              'availability':
                  'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
@@ -958,7 +958,8 @@ def _extract_video(self, renderer):
                      is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                      needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                      needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
-                    is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)
+                    is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
+            'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count': view_count,
          }
  
  
@@ -2328,6 +2329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'view_count': int,
                  'playable_in_embed': True,
                  'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+                'concurrent_view_count': int,
              },
              'params': {'skip_download': True}
          }, {
@@ -4115,6 +4117,15 @@ def process_language(container, base_url, lang_code, sub_name, query):
                      'like_count': str_to_int(like_count),
                      'dislike_count': str_to_int(dislike_count),
                  })
+            vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
+            if vcr:
+                vc = self._get_count(vcr, 'viewCount')
+                # Upcoming premieres that show a waiting count are treated as live here
+                if vcr.get('isLive'):
+                    info['concurrent_view_count'] = vc
+                elif info.get('view_count') is None:
+                    info['view_count'] = vc
+
          vsir = get_first(contents, 'videoSecondaryInfoRenderer')
          if vsir:
              vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
author	Matthew <redacted>
	Fri, 7 Oct 2022 07:00:40 +0000 (20:00 +1300)
committer	GitHub <redacted>
	Fri, 7 Oct 2022 07:00:40 +0000 (07:00 +0000)
README.md		patch | blob | blame | history
yt_dlp/extractor/common.py		patch | blob | blame | history
yt_dlp/extractor/youtube.py		patch | blob | blame | history