jfr.im git - yt-dlp.git/commitdiff
[extractor/youtube] Extract concurrent view count for livestreams (#5152)
author Matthew <redacted>
Fri, 7 Oct 2022 07:00:40 +0000 (20:00 +1300)
committer GitHub <redacted>
Fri, 7 Oct 2022 07:00:40 +0000 (07:00 +0000)
Adds new field `concurrent_view_count`
Closes https://github.com/yt-dlp/yt-dlp/issues/4843

Authored by: coletdjnz

README.md
yt_dlp/extractor/common.py
yt_dlp/extractor/youtube.py

index e0a1ea059bc5adb2c1d7cd50d903d0df419113bd..9b59e096a9ea6a5282400120cbff4845c44cc785 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1226,6 +1226,7 @@ # OUTPUT TEMPLATE
  - `duration` (numeric): Length of the video in seconds
  - `duration_string` (string): Length of the video (HH:mm:ss)
  - `view_count` (numeric): How many users have watched the video on the platform
+ - `concurrent_view_count` (numeric): How many users are currently watching the video on the platform.
  - `like_count` (numeric): Number of positive ratings of the video
  - `dislike_count` (numeric): Number of negative ratings of the video
  - `repost_count` (numeric): Number of reposts of the video
index 944b196a118d6ea57db300ff0c57f4a7a83b2e1b..31a45b37a22ffd3992e7408c3e29185157f261f2 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -284,6 +284,7 @@ class InfoExtractor:
                     captions instead of normal subtitles
     duration:       Length of the video in seconds, as an integer or float.
     view_count:     How many users have watched the video on the platform.
+    concurrent_view_count: How many users are currently watching the video on the platform.
     like_count:     Number of positive ratings of the video
     dislike_count:  Number of negative ratings of the video
     repost_count:   Number of reposts of the video
index 4456110f6c89ae74785db147cee266f89669b847..6f153bb3cf9cfd7d63bfd2250136ef0c76d70db1 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -912,8 +912,7 @@ def _extract_video(self, renderer):
                 traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
                 video_id, default=None, group='duration'))
 
-        view_count = self._get_count(renderer, 'viewCountText')
-
+        view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText')
         uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
         channel_id = traverse_obj(
             renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
@@ -932,6 +931,12 @@ def _extract_video(self, renderer):
         if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
             url = f'https://www.youtube.com/shorts/{video_id}'
 
+        live_status = (
+            'is_upcoming' if scheduled_timestamp is not None
+            else 'was_live' if 'streamed' in time_text.lower()
+            else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
+            else None)
+
         return {
             '_type': 'url',
             'ie_key': YoutubeIE.ie_key(),
@@ -940,17 +945,12 @@ def _extract_video(self, renderer):
             'title': title,
             'description': description,
             'duration': duration,
-            'view_count': view_count,
             'uploader': uploader,
             'channel_id': channel_id,
             'thumbnails': thumbnails,
             'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')
                             if self._configuration_arg('approximate_date', ie_key='youtubetab')
                             else None),
-            'live_status': ('is_upcoming' if scheduled_timestamp is not None
-                            else 'was_live' if 'streamed' in time_text.lower()
-                            else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
-                            else None),
             'release_timestamp': scheduled_timestamp,
             'availability':
                 'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
@@ -958,7 +958,8 @@ def _extract_video(self, renderer):
                     is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
                     needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
                     needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
-                    is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)
+                    is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
+            'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count': view_count,
         }
 
 
@@ -2328,6 +2329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'view_count': int,
                 'playable_in_embed': True,
                 'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+                'concurrent_view_count': int,
             },
             'params': {'skip_download': True}
         }, {
@@ -4115,6 +4117,15 @@ def process_language(container, base_url, lang_code, sub_name, query):
                     'like_count': str_to_int(like_count),
                     'dislike_count': str_to_int(dislike_count),
                 })
+            vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
+            if vcr:
+                vc = self._get_count(vcr, 'viewCount')
+                # Upcoming premieres with waiting count are treated as live here
+                if vcr.get('isLive'):
+                    info['concurrent_view_count'] = vc
+                elif info.get('view_count') is None:
+                    info['view_count'] = vc
+
         vsir = get_first(contents, 'videoSecondaryInfoRenderer')
         if vsir:
             vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))