[extractor/youtube] Extract concurrent view count for livestreams (#5152)

[yt-dlp.git] / yt_dlp / extractor / common.py
diff --git a/yt_dlp/extractor/common.py b/yt_dlp/extractor/common.py

index d36f025ab823a5675ad1e428dd4ddc94925efc46..31a45b37a22ffd3992e7408c3e29185157f261f2 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -284,6 +284,7 @@ class InfoExtractor:
                      captions instead of normal subtitles
      duration:       Length of the video in seconds, as an integer or float.
      view_count:     How many users have watched the video on the platform.
+    concurrent_view_count: How many users are currently watching the video on the platform.
      like_count:     Number of positive ratings of the video
      dislike_count:  Number of negative ratings of the video
      repost_count:   Number of reposts of the video
@@ -1227,7 +1228,7 @@ def _search_regex(self, pattern, string, name, default=NO_DEFAULT, fatal=True, f
              return None
  
      def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
-                     contains_pattern='(?s:.+)', fatal=True, default=NO_DEFAULT, **kwargs):
+                     contains_pattern=r'{(?s:.+)}', fatal=True, default=NO_DEFAULT, **kwargs):
          """Searches string for the JSON object specified by start_pattern"""
          # NB: end_pattern is only used to reduce the size of the initial match
          if default is NO_DEFAULT:
@@ -1236,7 +1237,7 @@ def _search_json(self, start_pattern, string, name, video_id, *, end_pattern='',
              fatal, has_default = False, True
  
          json_string = self._search_regex(
-            rf'(?:{start_pattern})\s*(?P<json>{{\s*(?:{contains_pattern})\s*}})\s*(?:{end_pattern})',
+            rf'(?:{start_pattern})\s*(?P<json>{contains_pattern})\s*(?:{end_pattern})',
              string, name, group='json', fatal=fatal, default=None if has_default else NO_DEFAULT)
          if not json_string:
              return default
@@ -1862,7 +1863,7 @@ def add_item(field, reverse, closest, limit_text):
                      alias, field = field, self._get_field_setting(field, 'field')
                      if self._get_field_setting(alias, 'deprecated'):
                          self.ydl.deprecated_feature(f'Format sorting alias {alias} is deprecated and may '
-                                                    'be removed in a future version. Please use {field} instead')
+                                                    f'be removed in a future version. Please use {field} instead')
                  reverse = match.group('reverse') is not None
                  closest = match.group('separator') == '~'
                  limit_text = match.group('limit')
@@ -3124,9 +3125,10 @@ def _parse_ism_formats_and_subtitles(self, ism_doc, ism_url, ism_id=None):
              stream_name = stream.get('Name')
              stream_language = stream.get('Language', 'und')
              for track in stream.findall('QualityLevel'):
-                fourcc = track.get('FourCC') or ('AACL' if track.get('AudioTag') == '255' else None)
+                KNOWN_TAGS = {'255': 'AACL', '65534': 'EC-3'}
+                fourcc = track.get('FourCC') or KNOWN_TAGS.get(track.get('AudioTag'))
                  # TODO: add support for WVC1 and WMAP
-                if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML'):
+                if fourcc not in ('H264', 'AVC1', 'AACL', 'TTML', 'EC-3'):
                      self.report_warning('%s is not a supported codec' % fourcc)
                      continue
                  tbr = int(track.attrib['Bitrate']) // 1000
@@ -3586,7 +3588,8 @@ def _parse_jwplayer_formats(self, jwplayer_sources_data, video_id=None,
                      'url': source_url,
                      'width': int_or_none(source.get('width')),
                      'height': height,
-                    'tbr': int_or_none(source.get('bitrate')),
+                    'tbr': int_or_none(source.get('bitrate'), scale=1000),
+                    'filesize': int_or_none(source.get('filesize')),
                      'ext': ext,
                  }
                  if source_url.startswith('rtmp'):