[extractor/youtube] Extract concurrent view count for livestreams (#5152)

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index f55a2760ff790d4fe70f1cc321c8283898338ff2..6f153bb3cf9cfd7d63bfd2250136ef0c76d70db1 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2,6 +2,7 @@
  import calendar
  import copy
  import datetime
+import enum
  import hashlib
  import itertools
  import json
@@ -23,12 +24,14 @@
  from ..utils import (
      NO_DEFAULT,
      ExtractorError,
+    LazyList,
      UserNotLive,
      bug_reports_message,
      classproperty,
      clean_html,
      datetime_from_str,
      dict_get,
+    filter_dict,
      float_or_none,
      format_field,
      get_first,
@@ -164,9 +167,9 @@
          'INNERTUBE_CONTEXT': {
              'client': {
                  'clientName': 'IOS',
-                'clientVersion': '17.30.1',
+                'clientVersion': '17.33.2',
                  'deviceModel': 'iPhone14,3',
-                'userAgent': 'com.google.ios.youtube/17.30.1 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
              }
          },
          'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
@@ -176,9 +179,9 @@
          'INNERTUBE_CONTEXT': {
              'client': {
                  'clientName': 'IOS_MESSAGES_EXTENSION',
-                'clientVersion': '17.30.1',
+                'clientVersion': '17.33.2',
                  'deviceModel': 'iPhone14,3',
-                'userAgent': 'com.google.ios.youtube/17.30.1 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
+                'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
              },
          },
          'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
@@ -189,7 +192,9 @@
          'INNERTUBE_CONTEXT': {
              'client': {
                  'clientName': 'IOS_MUSIC',
-                'clientVersion': '5.18',
+                'clientVersion': '5.21',
+                'deviceModel': 'iPhone14,3',
+                'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
              },
          },
          'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
@@ -199,7 +204,9 @@
          'INNERTUBE_CONTEXT': {
              'client': {
                  'clientName': 'IOS_CREATOR',
-                'clientVersion': '22.29.101',
+                'clientVersion': '22.33.101',
+                'deviceModel': 'iPhone14,3',
+                'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
              },
          },
          'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
@@ -271,14 +278,23 @@ def build_innertube_clients():
  build_innertube_clients()
  
  
+class BadgeType(enum.Enum):
+    AVAILABILITY_UNLISTED = enum.auto()
+    AVAILABILITY_PRIVATE = enum.auto()
+    AVAILABILITY_PUBLIC = enum.auto()
+    AVAILABILITY_PREMIUM = enum.auto()
+    AVAILABILITY_SUBSCRIPTION = enum.auto()
+    LIVE_NOW = enum.auto()
+
+
  class YoutubeBaseInfoExtractor(InfoExtractor):
      """Provide base functions for Youtube extractors"""
  
      _RESERVED_NAMES = (
          r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|'
          r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|'
-        r'browse|oembed|get_video_info|iframe_api|s/player|'
-        r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout')
+        r'browse|oembed|get_video_info|iframe_api|s/player|source|'
+        r'storefront|oops|index|account|t/terms|about|upload|signin|logout')
  
      _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)'
  
@@ -363,6 +379,38 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
          r'(?:www\.)?piped\.privacy\.com\.de',
      )
  
+    # extracted from account/account_menu ep
+    # XXX: These are the supported YouTube UI and API languages,
+    # which is slightly different from languages supported for translation in YouTube studio
+    _SUPPORTED_LANG_CODES = [
+        'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
+        'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
+        'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
+        'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
+        'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
+        'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
+    ]
+
+    _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'}
+
+    @functools.cached_property
+    def _preferred_lang(self):
+        """
+        Returns a language code supported by YouTube for the user preferred language.
+        Returns None if no preferred language set.
+        """
+        preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
+        if not preferred_lang:
+            return
+        if preferred_lang not in self._SUPPORTED_LANG_CODES:
+            raise ExtractorError(
+                f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
+                expected=True)
+        elif preferred_lang != 'en':
+            self.report_warning(
+                f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
+        return preferred_lang
+
      def _initialize_consent(self):
          cookies = self._get_cookies('https://www.youtube.com/')
          if cookies.get('__Secure-3PSID'):
@@ -387,7 +435,7 @@ def _initialize_pref(self):
                  pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
              except ValueError:
                  self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
-        pref.update({'hl': 'en', 'tz': 'UTC'})
+        pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
          self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
  
      def _real_initialize(self):
@@ -435,7 +483,7 @@ def _extract_context(self, ytcfg=None, default_client='web'):
              (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
          # Enforce language and tz for extraction
          client_context = traverse_obj(context, 'client', expected_type=dict, default={})
-        client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
+        client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
          return context
  
      _SAPISID = None
@@ -573,7 +621,7 @@ def generate_api_headers(
          if auth is not None:
              headers['Authorization'] = auth
              headers['X-Origin'] = origin
-        return {h: v for h, v in headers.items() if v is not None}
+        return filter_dict(headers)
  
      def _download_ytcfg(self, client, video_id):
          url = {
@@ -628,20 +676,10 @@ def _extract_continuation(cls, renderer):
          if next_continuation:
              return next_continuation
  
-        contents = []
-        for key in ('contents', 'items'):
-            contents.extend(try_get(renderer, lambda x: x[key], list) or [])
-
-        for content in contents:
-            if not isinstance(content, dict):
-                continue
-            continuation_ep = try_get(
-                content, (lambda x: x['continuationItemRenderer']['continuationEndpoint'],
-                          lambda x: x['continuationItemRenderer']['button']['buttonRenderer']['command']),
-                dict)
-            continuation = cls._extract_continuation_ep_data(continuation_ep)
-            if continuation:
-                return continuation
+        return traverse_obj(renderer, (
+            ('contents', 'items', 'rows'), ..., 'continuationItemRenderer',
+            ('continuationEndpoint', ('button', 'buttonRenderer', 'command'))
+        ), get_all=False, expected_type=cls._extract_continuation_ep_data)
  
      @classmethod
      def _extract_alerts(cls, data):
@@ -657,12 +695,11 @@ def _extract_alerts(cls, data):
                      yield alert_type, message
  
      def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
-        errors = []
-        warnings = []
+        errors, warnings = [], []
          for alert_type, alert_message in alerts:
              if alert_type.lower() == 'error' and fatal:
                  errors.append([alert_type, alert_message])
-            else:
+            elif alert_message not in self._IGNORED_WARNINGS:
                  warnings.append([alert_type, alert_message])
  
          for alert_type, alert_message in (warnings + errors[:-1]):
@@ -674,13 +711,49 @@ def _extract_and_report_alerts(self, data, *args, **kwargs):
          return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
  
      def _extract_badges(self, renderer: dict):
-        badges = set()
-        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
-            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
-            if label:
-                badges.add(label.lower())
+        privacy_icon_map = {
+            'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
+            'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
+            'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
+        }
+
+        badge_style_map = {
+            'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
+            'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
+            'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
+        }
+
+        label_map = {
+            'unlisted': BadgeType.AVAILABILITY_UNLISTED,
+            'private': BadgeType.AVAILABILITY_PRIVATE,
+            'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
+            'live': BadgeType.LIVE_NOW,
+            'premium': BadgeType.AVAILABILITY_PREMIUM
+        }
+
+        badges = []
+        for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
+            badge_type = (
+                privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
+                or badge_style_map.get(traverse_obj(badge, 'style'))
+            )
+            if badge_type:
+                badges.append({'type': badge_type})
+                continue
+
+            # fallback, won't work in some languages
+            label = traverse_obj(badge, 'label', expected_type=str, default='')
+            for match, label_badge_type in label_map.items():
+                if match in label.lower():
+                    badges.append({'type': badge_type})
+                    continue
+
          return badges
  
+    @staticmethod
+    def _has_badge(badges, badge_type):
+        return bool(traverse_obj(badges, lambda _, v: v['type'] == badge_type))
+
      @staticmethod
      def _get_text(data, *path_list, max_runs=None):
          for path in path_list or [None]:
@@ -751,9 +824,9 @@ def extract_relative_time(relative_time_text):
              except ValueError:
                  return None
  
-    def _extract_time_text(self, renderer, *path_list):
-        """@returns (timestamp, time_text)"""
-        text = self._get_text(renderer, *path_list) or ''
+    def _parse_time_text(self, text):
+        if not text:
+            return
          dt = self.extract_relative_time(text)
          timestamp = None
          if isinstance(dt, datetime.datetime):
@@ -766,9 +839,10 @@ def _extract_time_text(self, renderer, *path_list):
                          (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                          text.lower(), 'time text', default=None)))
  
-        if text and timestamp is None:
-            self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
-        return timestamp, text
+        if text and timestamp is None and self._preferred_lang in (None, 'en'):
+            self.report_warning(
+                f'Cannot parse localized time text "{text}"', only_once=True)
+        return timestamp
  
      def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                            ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
@@ -838,13 +912,12 @@ def _extract_video(self, renderer):
                  traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str),
                  video_id, default=None, group='duration'))
  
-        view_count = self._get_count(renderer, 'viewCountText')
-
+        view_count = self._get_count(renderer, 'viewCountText', 'shortViewCountText')
          uploader = self._get_text(renderer, 'ownerText', 'shortBylineText')
          channel_id = traverse_obj(
              renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
              expected_type=str, get_all=False)
-        timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
+        time_text = self._get_text(renderer, 'publishedTimeText') or ''
          scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
          overlay_style = traverse_obj(
              renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
@@ -858,6 +931,12 @@ def _extract_video(self, renderer):
          if overlay_style == 'SHORTS' or '/shorts/' in navigation_url:
              url = f'https://www.youtube.com/shorts/{video_id}'
  
+        live_status = (
+            'is_upcoming' if scheduled_timestamp is not None
+            else 'was_live' if 'streamed' in time_text.lower()
+            else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
+            else None)
+
          return {
              '_type': 'url',
              'ie_key': YoutubeIE.ie_key(),
@@ -866,19 +945,21 @@ def _extract_video(self, renderer):
              'title': title,
              'description': description,
              'duration': duration,
-            'view_count': view_count,
              'uploader': uploader,
              'channel_id': channel_id,
              'thumbnails': thumbnails,
-            'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
+            'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')
                              if self._configuration_arg('approximate_date', ie_key='youtubetab')
                              else None),
-            'live_status': ('is_upcoming' if scheduled_timestamp is not None
-                            else 'was_live' if 'streamed' in time_text.lower()
-                            else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
-                            else None),
              'release_timestamp': scheduled_timestamp,
-            'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
+            'availability':
+                'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+                else self._availability(
+                    is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
+                    needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
+                    needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
+                    is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
+            'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count': view_count,
          }
  
  
@@ -919,19 +1000,26 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                       (?:\#|$)""" % {
          'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES),
      }
-    _EMBED_REGEX = [r'''(?x)
-        (?:
-            <iframe[^>]+?src=|
-            data-video-url=|
-            <embed[^>]+?src=|
-            embedSWF\(?:\s*|
-            <object[^>]+data=|
-            new\s+SWFObject\(
-        )
-        (["\'])
-            (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
-            (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
-        \1''']
+    _EMBED_REGEX = [
+        r'''(?x)
+            (?:
+                <(?:[0-9A-Za-z-]+?)?iframe[^>]+?src=|
+                data-video-url=|
+                <embed[^>]+?src=|
+                embedSWF\(?:\s*|
+                <object[^>]+data=|
+                new\s+SWFObject\(
+            )
+            (["\'])
+                (?P<url>(?:https?:)?//(?:www\.)?youtube(?:-nocookie)?\.com/
+                (?:embed|v|p)/[0-9A-Za-z_-]{11}.*?)
+            \1''',
+        # https://wordpress.org/plugins/lazy-load-for-videos/
+        r'''(?xs)
+            <a\s[^>]*\bhref="(?P<url>https://www\.youtube\.com/watch\?v=[0-9A-Za-z_-]{11})"
+            \s[^>]*\bclass="[^"]*\blazy-load-youtube''',
+    ]
+
      _PLAYER_INFO_RE = (
          r'/s/player/(?P<id>[a-zA-Z0-9_-]{8,})/player',
          r'/(?P<id>[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$',
@@ -2155,6 +2243,35 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'comment_count': int,
                  'channel_follower_count': int
              }
+        }, {
+            # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
+            'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4',
+            'info_dict': {
+                'id': '2NUZ8W2llS4',
+                'ext': 'mp4',
+                'title': 'The NP that test your phone performance 🙂',
+                'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d',
+                'uploader': 'Leon Nguyen',
+                'uploader_id': 'VNSXIII',
+                'uploader_url': 'http://www.youtube.com/user/VNSXIII',
+                'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA',
+                'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA',
+                'duration': 21,
+                'view_count': int,
+                'age_limit': 0,
+                'categories': ['Gaming'],
+                'tags': 'count:23',
+                'playable_in_embed': True,
+                'live_status': 'not_live',
+                'upload_date': '20220102',
+                'like_count': int,
+                'availability': 'public',
+                'channel': 'Leon Nguyen',
+                'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp',
+                'comment_count': int,
+                'channel_follower_count': int
+            },
+            'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
          }, {
              # date text is premiered video, ensure upload date in UTC (published 1641172509)
              'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM',
@@ -2212,6 +2329,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'view_count': int,
                  'playable_in_embed': True,
                  'description': 'md5:2ef1d002cad520f65825346e2084e49d',
+                'concurrent_view_count': int,
              },
              'params': {'skip_download': True}
          }, {
@@ -2266,6 +2384,61 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'tags': [],
                  'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
              }
+        }, {
+            # Prefer primary title+description language metadata by default
+            # Do not prefer translated description if primary is empty
+            'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
+            'info_dict': {
+                'id': 'el3E4MbxRqQ',
+                'ext': 'mp4',
+                'title': 'dlp test video 2 - primary sv no desc',
+                'description': '',
+                'channel': 'cole-dlp-test-acc',
+                'tags': [],
+                'view_count': int,
+                'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+                'like_count': int,
+                'playable_in_embed': True,
+                'availability': 'unlisted',
+                'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
+                'age_limit': 0,
+                'duration': 5,
+                'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+                'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+                'live_status': 'not_live',
+                'upload_date': '20220908',
+                'categories': ['People & Blogs'],
+                'uploader': 'cole-dlp-test-acc',
+                'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            },
+            'params': {'skip_download': True}
+        }, {
+            # Extractor argument: prefer translated title+description
+            'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
+            'info_dict': {
+                'id': 'gHKT4uU8Zng',
+                'ext': 'mp4',
+                'channel': 'cole-dlp-test-acc',
+                'tags': [],
+                'duration': 5,
+                'live_status': 'not_live',
+                'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+                'upload_date': '20220728',
+                'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+                'view_count': int,
+                'categories': ['People & Blogs'],
+                'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
+                'title': 'dlp test video title translated (fr)',
+                'availability': 'public',
+                'uploader': 'cole-dlp-test-acc',
+                'age_limit': 0,
+                'description': 'dlp test video description translated (fr)',
+                'playable_in_embed': True,
+                'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+                'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            },
+            'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
+            'expected_warnings': [r'Preferring "fr" translated fields'],
          }, {
              'note': '6 channel audio',
              'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
@@ -2323,10 +2496,8 @@ def __init__(self, *args, **kwargs):
          self._code_cache = {}
          self._player_cache = {}
  
-    def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data):
+    def _prepare_live_from_start_formats(self, formats, video_id, live_start_time, url, webpage_url, smuggled_data, is_live):
          lock = threading.Lock()
-
-        is_live = True
          start_time = time.time()
          formats = [f for f in formats if f.get('is_from_start')]
  
@@ -2341,7 +2512,8 @@ def refetch_manifest(format_id, delay):
              microformats = traverse_obj(
                  prs, (..., 'microformat', 'playerMicroformatRenderer'),
                  expected_type=dict, default=[])
-            _, is_live, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
+            _, live_status, _, formats, _ = self._list_formats(video_id, microformats, video_details, prs, player_url)
+            is_live = live_status == 'is_live'
              start_time = time.time()
  
          def mpd_feed(format_id, delay):
@@ -2362,12 +2534,17 @@ def mpd_feed(format_id, delay):
              return f['manifest_url'], f['manifest_stream_number'], is_live
  
          for f in formats:
-            f['is_live'] = True
-            f['protocol'] = 'http_dash_segments_generator'
-            f['fragments'] = functools.partial(
-                self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed)
+            f['is_live'] = is_live
+            gen = functools.partial(self._live_dash_fragments, video_id, f['format_id'],
+                                    live_start_time, mpd_feed, not is_live and f.copy())
+            if is_live:
+                f['fragments'] = gen
+                f['protocol'] = 'http_dash_segments_generator'
+            else:
+                f['fragments'] = LazyList(gen({}))
+                del f['is_from_start']
  
-    def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx):
+    def _live_dash_fragments(self, video_id, format_id, live_start_time, mpd_feed, manifestless_orig_fmt, ctx):
          FETCH_SPAN, MAX_DURATION = 5, 432000
  
          mpd_url, stream_number, is_live = None, None, True
@@ -2398,15 +2575,18 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate):
                      return False, last_seq
                  elif old_mpd_url == mpd_url:
                      return True, last_seq
-            try:
-                fmts, _ = self._extract_mpd_formats_and_subtitles(
-                    mpd_url, None, note=False, errnote=False, fatal=False)
-            except ExtractorError:
-                fmts = None
-            if not fmts:
-                no_fragment_score += 2
-                return False, last_seq
-            fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
+            if manifestless_orig_fmt:
+                fmt_info = manifestless_orig_fmt
+            else:
+                try:
+                    fmts, _ = self._extract_mpd_formats_and_subtitles(
+                        mpd_url, None, note=False, errnote=False, fatal=False)
+                except ExtractorError:
+                    fmts = None
+                if not fmts:
+                    no_fragment_score += 2
+                    return False, last_seq
+                fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number)
              fragments = fmt_info['fragments']
              fragment_base_url = fmt_info['fragment_base_url']
              assert fragment_base_url
@@ -2414,6 +2594,7 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate):
              _last_seq = int(re.search(r'(?:/|^)sq/(\d+)', fragments[-1]['path']).group(1))
              return True, _last_seq
  
+        self.write_debug(f'[{video_id}] Generating fragments for format {format_id}')
          while is_live:
              fetch_time = time.time()
              if no_fragment_score > 30:
@@ -2467,6 +2648,11 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate):
              except ExtractorError:
                  continue
  
+            if manifestless_orig_fmt:
+                # Stop at the first iteration if running for post-live manifestless;
+                # fragment count no longer increase since it starts
+                break
+
              time.sleep(max(0, FETCH_SPAN + fetch_time - time.time()))
  
      def _extract_player_url(self, *ytcfgs, webpage=None):
@@ -2628,7 +2814,10 @@ def _decrypt_nsig(self, s, video_id, player_url):
              raise ExtractorError('Cannot decrypt nsig without player_url')
          player_url = urljoin('https://www.youtube.com', player_url)
  
-        jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
+        try:
+            jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
+        except ExtractorError as e:
+            raise ExtractorError('Unable to extract nsig function code', cause=e)
          if self.get_param('youtube_print_sig_code'):
              self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
  
@@ -2666,14 +2855,27 @@ def _extract_n_function_name(self, jscode):
  
      def _extract_n_function_code(self, video_id, player_url):
          player_id = self._extract_player_info(player_url)
-        func_code = self.cache.load('youtube-nsig', player_id, after='2022.08.19')
+        func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.09.1')
          jscode = func_code or self._load_player(video_id, player_url)
          jsi = JSInterpreter(jscode)
  
          if func_code:
              return jsi, player_id, func_code
  
-        func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
+        func_name = self._extract_n_function_name(jscode)
+
+        # For redundancy
+        func_code = self._search_regex(
+            r'''(?xs)%s\s*=\s*function\s*\((?P<var>[\w$]+)\)\s*
+                     # NB: The end of the regex is intentionally kept strict
+                     {(?P<code>.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name,
+            jscode, 'nsig function', group=('var', 'code'), default=None)
+        if func_code:
+            func_code = ([func_code[0]], func_code[1])
+        else:
+            self.write_debug('Extracting nsig function with jsinterp')
+            func_code = jsi.extract_function_code(func_name)
+
          self.cache.store('youtube-nsig', player_id, func_code)
          return jsi, player_id, func_code
  
@@ -2840,8 +3042,9 @@ def _extract_chapters(self, chapter_list, chapter_time, chapter_title, duration,
                  self.report_warning(f'Incomplete chapter {idx}')
              elif chapters[-1]['start_time'] <= chapter['start_time'] <= duration:
                  chapters.append(chapter)
-            else:
-                self.report_warning(f'Invalid start time for chapter "{chapter["title"]}"')
+            elif chapter not in chapters:
+                self.report_warning(
+                    f'Invalid start time ({chapter["start_time"]} < {chapters[-1]["start_time"]}) for chapter "{chapter["title"]}"')
          return chapters[1:]
  
      def _extract_comment(self, comment_renderer, parent=None):
@@ -2851,8 +3054,10 @@ def _extract_comment(self, comment_renderer, parent=None):
  
          text = self._get_text(comment_renderer, 'contentText')
  
-        # note: timestamp is an estimate calculated from the current time and time_text
-        timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
+        # Timestamp is an estimate calculated from the current time and time_text
+        time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
+        timestamp = self._parse_time_text(time_text)
+
          author = self._get_text(comment_renderer, 'authorText')
          author_id = try_get(comment_renderer,
                              lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
@@ -2955,8 +3160,8 @@ def extract_thread(contents):
          # YouTube comments have a max depth of 2
          max_depth = int_or_none(get_single_config_arg('max_comment_depth'))
          if max_depth:
-            self._downloader.deprecation_warning(
-                '[youtube] max_comment_depth extractor argument is deprecated. Set max replies in the max-comments extractor argument instead.')
+            self._downloader.deprecated_feature('[youtube] max_comment_depth extractor argument is deprecated. '
+                                                'Set max replies in the max-comments extractor argument instead')
          if max_depth == 1 and parent:
              return
  
@@ -3208,7 +3413,12 @@ def append_client(*client_names):
              self.report_warning(last_error)
          return prs, player_url
  
-    def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
+    def _needs_live_processing(self, live_status, duration):
+        if (live_status == 'is_live' and self.get_param('live_from_start')
+                or live_status == 'post_live' and (duration or 0) > 4 * 3600):
+            return live_status
+
+    def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
          itags, stream_ids = {}, []
          itag_qualities, res_qualities = {}, {0: None}
          q = qualities([
@@ -3278,11 +3488,17 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, i
                  except ExtractorError as e:
                      phantomjs_hint = ''
                      if isinstance(e, JSInterpreter.Exception):
-                        phantomjs_hint = f'         Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} to workaround the issue\n'
-                    self.report_warning(
-                        f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
-                        f'         n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
-                    self.write_debug(e, only_once=True)
+                        phantomjs_hint = (f'         Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
+                                          f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
+                    if player_url:
+                        self.report_warning(
+                            f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
+                            f'         n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
+                        self.write_debug(e, only_once=True)
+                    else:
+                        self.report_warning(
+                            'Cannot decrypt nsig without player_url: You may experience throttling for some formats',
+                            video_id=video_id, only_once=True)
                      throttled = True
  
              if itag:
@@ -3349,15 +3565,22 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, i
                      dct['container'] = dct['ext'] + '_dash'
              yield dct
  
-        live_from_start = is_live and self.get_param('live_from_start')
-        skip_manifests = self._configuration_arg('skip')
-        if not self.get_param('youtube_include_hls_manifest', True):
-            skip_manifests.append('hls')
+        needs_live_processing = self._needs_live_processing(live_status, duration)
+        skip_bad_formats = not self._configuration_arg('include_incomplete_formats')
+
+        skip_manifests = set(self._configuration_arg('skip'))
+        if (not self.get_param('youtube_include_hls_manifest', True)
+                or needs_live_processing == 'is_live'  # These will be filtered out by YoutubeDL anyway
+                or needs_live_processing and skip_bad_formats):
+            skip_manifests.add('hls')
+
          if not self.get_param('youtube_include_dash_manifest', True):
-            skip_manifests.append('dash')
-        get_dash = 'dash' not in skip_manifests and (
-            not is_live or live_from_start or self._configuration_arg('include_live_dash'))
-        get_hls = not live_from_start and 'hls' not in skip_manifests
+            skip_manifests.add('dash')
+        if self._configuration_arg('include_live_dash'):
+            self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
+                                                'Use include_incomplete_formats extractor argument instead')
+        elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
+            skip_manifests.add('dash')
  
          def process_manifest_format(f, proto, itag):
              if itag in itags:
@@ -3375,16 +3598,17 @@ def process_manifest_format(f, proto, itag):
  
          subtitles = {}
          for sd in streaming_data:
-            hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
+            hls_manifest_url = 'hls' not in skip_manifests and sd.get('hlsManifestUrl')
              if hls_manifest_url:
-                fmts, subs = self._extract_m3u8_formats_and_subtitles(hls_manifest_url, video_id, 'mp4', fatal=False, live=is_live)
+                fmts, subs = self._extract_m3u8_formats_and_subtitles(
+                    hls_manifest_url, video_id, 'mp4', fatal=False, live=live_status == 'is_live')
                  subtitles = self._merge_subtitles(subs, subtitles)
                  for f in fmts:
                      if process_manifest_format(f, 'hls', self._search_regex(
                              r'/itag/(\d+)', f['url'], 'itag', default=None)):
                          yield f
  
-            dash_manifest_url = get_dash and sd.get('dashManifestUrl')
+            dash_manifest_url = 'dash' not in skip_manifests and sd.get('dashManifestUrl')
              if dash_manifest_url:
                  formats, subs = self._extract_mpd_formats_and_subtitles(dash_manifest_url, video_id, fatal=False)
                  subtitles = self._merge_subtitles(subs, subtitles)  # Prioritize HLS subs over DASH
@@ -3392,7 +3616,7 @@ def process_manifest_format(f, proto, itag):
                      if process_manifest_format(f, 'dash', f['format_id']):
                          f['filesize'] = int_or_none(self._search_regex(
                              r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
-                        if live_from_start:
+                        if needs_live_processing:
                              f['is_from_start'] = True
  
                          yield f
@@ -3458,11 +3682,23 @@ def _list_formats(self, video_id, microformats, video_details, player_responses,
          is_live = get_first(video_details, 'isLive')
          if is_live is None:
              is_live = get_first(live_broadcast_details, 'isLiveNow')
+        live_content = get_first(video_details, 'isLiveContent')
+        is_upcoming = get_first(video_details, 'isUpcoming')
+        if is_live is None and is_upcoming or live_content is False:
+            is_live = False
+        if is_upcoming is None and (live_content or is_live):
+            is_upcoming = False
+        post_live = get_first(video_details, 'isPostLiveDvr')
+        live_status = ('post_live' if post_live
+                       else 'is_live' if is_live
+                       else 'is_upcoming' if is_upcoming
+                       else None if None in (is_live, is_upcoming, live_content)
+                       else 'was_live' if live_content else 'not_live')
  
          streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[])
-        *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, is_live, duration)
+        *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
  
-        return live_broadcast_details, is_live, streaming_data, formats, subtitles
+        return live_broadcast_details, live_status, streaming_data, formats, subtitles
  
      def _real_extract(self, url):
          url, smuggled_data = unsmuggle_url(url, {})
@@ -3492,11 +3728,19 @@ def _real_extract(self, url):
          microformats = traverse_obj(
              player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
              expected_type=dict, default=[])
-        video_title = (
-            get_first(video_details, 'title')
-            or self._get_text(microformats, (..., 'title'))
-            or search_meta(['og:title', 'twitter:title', 'title']))
-        video_description = get_first(video_details, 'shortDescription')
+
+        translated_title = self._get_text(microformats, (..., 'title'))
+        video_title = (self._preferred_lang and translated_title
+                       or get_first(video_details, 'title')  # primary
+                       or translated_title
+                       or search_meta(['og:title', 'twitter:title', 'title']))
+        translated_description = self._get_text(microformats, (..., 'description'))
+        original_description = get_first(video_details, 'shortDescription')
+        video_description = (
+            self._preferred_lang and translated_description
+            # If original description is blank, it will be an empty string.
+            # Do not prefer translated description in this case.
+            or original_description if original_description is not None else translated_description)
  
          multifeed_metadata_list = get_first(
              player_responses,
@@ -3546,8 +3790,10 @@ def feed_entry(name):
              or get_first(microformats, 'lengthSeconds')
              or parse_duration(search_meta('duration'))) or None
  
-        live_broadcast_details, is_live, streaming_data, formats, automatic_captions = \
-            self._list_formats(video_id, microformats, video_details, player_responses, player_url)
+        live_broadcast_details, live_status, streaming_data, formats, automatic_captions = \
+            self._list_formats(video_id, microformats, video_details, player_responses, player_url, duration)
+        if live_status == 'post_live':
+            self.write_debug(f'{video_id}: Video is in Post-Live Manifestless mode')
  
          if not formats:
              if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')):
@@ -3606,7 +3852,7 @@ def feed_entry(name):
          thumbnails.extend({
              'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
                  video_id=video_id, name=name, ext=ext,
-                webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
+                webp='_webp' if ext == 'webp' else '', live='_live' if live_status == 'is_live' else ''),
          } for name in thumbnail_names for ext in ('webp', 'jpg'))
          for thumb in thumbnails:
              i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
@@ -3621,20 +3867,27 @@ def feed_entry(name):
              or search_meta('channelId'))
          owner_profile_url = get_first(microformats, 'ownerProfileUrl')
  
-        live_content = get_first(video_details, 'isLiveContent')
-        is_upcoming = get_first(video_details, 'isUpcoming')
-        if is_live is None:
-            if is_upcoming or live_content is False:
-                is_live = False
-        if is_upcoming is None and (live_content or is_live):
-            is_upcoming = False
          live_start_time = parse_iso8601(get_first(live_broadcast_details, 'startTimestamp'))
          live_end_time = parse_iso8601(get_first(live_broadcast_details, 'endTimestamp'))
          if not duration and live_end_time and live_start_time:
              duration = live_end_time - live_start_time
  
-        if is_live and self.get_param('live_from_start'):
-            self._prepare_live_from_start_formats(formats, video_id, live_start_time, url, webpage_url, smuggled_data)
+        needs_live_processing = self._needs_live_processing(live_status, duration)
+
+        def is_bad_format(fmt):
+            if needs_live_processing and not fmt.get('is_from_start'):
+                return True
+            elif (live_status == 'is_live' and needs_live_processing != 'is_live'
+                    and fmt.get('protocol') == 'http_dash_segments'):
+                return True
+
+        for fmt in filter(is_bad_format, formats):
+            fmt['preference'] = (fmt.get('preference') or -1) - 10
+            fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
+
+        if needs_live_processing:
+            self._prepare_live_from_start_formats(
+                formats, video_id, live_start_time, url, webpage_url, smuggled_data, live_status == 'is_live')
  
          formats.extend(self._extract_storyboard(player_responses, duration))
  
@@ -3669,22 +3922,10 @@ def feed_entry(name):
              'categories': [category] if category else None,
              'tags': keywords,
              'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'),
-            'is_live': is_live,
-            'was_live': (False if is_live or is_upcoming or live_content is False
-                         else None if is_live is None or is_upcoming is None
-                         else live_content),
-            'live_status': 'is_upcoming' if is_upcoming else None,  # rest will be set by YoutubeDL
+            'live_status': live_status,
              'release_timestamp': live_start_time,
          }
  
-        if get_first(video_details, 'isPostLiveDvr'):
-            self.write_debug('Video is in Post-Live Manifestless mode')
-            info['live_status'] = 'post_live'
-            if (duration or 0) > 4 * 3600:
-                self.report_warning(
-                    'The livestream has not finished processing. Only 4 hours of the video can be currently downloaded. '
-                    'This is a known issue and patches are welcome')
-
          subtitles = {}
          pctr = traverse_obj(player_responses, (..., 'captions', 'playerCaptionsTracklistRenderer'), expected_type=dict)
          if pctr:
@@ -3814,7 +4055,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
                  'url': f'https://www.youtube.com/watch?v={video_id}&bpctr=9999999999&has_verified=1',
                  'video_id': video_id,
                  'ext': 'json',
-                'protocol': 'youtube_live_chat' if is_live or is_upcoming else 'youtube_live_chat_replay',
+                'protocol': ('youtube_live_chat' if live_status in ('is_live', 'is_upcoming')
+                             else 'youtube_live_chat_replay'),
              }]
  
          if initial_data:
@@ -3849,19 +4091,24 @@ def process_language(container, base_url, lang_code, sub_name, query):
                      vpir,
                      lambda x: x['videoActions']['menuRenderer']['topLevelButtons'],
                      list) or []):
-                tbr = tlb.get('toggleButtonRenderer') or {}
-                for getter, regex in [(
-                        lambda x: x['defaultText']['accessibility']['accessibilityData'],
-                        r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
-                            lambda x: x['accessibility'],
-                            lambda x: x['accessibilityData']['accessibilityData'],
-                        ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
-                    label = (try_get(tbr, getter, dict) or {}).get('label')
-                    if label:
-                        mobj = re.match(regex, label)
-                        if mobj:
-                            info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
-                            break
+                tbrs = variadic(
+                    traverse_obj(
+                        tlb, 'toggleButtonRenderer',
+                        ('segmentedLikeDislikeButtonRenderer', ..., 'toggleButtonRenderer'),
+                        default=[]))
+                for tbr in tbrs:
+                    for getter, regex in [(
+                            lambda x: x['defaultText']['accessibility']['accessibilityData'],
+                            r'(?P<count>[\d,]+)\s*(?P<type>(?:dis)?like)'), ([
+                                lambda x: x['accessibility'],
+                                lambda x: x['accessibilityData']['accessibilityData'],
+                            ], r'(?P<type>(?:dis)?like) this video along with (?P<count>[\d,]+) other people')]:
+                        label = (try_get(tbr, getter, dict) or {}).get('label')
+                        if label:
+                            mobj = re.match(regex, label)
+                            if mobj:
+                                info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count'))
+                                break
              sbr_tooltip = try_get(
                  vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip'])
              if sbr_tooltip:
@@ -3870,6 +4117,15 @@ def process_language(container, base_url, lang_code, sub_name, query):
                      'like_count': str_to_int(like_count),
                      'dislike_count': str_to_int(dislike_count),
                  })
+            vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer'))
+            if vcr:
+                vc = self._get_count(vcr, 'viewCount')
+                # Upcoming premieres with waiting count are treated as live here
+                if vcr.get('isLive'):
+                    info['concurrent_view_count'] = vc
+                elif info.get('view_count') is None:
+                    info['view_count'] = vc
+
          vsir = get_first(contents, 'videoSecondaryInfoRenderer')
          if vsir:
              vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer'))
@@ -3915,8 +4171,12 @@ def process_language(container, base_url, lang_code, sub_name, query):
          upload_date = (
              unified_strdate(get_first(microformats, 'uploadDate'))
              or unified_strdate(search_meta('uploadDate')))
-        if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'):
-            upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
+        if not upload_date or (
+            live_status in ('not_live', None)
+            and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
+        ):
+            upload_date = strftime_or_none(
+                self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
          info['upload_date'] = upload_date
  
          for to, frm in fallbacks.items():
@@ -3928,33 +4188,25 @@ def process_language(container, base_url, lang_code, sub_name, query):
              if v:
                  info[d_k] = v
  
-        is_private = get_first(video_details, 'isPrivate', expected_type=bool)
-        is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
-        is_membersonly = None
-        is_premium = None
-        if initial_data and is_private is not None:
-            is_membersonly = False
-            is_premium = False
-            contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
-            badge_labels = set()
-            for content in contents:
-                if not isinstance(content, dict):
-                    continue
-                badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
-            for badge_label in badge_labels:
-                if badge_label.lower() == 'members only':
-                    is_membersonly = True
-                elif badge_label.lower() == 'premium':
-                    is_premium = True
-                elif badge_label.lower() == 'unlisted':
-                    is_unlisted = True
-
-        info['availability'] = self._availability(
-            is_private=is_private,
-            needs_premium=is_premium,
-            needs_subscription=is_membersonly,
-            needs_auth=info['age_limit'] >= 18,
-            is_unlisted=None if is_private is None else is_unlisted)
+        badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
+
+        is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
+                      or get_first(video_details, 'isPrivate', expected_type=bool))
+
+        info['availability'] = (
+            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+            else self._availability(
+                is_private=is_private,
+                needs_premium=(
+                    self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
+                    or False if initial_data and is_private is not None else None),
+                needs_subscription=(
+                    self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
+                    or False if initial_data and is_private is not None else None),
+                needs_auth=info['age_limit'] >= 18,
+                is_unlisted=None if is_private is None else (
+                    self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
+                    or get_first(microformats, 'isUnlisted', expected_type=bool))))
  
          info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
  
@@ -4114,8 +4366,8 @@ def _playlist_entries(self, video_list_renderer):
              yield self._extract_video(renderer)
  
      def _rich_entries(self, rich_grid_renderer):
-        renderer = try_get(
-            rich_grid_renderer, lambda x: x['content']['videoRenderer'], dict) or {}
+        renderer = traverse_obj(
+            rich_grid_renderer, ('content', ('videoRenderer', 'reelItemRenderer')), get_all=False) or {}
          video_id = renderer.get('videoId')
          if not video_id:
              return
@@ -4192,6 +4444,13 @@ def _rich_grid_entries(self, contents):
                      yield entry
      '''
  
+    def _report_history_entries(self, renderer):
+        for url in traverse_obj(renderer, (
+                'rows', ..., 'reportHistoryTableRowRenderer', 'cells', ...,
+                'reportHistoryTableCellRenderer', 'cell', 'reportHistoryTableTextCellRenderer', 'text', 'runs', ...,
+                'navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url')):
+            yield self.url_result(urljoin('https://www.youtube.com', url), YoutubeIE)
+
      def _extract_entries(self, parent_renderer, continuation_list):
          # continuation_list is modified in-place with continuation_list = [continuation_token]
          continuation_list[:] = [None]
@@ -4203,12 +4462,16 @@ def _extract_entries(self, parent_renderer, continuation_list):
                  content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation',
                  expected_type=dict)
              if not is_renderer:
-                renderer = content.get('richItemRenderer')
-                if renderer:
-                    for entry in self._rich_entries(renderer):
+                if content.get('richItemRenderer'):
+                    for entry in self._rich_entries(content['richItemRenderer']):
                          yield entry
                      continuation_list[0] = self._extract_continuation(parent_renderer)
+                elif content.get('reportHistorySectionRenderer'):  # https://www.youtube.com/reporthistory
+                    table = traverse_obj(content, ('reportHistorySectionRenderer', 'table', 'tableRenderer'))
+                    yield from self._report_history_entries(table)
+                    continuation_list[0] = self._extract_continuation(table)
                  continue
+
              isr_contents = try_get(is_renderer, lambda x: x['contents'], list) or []
              for isr_content in isr_contents:
                  if not isinstance(isr_content, dict):
@@ -4269,26 +4532,6 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
              # See: https://github.com/ytdl-org/youtube-dl/issues/28702
              visitor_data = self._extract_visitor_data(response) or visitor_data
  
-            known_continuation_renderers = {
-                'playlistVideoListContinuation': self._playlist_entries,
-                'gridContinuation': self._grid_entries,
-                'itemSectionContinuation': self._post_thread_continuation_entries,
-                'sectionListContinuation': extract_entries,  # for feeds
-            }
-            continuation_contents = try_get(
-                response, lambda x: x['continuationContents'], dict) or {}
-            continuation_renderer = None
-            for key, value in continuation_contents.items():
-                if key not in known_continuation_renderers:
-                    continue
-                continuation_renderer = value
-                continuation_list = [None]
-                yield from known_continuation_renderers[key](continuation_renderer)
-                continuation = continuation_list[0] or self._extract_continuation(continuation_renderer)
-                break
-            if continuation_renderer:
-                continue
-
              known_renderers = {
                  'videoRenderer': (self._grid_entries, 'items'),  # for membership tab
                  'gridPlaylistRenderer': (self._grid_entries, 'items'),
@@ -4297,24 +4540,32 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
                  'playlistVideoRenderer': (self._playlist_entries, 'contents'),
                  'itemSectionRenderer': (extract_entries, 'contents'),  # for feeds
                  'richItemRenderer': (extract_entries, 'contents'),  # for hashtag
-                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents')
+                'backstagePostThreadRenderer': (self._post_thread_continuation_entries, 'contents'),
+                'reportHistoryTableRowRenderer': (self._report_history_entries, 'rows'),
+                'playlistVideoListContinuation': (self._playlist_entries, None),
+                'gridContinuation': (self._grid_entries, None),
+                'itemSectionContinuation': (self._post_thread_continuation_entries, None),
+                'sectionListContinuation': (extract_entries, None),  # for feeds
              }
-            on_response_received = dict_get(response, ('onResponseReceivedActions', 'onResponseReceivedEndpoints'))
-            continuation_items = try_get(
-                on_response_received, lambda x: x[0]['appendContinuationItemsAction']['continuationItems'], list)
-            continuation_item = try_get(continuation_items, lambda x: x[0], dict) or {}
+
+            continuation_items = traverse_obj(response, (
+                ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ...,
+                'appendContinuationItemsAction', 'continuationItems'
+            ), 'continuationContents', get_all=False)
+            continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={})
+
              video_items_renderer = None
-            for key, value in continuation_item.items():
+            for key in continuation_item.keys():
                  if key not in known_renderers:
                      continue
-                video_items_renderer = {known_renderers[key][1]: continuation_items}
+                func, parent_key = known_renderers[key]
+                video_items_renderer = {parent_key: continuation_items} if parent_key else continuation_items
                  continuation_list = [None]
-                yield from known_renderers[key][0](video_items_renderer)
+                yield from func(video_items_renderer)
                  continuation = continuation_list[0] or self._extract_continuation(video_items_renderer)
+
+            if not video_items_renderer:
                  break
-            if video_items_renderer:
-                continue
-            break
  
      @staticmethod
      def _extract_selected_tab(tabs, fatal=True):
@@ -4340,7 +4591,7 @@ def _extract_uploader(self, data):
              uploader['uploader_url'] = urljoin(
                  'https://www.youtube.com/',
                  try_get(owner, lambda x: x['navigationEndpoint']['browseEndpoint']['canonicalBaseUrl'], str))
-        return {k: v for k, v in uploader.items() if v is not None}
+        return filter_dict(uploader)
  
      def _extract_from_tabs(self, item_id, ytcfg, data, tabs):
          playlist_id = title = description = channel_url = channel_name = channel_id = None
@@ -4400,7 +4651,7 @@ def _get_uncropped(url):
              playlist_id = item_id
  
          playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
-        last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
+        last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))
          if title is None:
              title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
          title += format_field(selected_tab, 'title', ' - %s')
@@ -4494,31 +4745,37 @@ def _extract_availability(self, data):
          Note: Unless YouTube tells us explicitly, we do not assume it is public
          @param data: response
          """
-        is_private = is_unlisted = None
          renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
-        badge_labels = self._extract_badges(renderer)
+
+        player_header_privacy = traverse_obj(
+            data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)
+
+        badges = self._extract_badges(renderer)
  
          # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
-        privacy_dropdown_entries = try_get(
-            renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
-        for renderer_dict in privacy_dropdown_entries:
-            is_selected = try_get(
-                renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
-            if not is_selected:
-                continue
-            label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
-            if label:
-                badge_labels.add(label.lower())
-                break
+        privacy_setting_icon = traverse_obj(
+            renderer, (
+                'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
+                lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
+            get_all=False, expected_type=str)
  
-        for badge_label in badge_labels:
-            if badge_label == 'unlisted':
-                is_unlisted = True
-            elif badge_label == 'private':
-                is_private = True
-            elif badge_label == 'public':
-                is_unlisted = is_private = False
-        return self._availability(is_private, False, False, False, is_unlisted)
+        return (
+            'public' if (
+                self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+                or player_header_privacy == 'PUBLIC'
+                or privacy_setting_icon == 'PRIVACY_PUBLIC')
+            else self._availability(
+                is_private=(
+                    self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
+                    or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
+                    else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
+                is_unlisted=(
+                    self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
+                    or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
+                    else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None else None),
+                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
+                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
+                needs_auth=False))
  
      @staticmethod
      def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
@@ -4794,6 +5051,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
              'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
              'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+            'availability': 'public',
          },
          'playlist_count': 1,
      }, {
@@ -4811,6 +5069,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
              'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
              'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+            'availability': 'public',
          },
          'playlist_count': 0,
      }, {
@@ -4957,6 +5216,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
              'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
              'channel': 'Christiaan008',
+            'availability': 'public',
          },
          'playlist_count': 96,
      }, {
@@ -4975,6 +5235,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'view_count': int,
              'description': '',
              'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
+            'availability': 'public',
          },
          'playlist_mincount': 1123,
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -4998,6 +5259,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel': 'Interstellar Movie',
              'description': '',
              'modified_date': r're:\d{8}',
+            'availability': 'public',
          },
          'playlist_mincount': 21,
      }, {
@@ -5016,6 +5278,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
              'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
              'modified_date': r're:\d{8}',
+            'availability': 'public',
          },
          'playlist_mincount': 200,
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5035,6 +5298,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/c/blanktv',
              'modified_date': r're:\d{8}',
              'description': '',
+            'availability': 'public',
          },
          'playlist_mincount': 1000,
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5053,6 +5317,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
              'channel_url': 'https://www.youtube.com/user/Computerphile',
              'channel': 'Computerphile',
+            'availability': 'public',
          },
          'playlist_mincount': 11,
      }, {
@@ -5218,6 +5483,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
              'tags': [],
              'channel': 'NoCopyrightSounds',
+            'availability': 'public',
          },
          'playlist_mincount': 166,
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5238,6 +5504,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'modified_date': r're:\d{8}',
              'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
              'description': '',
+            'availability': 'public',
          },
          'expected_warnings': [
              'The URL does not have a videos tab',
@@ -5338,6 +5605,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel': 'Royalty Free Music - Topic',
              'view_count': int,
              'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
+            'availability': 'public',
          },
          'expected_warnings': [
              'does not have a videos tab',
@@ -5371,6 +5639,55 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
          },
          'playlist_mincount': 2
+    }, {
+        'note': 'translated tab name',
+        'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
+        'info_dict': {
+            'id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'tags': [],
+            'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            'description': '',
+            'title': 'cole-dlp-test-acc - 再生リスト',
+            'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            'uploader': 'cole-dlp-test-acc',
+            'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'channel': 'cole-dlp-test-acc',
+        },
+        'playlist_mincount': 1,
+        'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
+        'expected_warnings': ['Preferring "ja"'],
+    }, {
+        # XXX: this should really check flat playlist entries, but the test suite doesn't support that
+        'note': 'preferred lang set with playlist with translated video titles',
+        'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
+        'info_dict': {
+            'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
+            'tags': [],
+            'view_count': int,
+            'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            'uploader': 'cole-dlp-test-acc',
+            'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'channel': 'cole-dlp-test-acc',
+            'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'description': 'test',
+            'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            'title': 'dlp test playlist',
+            'availability': 'public',
+        },
+        'playlist_mincount': 1,
+        'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
+        'expected_warnings': ['Preferring "ja"'],
+    }, {
+        # shorts audio pivot for 2GtVksBMYFM.
+        'url': 'https://www.youtube.com/feed/sfv_audio_pivot?bp=8gUrCikSJwoLMkd0VmtzQk1ZRk0SCzJHdFZrc0JNWUZNGgsyR3RWa3NCTVlGTQ==',
+        'info_dict': {
+            'id': 'sfv_audio_pivot',
+            'title': 'sfv_audio_pivot',
+            'tags': [],
+        },
+        'playlist_mincount': 50,
+
      }]
  
      @classmethod
@@ -5455,10 +5772,20 @@ def get_mobj(url):
          tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
          if tabs:
              selected_tab = self._extract_selected_tab(tabs)
-            selected_tab_name = selected_tab.get('title', '').lower()
+            selected_tab_url = urljoin(
+                url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
+            translated_tab_name = selected_tab.get('title', '').lower()
+
+            # Prefer tab name from tab url as it is always in en,
+            # but only when preferred lang is set as it may not extract reliably in all cases.
+            selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name
+                                 or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:]  # primary
+                                 or translated_tab_name)
+
              if selected_tab_name == 'home':
                  selected_tab_name = 'featured'
              requested_tab_name = mobj['tab'][1:]
+
              if 'no-youtube-channel-redirect' not in compat_opts:
                  if requested_tab_name == 'live':  # Live tab should have redirected to the video
                      raise UserNotLive(video_id=mobj['id'])
@@ -5570,6 +5897,7 @@ class YoutubePlaylistIE(InfoExtractor):
              'channel': 'milan',
              'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
              'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
+            'availability': 'public',
          },
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
      }, {
@@ -5588,6 +5916,7 @@ class YoutubePlaylistIE(InfoExtractor):
              'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
              'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
              'modified_date': r're:\d{8}',
+            'availability': 'public',
          },
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
      }, {
@@ -5776,7 +6105,7 @@ def _extract_notification_renderer(self, notification):
          title = self._search_regex(
              rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
              'video title', default=None)
-        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
+        upload_date = (strftime_or_none(self._parse_time_text(self._get_text(notification, 'sentTimeText')), '%Y%m%d')
                         if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
                         else None)
          return {
@@ -6026,6 +6355,30 @@ def _real_extract(self, url):
              ie=YoutubeTabIE, video_id=playlist_id)
  
  
+class YoutubeShortsAudioPivotIE(InfoExtractor):
+    IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
+    IE_NAME = 'youtube:shorts:pivot:audio'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/source/(?P<id>[\w-]{11})/shorts'
+    _TESTS = [{
+        'url': 'https://www.youtube.com/source/Lyj-MZSAA9o/shorts',
+        'only_matching': True,
+    }]
+
+    @staticmethod
+    def _generate_audio_pivot_params(video_id):
+        """
+        Generates sfv_audio_pivot browse params for this video id
+        """
+        pb_params = b'\xf2\x05+\n)\x12\'\n\x0b%b\x12\x0b%b\x1a\x0b%b' % ((video_id.encode(),) * 3)
+        return urllib.parse.quote(base64.b64encode(pb_params).decode())
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        return self.url_result(
+            f'https://www.youtube.com/feed/sfv_audio_pivot?bp={self._generate_audio_pivot_params(video_id)}',
+            ie=YoutubeTabIE)
+
+
  class YoutubeTruncatedURLIE(InfoExtractor):
      IE_NAME = 'youtube:truncated_url'
      IE_DESC = False  # Do not list