[ie/generic] Improve direct video link ext detection (#8340)

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 654bf5e6b63d2fc7473b13fd7b75dbfc0730ed19..ac28ed7d282ea0c91a0c6427a6d3c4a65ee2dbe7 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -15,13 +15,13 @@
  import threading
  import time
  import traceback
-import urllib.error
  import urllib.parse
  
  from .common import InfoExtractor, SearchInfoExtractor
  from .openload import PhantomJSwrapper
  from ..compat import functools
  from ..jsinterp import JSInterpreter
+from ..networking.exceptions import HTTPError, network_exceptions
  from ..utils import (
      NO_DEFAULT,
      ExtractorError,
@@ -41,7 +41,6 @@
      join_nonempty,
      js_to_json,
      mimetype2ext,
-    network_exceptions,
      orderedSet,
      parse_codecs,
      parse_count,
@@ -258,7 +257,7 @@ def build_innertube_clients():
      THIRD_PARTY = {
          'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
      }
-    BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
+    BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
      priority = qualities(BASE_CLIENTS[::-1])
  
      for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@@ -292,6 +291,7 @@ class BadgeType(enum.Enum):
      AVAILABILITY_PREMIUM = enum.auto()
      AVAILABILITY_SUBSCRIPTION = enum.auto()
      LIVE_NOW = enum.auto()
+    VERIFIED = enum.auto()
  
  
  class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -496,16 +496,10 @@ def _initialize_consent(self):
          cookies = self._get_cookies('https://www.youtube.com/')
          if cookies.get('__Secure-3PSID'):
              return
-        consent_id = None
-        consent = cookies.get('CONSENT')
-        if consent:
-            if 'YES' in consent.value:
-                return
-            consent_id = self._search_regex(
-                r'PENDING\+(\d+)', consent.value, 'consent', default=None)
-        if not consent_id:
-            consent_id = random.randint(100, 999)
-        self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id)
+        socs = cookies.get('SOCS')
+        if socs and not socs.value.startswith('CAA'):  # not consented
+            return
+        self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True)  # accept all (required for mixes)
  
      def _initialize_pref(self):
          cookies = self._get_cookies('https://www.youtube.com/')
@@ -791,17 +785,26 @@ def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
      def _extract_and_report_alerts(self, data, *args, **kwargs):
          return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
  
-    def _extract_badges(self, renderer: dict):
-        privacy_icon_map = {
+    def _extract_badges(self, badge_list: list):
+        """
+        Extract known BadgeTypes from a list of badge renderers.
+        @returns [{'type': BadgeType}]
+        """
+        icon_type_map = {
              'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
              'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
-            'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
+            'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
+            'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
+            'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
+            'CHECK': BadgeType.VERIFIED,
          }
  
          badge_style_map = {
              'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
              'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
-            'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
+            'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
+            'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
+            'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
          }
  
          label_map = {
@@ -809,13 +812,15 @@ def _extract_badges(self, renderer: dict):
              'private': BadgeType.AVAILABILITY_PRIVATE,
              'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
              'live': BadgeType.LIVE_NOW,
-            'premium': BadgeType.AVAILABILITY_PREMIUM
+            'premium': BadgeType.AVAILABILITY_PREMIUM,
+            'verified': BadgeType.VERIFIED,
+            'official artist channel': BadgeType.VERIFIED,
          }
  
          badges = []
-        for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
+        for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
              badge_type = (
-                privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
+                icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
                  or badge_style_map.get(traverse_obj(badge, 'style'))
              )
              if badge_type:
@@ -823,11 +828,12 @@ def _extract_badges(self, renderer: dict):
                  continue
  
              # fallback, won't work in some languages
-            label = traverse_obj(badge, 'label', expected_type=str, default='')
+            label = traverse_obj(
+                badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
              for match, label_badge_type in label_map.items():
                  if match in label.lower():
-                    badges.append({'type': badge_type})
-                    continue
+                    badges.append({'type': label_badge_type})
+                    break
  
          return badges
  
@@ -893,9 +899,16 @@ def _extract_thumbnails(data, *path_list):
      def extract_relative_time(relative_time_text):
          """
          Extracts a relative time from string and converts to dt object
-        e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today'
+        e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago'
          """
-        mobj = re.search(r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>microsecond|second|minute|hour|day|week|month|year)s?\s*ago', relative_time_text)
+
+        # XXX: this could be moved to a general function in utils/_utils.py
+        # The relative time text strings are roughly the same as what
+        # JavaScript's Intl.RelativeTimeFormat API generates.
+        # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat
+        mobj = re.search(
+            r'(?P<start>today|yesterday|now)|(?P<time>\d+)\s*(?P<unit>sec(?:ond)?|s|min(?:ute)?|h(?:our|r)?|d(?:ay)?|w(?:eek|k)?|mo(?:nth)?|y(?:ear|r)?)s?\s*ago',
+            relative_time_text)
          if mobj:
              start = mobj.group('start')
              if start:
@@ -928,7 +941,16 @@ def _parse_time_text(self, text):
      def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                            ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
                            default_client='web'):
-        for retry in self.RetryManager():
+        raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE))
+        # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal.
+        icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete))
+        icd_rm = next(icd_retries)
+        main_retries = iter(self.RetryManager())
+        main_rm = next(main_retries)
+        # Manual retry loop for multiple RetryManagers
+        # The proper RetryManager MUST be advanced after an error
+        # and its result MUST be checked if the manager is non-fatal
+        while True:
              try:
                  response = self._call_api(
                      ep=ep, fatal=True, headers=headers,
@@ -939,40 +961,46 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers
              except ExtractorError as e:
                  if not isinstance(e.cause, network_exceptions):
                      return self._error_or_warning(e, fatal=fatal)
-                elif not isinstance(e.cause, urllib.error.HTTPError):
-                    retry.error = e
+                elif not isinstance(e.cause, HTTPError):
+                    main_rm.error = e
+                    next(main_retries)
                      continue
  
-                first_bytes = e.cause.read(512)
+                first_bytes = e.cause.response.read(512)
                  if not is_html(first_bytes):
                      yt_error = try_get(
                          self._parse_json(
-                            self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
+                            self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False),
                          lambda x: x['error']['message'], str)
                      if yt_error:
                          self._report_alerts([('ERROR', yt_error)], fatal=False)
                  # Downloading page may result in intermittent 5xx HTTP error
-                # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289
+                # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289
                  # We also want to catch all other network exceptions since errors in later pages can be troublesome
                  # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210
-                if e.cause.code not in (403, 429):
-                    retry.error = e
+                if e.cause.status not in (403, 429):
+                    main_rm.error = e
+                    next(main_retries)
                      continue
                  return self._error_or_warning(e, fatal=fatal)
  
              try:
                  self._extract_and_report_alerts(response, only_once=True)
              except ExtractorError as e:
-                # YouTube servers may return errors we want to retry on in a 200 OK response
+                # YouTube's servers may return errors we want to retry on in a 200 OK response
                  # See: https://github.com/yt-dlp/yt-dlp/issues/839
                  if 'unknown error' in e.msg.lower():
-                    retry.error = e
+                    main_rm.error = e
+                    next(main_retries)
                      continue
                  return self._error_or_warning(e, fatal=fatal)
              # Youtube sometimes sends incomplete data
              # See: https://github.com/ytdl-org/youtube-dl/issues/28194
              if not traverse_obj(response, *variadic(check_get_keys)):
-                retry.error = ExtractorError('Incomplete data received', expected=True)
+                icd_rm.error = ExtractorError('Incomplete data received', expected=True)
+                should_retry = next(icd_retries, None)
+                if not should_retry:
+                    return None
                  continue
  
              return response
@@ -1013,8 +1041,8 @@ def _extract_video(self, renderer):
          overlay_style = traverse_obj(
              renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
              get_all=False, expected_type=str)
-        badges = self._extract_badges(renderer)
-
+        badges = self._extract_badges(traverse_obj(renderer, 'badges'))
+        owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
          navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
              renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
              expected_type=str)) or ''
@@ -1038,6 +1066,13 @@ def _extract_video(self, renderer):
                        else self._get_count({'simpleText': view_count_text}))
          view_count_field = 'concurrent_view_count' if live_status in ('is_live', 'is_upcoming') else 'view_count'
  
+        channel = (self._get_text(renderer, 'ownerText', 'shortBylineText')
+                   or self._get_text(reel_header_renderer, 'channelTitleText'))
+
+        channel_handle = traverse_obj(renderer, (
+            'shortBylineText', 'runs', ..., 'navigationEndpoint',
+            (('commandMetadata', 'webCommandMetadata', 'url'), ('browseEndpoint', 'canonicalBaseUrl'))),
+            expected_type=self.handle_from_url, get_all=False)
          return {
              '_type': 'url',
              'ie_key': YoutubeIE.ie_key(),
@@ -1047,9 +1082,11 @@ def _extract_video(self, renderer):
              'description': description,
              'duration': duration,
              'channel_id': channel_id,
-            'channel': (self._get_text(renderer, 'ownerText', 'shortBylineText')
-                        or self._get_text(reel_header_renderer, 'channelTitleText')),
+            'channel': channel,
              'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None,
+            'uploader': channel,
+            'uploader_id': channel_handle,
+            'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
              'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
              'timestamp': (self._parse_time_text(time_text)
                            if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE)
@@ -1063,7 +1100,8 @@ def _extract_video(self, renderer):
                      needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                      is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
              view_count_field: view_count,
-            'live_status': live_status
+            'live_status': live_status,
+            'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None
          }
  
  
@@ -1273,6 +1311,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Philipp Hagemeister',
                  'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
                  'uploader_id': '@PhilippHagemeister',
+                'heatmap': 'count:100',
              }
          },
          {
@@ -1315,6 +1354,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Philipp Hagemeister',
                  'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
                  'uploader_id': '@PhilippHagemeister',
+                'heatmap': 'count:100',
              },
              'params': {
                  'skip_download': True,
@@ -1398,6 +1438,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'The Witcher',
                  'uploader_url': 'https://www.youtube.com/@thewitcher',
                  'uploader_id': '@thewitcher',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
          },
          {
@@ -1426,6 +1469,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'FlyingKitty',
                  'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
                  'uploader_id': '@FlyingKitty900',
+                'comment_count': int,
+                'channel_is_verified': True,
              },
          },
          {
@@ -1559,6 +1604,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Olympics',
                  'uploader_url': 'https://www.youtube.com/@Olympics',
                  'uploader_id': '@Olympics',
+                'channel_is_verified': True,
              },
              'params': {
                  'skip_download': 'requires avconv',
@@ -1876,6 +1922,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Bernie Sanders',
                  'uploader_url': 'https://www.youtube.com/@BernieSanders',
                  'uploader_id': '@BernieSanders',
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {
                  'skip_download': True,
@@ -1937,6 +1985,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Vsauce',
                  'uploader_url': 'https://www.youtube.com/@Vsauce',
                  'uploader_id': '@Vsauce',
+                'comment_count': int,
+                'channel_is_verified': True,
              },
              'params': {
                  'skip_download': True,
@@ -2129,6 +2179,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'kudvenkat',
                  'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
                  'uploader_id': '@Csharp-video-tutorialsBlogspot',
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {
                  'skip_download': True,
@@ -2209,6 +2261,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'CBS Mornings',
                  'uploader_url': 'https://www.youtube.com/@CBSMornings',
                  'uploader_id': '@CBSMornings',
+                'comment_count': int,
+                'channel_is_verified': True,
              }
          },
          {
@@ -2279,6 +2333,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'colinfurze',
                  'uploader_url': 'https://www.youtube.com/@colinfurze',
                  'uploader_id': '@colinfurze',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {
                  'format': '17',  # 3gp format available on android
@@ -2324,6 +2381,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'SciShow',
                  'uploader_url': 'https://www.youtube.com/@SciShow',
                  'uploader_id': '@SciShow',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              }, 'params': {'format': 'mhtml', 'skip_download': True}
          }, {
              # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -2352,6 +2412,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Leon Nguyen',
                  'uploader_url': 'https://www.youtube.com/@LeonNguyen',
                  'uploader_id': '@LeonNguyen',
+                'heatmap': 'count:100',
              }
          }, {
              # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
@@ -2380,6 +2441,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Leon Nguyen',
                  'uploader_url': 'https://www.youtube.com/@LeonNguyen',
                  'uploader_id': '@LeonNguyen',
+                'heatmap': 'count:100',
              },
              'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
          }, {
@@ -2410,6 +2472,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Quackity',
                  'uploader_id': '@Quackity',
                  'uploader_url': 'https://www.youtube.com/@Quackity',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              }
          },
          {   # continuous livestream. Microformat upload date should be preferred.
@@ -2442,29 +2507,6 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader_id': '@abaointokyo',
              },
              'params': {'skip_download': True}
-        }, {
-            # Story. Requires specific player params to work.
-            'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI',
-            'info_dict': {
-                'id': 'vv8qTUWmulI',
-                'ext': 'mp4',
-                'availability': 'unlisted',
-                'view_count': int,
-                'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA',
-                'upload_date': '20220526',
-                'categories': ['Education'],
-                'title': 'Story',
-                'channel': 'IT\'S HISTORY',
-                'description': '',
-                'duration': 12,
-                'playable_in_embed': True,
-                'age_limit': 0,
-                'live_status': 'not_live',
-                'tags': [],
-                'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp',
-                'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA',
-            },
-            'skip': 'stories get removed after some period of time',
          }, {
              'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA',
              'info_dict': {
@@ -2576,6 +2618,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'MrBeast',
                  'uploader_url': 'https://www.youtube.com/@MrBeast',
                  'uploader_id': '@MrBeast',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
          }, {
@@ -2637,6 +2682,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'さなちゃんねる',
                  'uploader_url': 'https://www.youtube.com/@sana_natori',
                  'uploader_id': '@sana_natori',
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
          },
          {
@@ -2666,6 +2713,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'thumbnail': r're:^https?://.*\.webp',
                  'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
                  'playable_in_embed': True,
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {
                  'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
@@ -2702,6 +2752,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Christopher Sykes',
                  'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
                  'uploader_id': '@ChristopherSykesDocumentaries',
+                'heatmap': 'count:100',
              },
              'params': {
                  'skip_download': True,
@@ -2794,7 +2845,7 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate):
              # Obtain from MPD's maximum seq value
              old_mpd_url = mpd_url
              last_error = ctx.pop('last_error', None)
-            expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403
+            expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403
              mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000)
                                                 or (mpd_url, stream_number, False))
              if not refresh_sequence:
@@ -3074,7 +3125,7 @@ def _extract_n_function_name(self, jscode):
              return funcname
  
          return json.loads(js_to_json(self._search_regex(
-            rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
+            rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode,
              f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
  
      def _extract_n_function_code(self, video_id, player_url):
@@ -3244,42 +3295,64 @@ def _extract_chapters_from_engagement_panel(self, data, duration):
                                            chapter_time, chapter_title, duration)
              for contents in content_list)), [])
  
+    def _extract_heatmap(self, data):
+        return traverse_obj(data, (
+            'frameworkUpdates', 'entityBatchUpdate', 'mutations',
+            lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP',
+            'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., {
+                'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}),
+                'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000},
+                'value': ('intensityScoreNormalized', {float_or_none}),
+            })) or None
+
      def _extract_comment(self, comment_renderer, parent=None):
          comment_id = comment_renderer.get('commentId')
          if not comment_id:
              return
  
-        text = self._get_text(comment_renderer, 'contentText')
+        info = {
+            'id': comment_id,
+            'text': self._get_text(comment_renderer, 'contentText'),
+            'like_count': self._get_count(comment_renderer, 'voteCount'),
+            'author_id': traverse_obj(comment_renderer, ('authorEndpoint', 'browseEndpoint', 'browseId', {self.ucid_or_none})),
+            'author': self._get_text(comment_renderer, 'authorText'),
+            'author_thumbnail': traverse_obj(comment_renderer, ('authorThumbnail', 'thumbnails', -1, 'url', {url_or_none})),
+            'parent': parent or 'root',
+        }
  
          # Timestamp is an estimate calculated from the current time and time_text
          time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
          timestamp = self._parse_time_text(time_text)
  
-        author = self._get_text(comment_renderer, 'authorText')
-        author_id = try_get(comment_renderer,
-                            lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
+        info.update({
+            # FIXME: non-standard, but we need a way of showing that it is an estimate.
+            '_time_text': time_text,
+            'timestamp': timestamp,
+        })
  
-        votes = parse_count(try_get(comment_renderer, (lambda x: x['voteCount']['simpleText'],
-                                                       lambda x: x['likeCount']), str)) or 0
-        author_thumbnail = try_get(comment_renderer,
-                                   lambda x: x['authorThumbnail']['thumbnails'][-1]['url'], str)
+        info['author_url'] = urljoin(
+            'https://www.youtube.com', traverse_obj(comment_renderer, ('authorEndpoint', (
+                ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'))),
+                expected_type=str, get_all=False))
  
-        author_is_uploader = try_get(comment_renderer, lambda x: x['authorIsChannelOwner'], bool)
-        is_favorited = 'creatorHeart' in (try_get(
-            comment_renderer, lambda x: x['actionButtons']['commentActionButtonsRenderer'], dict) or {})
-        return {
-            'id': comment_id,
-            'text': text,
-            'timestamp': timestamp,
-            'time_text': time_text,
-            'like_count': votes,
-            'is_favorited': is_favorited,
-            'author': author,
-            'author_id': author_id,
-            'author_thumbnail': author_thumbnail,
-            'author_is_uploader': author_is_uploader,
-            'parent': parent or 'root'
-        }
+        author_is_uploader = traverse_obj(comment_renderer, 'authorIsChannelOwner')
+        if author_is_uploader is not None:
+            info['author_is_uploader'] = author_is_uploader
+
+        comment_abr = traverse_obj(
+            comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
+        if comment_abr is not None:
+            info['is_favorited'] = 'creatorHeart' in comment_abr
+
+        badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
+        if self._has_badge(badges, BadgeType.VERIFIED):
+            info['author_is_verified'] = True
+
+        is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
+        if is_pinned:
+            info['is_pinned'] = True
+
+        return info
  
      def _comment_entries(self, root_continuation_data, ytcfg, video_id, parent=None, tracker=None):
  
@@ -3292,7 +3365,7 @@ def extract_header(contents):
                  expected_comment_count = self._get_count(
                      comments_header_renderer, 'countText', 'commentsCount')
  
-                if expected_comment_count:
+                if expected_comment_count is not None:
                      tracker['est_total'] = expected_comment_count
                      self.to_screen(f'Downloading ~{expected_comment_count} comments')
                  comment_sort_index = int(get_single_config_arg('comment_sort') != 'top')  # 1 = new, 0 = top
@@ -3327,18 +3400,19 @@ def extract_thread(contents):
                  comment = self._extract_comment(comment_renderer, parent)
                  if not comment:
                      continue
-                is_pinned = bool(traverse_obj(comment_renderer, 'pinnedCommentBadge'))
                  comment_id = comment['id']
-                if is_pinned:
+                if comment.get('is_pinned'):
                      tracker['pinned_comment_ids'].add(comment_id)
                  # Sometimes YouTube may break and give us infinite looping comments.
                  # See: https://github.com/yt-dlp/yt-dlp/issues/6290
                  if comment_id in tracker['seen_comment_ids']:
-                    if comment_id in tracker['pinned_comment_ids'] and not is_pinned:
+                    if comment_id in tracker['pinned_comment_ids'] and not comment.get('is_pinned'):
                          # Pinned comments may appear a second time in newest first sort
                          # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                          continue
-                    self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.')
+                    self.report_warning(
+                        'Detected YouTube comments looping. Stopping comment extraction '
+                        f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                      yield
                  else:
                      tracker['seen_comment_ids'].add(comment['id'])
@@ -3363,7 +3437,7 @@ def extract_thread(contents):
          if not tracker:
              tracker = dict(
                  running_total=0,
-                est_total=0,
+                est_total=None,
                  current_page_thread=0,
                  total_parent_comments=0,
                  total_reply_comments=0,
@@ -3396,11 +3470,13 @@ def extract_thread(contents):
              continuation = self._build_api_continuation_query(self._generate_comment_continuation(video_id))
              is_forced_continuation = True
  
+        continuation_items_path = (
+            'onResponseReceivedEndpoints', ..., ('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems')
          for page_num in itertools.count(0):
              if not continuation:
                  break
              headers = self.generate_api_headers(ytcfg=ytcfg, visitor_data=self._extract_visitor_data(response))
-            comment_prog_str = f"({tracker['running_total']}/{tracker['est_total']})"
+            comment_prog_str = f"({tracker['running_total']}/~{tracker['est_total']})"
              if page_num == 0:
                  if is_first_continuation:
                      note_prefix = 'Downloading comment section API JSON'
@@ -3411,31 +3487,37 @@ def extract_thread(contents):
                  note_prefix = '%sDownloading comment%s API JSON page %d %s' % (
                      '       ' if parent else '', ' replies' if parent else '',
                      page_num, comment_prog_str)
+
+            # Do a deep check for incomplete data as sometimes YouTube may return no comments for a continuation
+            # Ignore check if YouTube says the comment count is 0.
+            check_get_keys = None
+            if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0):
+                check_get_keys = [[*continuation_items_path, ..., (
+                    'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]]
              try:
                  response = self._extract_response(
                      item_id=None, query=continuation,
                      ep='next', ytcfg=ytcfg, headers=headers, note=note_prefix,
-                    check_get_keys='onResponseReceivedEndpoints' if not is_forced_continuation else None)
+                    check_get_keys=check_get_keys)
              except ExtractorError as e:
                  # Ignore incomplete data error for replies if retries didn't work.
                  # This is to allow any other parent comments and comment threads to be downloaded.
                  # See: https://github.com/yt-dlp/yt-dlp/issues/4669
-                if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
-                    self.report_warning(
-                        'Received incomplete data for a comment reply thread and retrying did not help. '
-                        'Ignoring to let other comments be downloaded.')
-                else:
-                    raise
+                if 'incomplete data' in str(e).lower() and parent:
+                    if self.get_param('ignoreerrors') in (True, 'only_download'):
+                        self.report_warning(
+                            'Received incomplete data for a comment reply thread and retrying did not help. '
+                            'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
+                        return
+                    else:
+                        raise ExtractorError(
+                            'Incomplete data received for comment reply thread. '
+                            'Pass --ignore-errors to ignore and allow rest of comments to download.',
+                            expected=True)
+                raise
              is_forced_continuation = False
-            continuation_contents = traverse_obj(
-                response, 'onResponseReceivedEndpoints', expected_type=list, default=[])
-
              continuation = None
-            for continuation_section in continuation_contents:
-                continuation_items = traverse_obj(
-                    continuation_section,
-                    (('reloadContinuationItemsCommand', 'appendContinuationItemsAction'), 'continuationItems'),
-                    get_all=False, expected_type=list) or []
+            for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
                  if is_first_continuation:
                      continuation = extract_header(continuation_items)
                      is_first_continuation = False
@@ -3509,8 +3591,6 @@ def _is_agegated(player_response):
      def _is_unplayable(player_response):
          return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
  
-    _STORY_PLAYER_PARAMS = '8AEB'
-
      def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
  
          session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
@@ -3522,8 +3602,12 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
          yt_query = {
              'videoId': video_id,
          }
-        if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
-            yt_query['params'] = self._STORY_PLAYER_PARAMS
+        if _split_innertube_client(client)[0] == 'android':
+            yt_query['params'] = 'CgIQBg=='
+
+        pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0]
+        if pp_arg:
+            yt_query['params'] = pp_arg
  
          yt_query.update(self._generate_player_context(sts))
          return self._extract_response(
@@ -3535,7 +3619,7 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
  
      def _get_requested_clients(self, url, smuggled_data):
          requested_clients = []
-        default = ['android', 'web']
+        default = ['ios', 'android', 'web']
          allowed_clients = sorted(
              (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
              key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@@ -3647,7 +3731,7 @@ def append_client(*client_names):
  
      def _needs_live_processing(self, live_status, duration):
          if (live_status == 'is_live' and self.get_param('live_from_start')
-                or live_status == 'post_live' and (duration or 0) > 4 * 3600):
+                or live_status == 'post_live' and (duration or 0) > 2 * 3600):
              return live_status
  
      def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
@@ -3662,7 +3746,12 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
              'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
          ])
          streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
-        all_formats = self._configuration_arg('include_duplicate_formats')
+        format_types = self._configuration_arg('formats')
+        all_formats = 'duplicate' in format_types
+        if self._configuration_arg('include_duplicate_formats'):
+            all_formats = True
+            self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
+                                                'Use formats=duplicate extractor argument instead')
  
          def build_fragments(f):
              return LazyList({
@@ -3758,6 +3847,8 @@ def build_fragments(f):
                      f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
  
              client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
+            name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
+            fps = int_or_none(fmt.get('fps')) or 0
              dct = {
                  'asr': int_or_none(fmt.get('audioSampleRate')),
                  'filesize': int_or_none(fmt.get('contentLength')),
@@ -3765,16 +3856,16 @@ def build_fragments(f):
                  'format_note': join_nonempty(
                      join_nonempty(audio_track.get('displayName'),
                                    language_preference > 0 and ' (default)', delim=''),
-                    fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
-                    fmt.get('isDrc') and 'DRC',
+                    name, fmt.get('isDrc') and 'DRC',
                      try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                      try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                      throttled and 'THROTTLED', is_damaged and 'DAMAGED',
                      (self.get_param('verbose') or all_formats) and client_name,
                      delim=', '),
                  # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
-                'source_preference': -10 if throttled else -5 if itag == '22' else -1,
-                'fps': int_or_none(fmt.get('fps')) or None,
+                'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
+                                      + (100 if 'Premium' in name else 0)),
+                'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
                  'audio_channels': fmt.get('audioChannels'),
                  'height': height,
                  'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
@@ -3800,18 +3891,23 @@ def build_fragments(f):
              if single_stream and dct.get('ext'):
                  dct['container'] = dct['ext'] + '_dash'
  
-            if all_formats and dct['filesize']:
+            if (all_formats or 'dashy' in format_types) and dct['filesize']:
                  yield {
                      **dct,
                      'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                      'protocol': 'http_dash_segments',
                      'fragments': build_fragments(dct),
                  }
-            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
-            yield dct
+            if all_formats or 'dashy' not in format_types:
+                dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
+                yield dct
  
          needs_live_processing = self._needs_live_processing(live_status, duration)
-        skip_bad_formats = not self._configuration_arg('include_incomplete_formats')
+        skip_bad_formats = 'incomplete' not in format_types
+        if self._configuration_arg('include_incomplete_formats'):
+            skip_bad_formats = False
+            self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
+                                                'Use formats=incomplete extractor argument instead')
  
          skip_manifests = set(self._configuration_arg('skip'))
          if (not self.get_param('youtube_include_hls_manifest', True)
@@ -3823,7 +3919,7 @@ def build_fragments(f):
              skip_manifests.add('dash')
          if self._configuration_arg('include_live_dash'):
              self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
-                                                'Use include_incomplete_formats extractor argument instead')
+                                                'Use formats=incomplete extractor argument instead')
          elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
              skip_manifests.add('dash')
  
@@ -3840,11 +3936,24 @@ def process_manifest_format(f, proto, client_name, itag):
              elif itag:
                  f['format_id'] = itag
  
+            if f.get('source_preference') is None:
+                f['source_preference'] = -1
+
+            if itag in ('616', '235'):
+                f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
+                f['source_preference'] += 100
+
              f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
              if f['quality'] == -1 and f.get('height'):
                  f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
-            if self.get_param('verbose'):
+            if self.get_param('verbose') or all_formats:
                  f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
+            if f.get('fps') and f['fps'] <= 1:
+                del f['fps']
+
+            if proto == 'hls' and f.get('has_drm'):
+                f['has_drm'] = 'maybe'
+                f['source_preference'] -= 5
              return True
  
          subtitles = {}
@@ -3917,8 +4026,9 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
          webpage = None
          if 'webpage' not in self._configuration_arg('player_skip'):
              query = {'bpctr': '9999999999', 'has_verified': '1'}
-            if smuggled_data.get('is_story'):
-                query['pp'] = self._STORY_PLAYER_PARAMS
+            pp = self._configuration_arg('player_params', [None], casesense=True)[0]
+            if pp:
+                query['pp'] = pp
              webpage = self._download_webpage(
                  webpage_url, video_id, fatal=False, query=query)
  
@@ -3946,6 +4056,10 @@ def _list_formats(self, video_id, microformats, video_details, player_responses,
                         else None)
          streaming_data = traverse_obj(player_responses, (..., 'streamingData'))
          *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration)
+        if all(f.get('has_drm') for f in formats):
+            # If no format is known to be DRM-free (every has_drm is True or 'maybe'), treat all formats as having DRM
+            for f in formats:
+                f['has_drm'] = True
  
          return live_broadcast_details, live_status, streaming_data, formats, subtitles
  
@@ -4130,7 +4244,7 @@ def is_bad_format(fmt):
  
          for fmt in filter(is_bad_format, formats):
              fmt['preference'] = (fmt.get('preference') or -1) - 10
-            fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
+            fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
  
          if needs_live_processing:
              self._prepare_live_from_start_formats(
@@ -4222,9 +4336,13 @@ def process_language(container, base_url, lang_code, sub_name, query):
                              continue
                          trans_code += f'-{lang_code}'
                          trans_name += format_field(lang_name, None, ' from %s')
-                    # Add an "-orig" label to the original language so that it can be distinguished.
-                    # The subs are returned without "-orig" as well for compatibility
                      if lang_code == f'a-{orig_trans_code}':
+                        # Set audio language based on original subtitles
+                        for f in formats:
+                            if f.get('acodec') != 'none' and not f.get('language'):
+                                f['language'] = orig_trans_code
+                        # Add an "-orig" label to the original language so that it can be distinguished.
+                        # The subs are returned without "-orig" as well for compatibility
                          process_language(
                              automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                      # Setting tlang=lang returns damaged subtitles.
@@ -4244,15 +4362,21 @@ def process_language(container, base_url, lang_code, sub_name, query):
                          info[d_k] = parse_duration(query[k][0])
  
          # Youtube Music Auto-generated description
-        if video_description:
+        if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
+            # XXX: Causes catastrophic backtracking if description has "·"
+            # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
+            # Simulating atomic groups:  (?P<a>[^xy]+)x  =>  (?=(?P<a>[^xy]+))(?P=a)x
+            # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
              mobj = re.search(
                  r'''(?xs)
-                    (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
-                    (?P<album>[^\n]+)
+                    (?=(?P<track>[^\n·]+))(?P=track)·
+                    (?=(?P<artist>[^\n]+))(?P=artist)\n+
+                    (?=(?P<album>[^\n]+))(?P=album)\n
                      (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                      (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
-                    (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
-                    .+\nAuto-generated\ by\ YouTube\.\s*$
+                    (.+?\nArtist\s*:\s*
+                        (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
+                    )?.+\nAuto-generated\ by\ YouTube\.\s*$
                  ''', video_description)
              if mobj:
                  release_year = mobj.group('release_year')
@@ -4313,6 +4437,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
                  or self._extract_chapters_from_description(video_description, duration)
                  or None)
  
+            info['heatmap'] = self._extract_heatmap(initial_data)
+
          contents = traverse_obj(
              initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'),
              expected_type=list, default=[])
@@ -4411,6 +4537,9 @@ def process_language(container, base_url, lang_code, sub_name, query):
                          info['artist'] = mrr_contents_text
                      elif mrr_title == 'Song':
                          info['track'] = mrr_contents_text
+            owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
+            if self._has_badge(owner_badges, BadgeType.VERIFIED):
+                info['channel_is_verified'] = True
  
          info.update({
              'uploader': info.get('channel'),
@@ -4428,7 +4557,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
              and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
          ):
              upload_date = strftime_or_none(
-                self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
+                self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
          info['upload_date'] = upload_date
  
          for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
@@ -4436,7 +4565,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
              if v:
                  info[d_k] = v
  
-        badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
+        badges = self._extract_badges(traverse_obj(vpir, 'badges'))
  
          is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                        or get_first(video_details, 'isPrivate', expected_type=bool))
@@ -4509,13 +4638,14 @@ def _extract_channel_renderer(self, renderer):
          channel_id = self.ucid_or_none(renderer['channelId'])
          title = self._get_text(renderer, 'title')
          channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
-        # As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet.
-        # However we can expect them to change that in the future.
          channel_handle = self.handle_from_url(
              traverse_obj(renderer, (
                  'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                                         ('browseEndpoint', 'canonicalBaseUrl')),
                  {str}), get_all=False))
+        if not channel_handle:
+            # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
+            channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))
          return {
              '_type': 'url',
              'url': channel_url,
@@ -4528,10 +4658,18 @@ def _extract_channel_renderer(self, renderer):
              'title': title,
              'uploader_id': channel_handle,
              'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
-            'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
+            # See above. YouTube sets videoCountText to the subscriber text in search channel renderers.
+            # However, in feed/channels, subscriberCountText is correctly set to the subscriber count.
+            'channel_follower_count': traverse_obj(
+                renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
              'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
-            'playlist_count': self._get_count(renderer, 'videoCountText'),
+            'playlist_count': (
+                # videoCountText may be the subscriber count
+                self._get_count(renderer, 'videoCountText')
+                if self._get_count(renderer, 'subscriberCountText') is not None else None),
              'description': self._get_text(renderer, 'descriptionSnippet'),
+            'channel_is_verified': True if self._has_badge(
+                self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
          }
  
      def _grid_entries(self, grid_renderer):
@@ -4766,7 +4904,8 @@ def _extract_entries(self, parent_renderer, continuation_list):
                      'videoRenderer': lambda x: [self._video_entry(x)],
                      'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                      'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
-                    'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
+                    'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
+                    'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
                  }
                  for key, renderer in isr_content.items():
                      if key not in known_renderers:
@@ -4794,10 +4933,15 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data):
              or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {})
          yield from extract_entries(parent_renderer)
          continuation = continuation_list[0]
-
+        seen_continuations = set()
          for page_num in itertools.count(1):
              if not continuation:
                  break
+            continuation_token = continuation.get('continuation')
+            if continuation_token is not None and continuation_token in seen_continuations:
+                self.write_debug('Detected YouTube feed looping - assuming end of feed.')
+                break
+            seen_continuations.add(continuation_token)
              headers = self.generate_api_headers(
                  ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data)
              response = self._extract_response(
@@ -4947,6 +5091,10 @@ def _get_uncropped(url):
                  'uploader_id': channel_handle,
                  'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
              })
+
+        channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
+        if self._has_badge(channel_badges, BadgeType.VERIFIED):
+            info['channel_is_verified'] = True
          # Playlist stats is a text runs array containing [video count, view count, last updated].
          # last updated or (view count and last updated) may be missing.
          playlist_stats = get_first(
@@ -4955,7 +5103,7 @@ def _get_uncropped(url):
          last_updated_unix = self._parse_time_text(
              self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
              or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
-        info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
+        info['modified_date'] = strftime_or_none(last_updated_unix)
  
          info['view_count'] = self._get_count(playlist_stats, 1)
          if info['view_count'] is None:  # 0 is allowed
@@ -5055,7 +5203,7 @@ def _extract_availability(self, data):
          playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
          player_header_privacy = playlist_header_renderer.get('privacy')
  
-        badges = self._extract_badges(sidebar_renderer)
+        badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))
  
          # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
          privacy_setting_icon = get_first(
@@ -5127,7 +5275,7 @@ def _extract_webpage(self, url, item_id, fatal=True):
                  data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {}
              except ExtractorError as e:
                  if isinstance(e.cause, network_exceptions):
-                    if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429):
+                    if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429):
                          retry.error = e
                          continue
                  self._error_or_warning(e, fatal=fatal)
@@ -5305,7 +5453,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/@3blue1brown',
              'uploader': '3Blue1Brown',
              'tags': ['Mathematics'],
-            'channel_follower_count': int
+            'channel_follower_count': int,
+            'channel_is_verified': True,
          },
      }, {
          'note': 'playlists, singlepage',
@@ -5482,6 +5631,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/@3blue1brown',
              'uploader_id': '@3blue1brown',
              'uploader': '3Blue1Brown',
+            'channel_is_verified': True,
          },
      }, {
          'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
@@ -5645,7 +5795,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
      }, {
          'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
          'info_dict': {
-            'id': 'AlTsmyW4auo',  # This will keep changing
+            'id': 'hGkQjiJLjWQ',  # This will keep changing
              'ext': 'mp4',
              'title': str,
              'upload_date': r're:\d{8}',
@@ -5669,6 +5819,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/@SkyNews',
              'uploader_id': '@SkyNews',
              'uploader': 'Sky News',
+            'channel_is_verified': True,
          },
          'params': {
              'skip_download': True,
@@ -5836,7 +5987,25 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_id': '@colethedj1894',
              'uploader': 'colethedj',
          },
+        'playlist': [{
+            'info_dict': {
+                'title': 'youtube-dl test video "\'/\\ä↭𝕐',
+                'id': 'BaW_jenozKc',
+                '_type': 'url',
+                'ie_key': 'Youtube',
+                'duration': 10,
+                'channel_id': 'UCLqxVugv74EIW3VWh2NOa3Q',
+                'channel_url': 'https://www.youtube.com/channel/UCLqxVugv74EIW3VWh2NOa3Q',
+                'view_count': int,
+                'url': 'https://www.youtube.com/watch?v=BaW_jenozKc',
+                'channel': 'Philipp Hagemeister',
+                'uploader_id': '@PhilippHagemeister',
+                'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
+                'uploader': 'Philipp Hagemeister',
+            }
+        }],
          'playlist_count': 1,
+        'params': {'extract_flat': True},
      }, {
          'note': 'API Fallback: Recommended - redirects to home page. Requires visitorData',
          'url': 'https://www.youtube.com/feed/recommended',
@@ -6137,6 +6306,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                  'channel_url': str,
                  'concurrent_view_count': int,
                  'channel': str,
+                'uploader': str,
+                'uploader_url': str,
+                'uploader_id': str,
+                'channel_is_verified': bool,  # this will keep changing
              }
          }],
          'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -6172,6 +6345,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                  'uploader': 'PewDiePie',
                  'uploader_url': 'https://www.youtube.com/@PewDiePie',
                  'uploader_id': '@PewDiePie',
+                'channel_is_verified': True,
              }
          }],
          'params': {'extract_flat': True},
@@ -6190,6 +6364,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/@3blue1brown',
              'uploader_id': '@3blue1brown',
              'uploader': '3Blue1Brown',
+            'channel_is_verified': True,
          },
          'playlist_count': 0,
      }, {
@@ -6224,8 +6399,31 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'description': 'I make music',
              'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
              'channel_follower_count': int,
+            'channel_is_verified': True,
          },
          'playlist_mincount': 10,
+    }, {
+        # Playlist with only shorts, shown as reel renderers
+        # FIXME: YouTube currently doesn't give a continuation for this,
+        # but may do so in the future.
+        'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
+        'info_dict': {
+            'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
+            'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
+            'view_count': int,
+            'uploader_id': '@BangyShorts',
+            'description': '',
+            'uploader_url': 'https://www.youtube.com/@BangyShorts',
+            'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
+            'channel': 'Bangy Shorts',
+            'uploader': 'Bangy Shorts',
+            'tags': [],
+            'availability': 'public',
+            'modified_date': '20230626',
+            'title': 'Uploads from Bangy Shorts',
+        },
+        'playlist_mincount': 100,
+        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
      }]
  
      @classmethod
@@ -6799,12 +6997,15 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
                  'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                  'title': 'Kurzgesagt – In a Nutshell',
                  'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
-                'playlist_count': int,  # XXX: should have a way of saying > 1
+                # No longer available for search as it is set to the handle.
+                # 'playlist_count': int,
                  'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                  'thumbnails': list,
                  'uploader_id': '@kurzgesagt',
                  'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                  'uploader': 'Kurzgesagt – In a Nutshell',
+                'channel_is_verified': True,
+                'channel_follower_count': int,
              }
          }],
          'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -6947,22 +7148,6 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
      }]
  
  
-class YoutubeStoriesIE(InfoExtractor):
-    IE_DESC = 'YouTube channel stories; "ytstories:" prefix'
-    IE_NAME = 'youtube:stories'
-    _VALID_URL = r'ytstories:UC(?P<id>[A-Za-z0-9_-]{21}[AQgw])$'
-    _TESTS = [{
-        'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg',
-        'only_matching': True,
-    }]
-
-    def _real_extract(self, url):
-        playlist_id = f'RLTD{self._match_id(url)}'
-        return self.url_result(
-            smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
-            ie=YoutubeTabIE, video_id=playlist_id)
-
-
  class YoutubeShortsAudioPivotIE(InfoExtractor):
      IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)'
      IE_NAME = 'youtube:shorts:pivot:audio'
@@ -7068,6 +7253,8 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
              'live_status': 'not_live',
              'channel_follower_count': int,
              'chapters': 'count:20',
+            'comment_count': int,
+            'heatmap': 'count:100',
          }
      }]
  
@@ -7128,6 +7315,8 @@ class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
              'channel': 'さなちゃんねる',
              'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
              'uploader': 'さなちゃんねる',
+            'channel_is_verified': True,
+            'heatmap': 'count:100',
          },
          'add_ie': ['Youtube'],
          'params': {'skip_download': 'Youtube'},