[extractor/youtube] Ignore incomplete data for comment threads by default (#7475)

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 6e7485c03010a07e4421acf9f05792891900b652..2c64f8e84525160a592583a2df73de8f19d8f7f3 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -258,7 +258,7 @@ def build_innertube_clients():
      THIRD_PARTY = {
          'embedUrl': 'https://www.youtube.com/',  # Can be any valid URL
      }
-    BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb')
+    BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb')
      priority = qualities(BASE_CLIENTS[::-1])
  
      for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()):
@@ -292,6 +292,7 @@ class BadgeType(enum.Enum):
      AVAILABILITY_PREMIUM = enum.auto()
      AVAILABILITY_SUBSCRIPTION = enum.auto()
      LIVE_NOW = enum.auto()
+    VERIFIED = enum.auto()
  
  
  class YoutubeBaseInfoExtractor(InfoExtractor):
@@ -791,17 +792,26 @@ def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False):
      def _extract_and_report_alerts(self, data, *args, **kwargs):
          return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
  
-    def _extract_badges(self, renderer: dict):
-        privacy_icon_map = {
+    def _extract_badges(self, badge_list: list):
+        """
+        Extract known BadgeType values from a list of badge renderers.
+        @returns [{'type': BadgeType}]
+        """
+        icon_type_map = {
              'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
              'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
-            'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
+            'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC,
+            'CHECK_CIRCLE_THICK': BadgeType.VERIFIED,
+            'OFFICIAL_ARTIST_BADGE': BadgeType.VERIFIED,
+            'CHECK': BadgeType.VERIFIED,
          }
  
          badge_style_map = {
              'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
              'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
-            'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
+            'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW,
+            'BADGE_STYLE_TYPE_VERIFIED': BadgeType.VERIFIED,
+            'BADGE_STYLE_TYPE_VERIFIED_ARTIST': BadgeType.VERIFIED,
          }
  
          label_map = {
@@ -809,13 +819,15 @@ def _extract_badges(self, renderer: dict):
              'private': BadgeType.AVAILABILITY_PRIVATE,
              'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
              'live': BadgeType.LIVE_NOW,
-            'premium': BadgeType.AVAILABILITY_PREMIUM
+            'premium': BadgeType.AVAILABILITY_PREMIUM,
+            'verified': BadgeType.VERIFIED,
+            'official artist channel': BadgeType.VERIFIED,
          }
  
          badges = []
-        for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer')):
+        for badge in traverse_obj(badge_list, (..., lambda key, _: re.search(r'[bB]adgeRenderer$', key))):
              badge_type = (
-                privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
+                icon_type_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
                  or badge_style_map.get(traverse_obj(badge, 'style'))
              )
              if badge_type:
@@ -823,11 +835,12 @@ def _extract_badges(self, renderer: dict):
                  continue
  
              # fallback, won't work in some languages
-            label = traverse_obj(badge, 'label', expected_type=str, default='')
+            label = traverse_obj(
+                badge, 'label', ('accessibilityData', 'label'), 'tooltip', 'iconTooltip', get_all=False, expected_type=str, default='')
              for match, label_badge_type in label_map.items():
                  if match in label.lower():
-                    badges.append({'type': badge_type})
-                    continue
+                    badges.append({'type': label_badge_type})
+                    break
  
          return badges
  
@@ -1020,8 +1033,8 @@ def _extract_video(self, renderer):
          overlay_style = traverse_obj(
              renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
              get_all=False, expected_type=str)
-        badges = self._extract_badges(renderer)
-
+        badges = self._extract_badges(traverse_obj(renderer, 'badges'))
+        owner_badges = self._extract_badges(traverse_obj(renderer, 'ownerBadges'))
          navigation_url = urljoin('https://www.youtube.com/', traverse_obj(
              renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
              expected_type=str)) or ''
@@ -1079,7 +1092,8 @@ def _extract_video(self, renderer):
                      needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
                      is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None),
              view_count_field: view_count,
-            'live_status': live_status
+            'live_status': live_status,
+            'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None
          }
  
  
@@ -1332,6 +1346,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Philipp Hagemeister',
                  'uploader_url': 'https://www.youtube.com/@PhilippHagemeister',
                  'uploader_id': '@PhilippHagemeister',
+                'heatmap': 'count:100',
              },
              'params': {
                  'skip_download': True,
@@ -1415,6 +1430,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'The Witcher',
                  'uploader_url': 'https://www.youtube.com/@thewitcher',
                  'uploader_id': '@thewitcher',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
          },
          {
@@ -1444,6 +1462,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader_url': 'https://www.youtube.com/@FlyingKitty900',
                  'uploader_id': '@FlyingKitty900',
                  'comment_count': int,
+                'channel_is_verified': True,
              },
          },
          {
@@ -1577,6 +1596,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Olympics',
                  'uploader_url': 'https://www.youtube.com/@Olympics',
                  'uploader_id': '@Olympics',
+                'channel_is_verified': True,
              },
              'params': {
                  'skip_download': 'requires avconv',
@@ -1894,6 +1914,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Bernie Sanders',
                  'uploader_url': 'https://www.youtube.com/@BernieSanders',
                  'uploader_id': '@BernieSanders',
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {
                  'skip_download': True,
@@ -1955,6 +1977,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Vsauce',
                  'uploader_url': 'https://www.youtube.com/@Vsauce',
                  'uploader_id': '@Vsauce',
+                'comment_count': int,
+                'channel_is_verified': True,
              },
              'params': {
                  'skip_download': True,
@@ -2147,6 +2171,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'kudvenkat',
                  'uploader_url': 'https://www.youtube.com/@Csharp-video-tutorialsBlogspot',
                  'uploader_id': '@Csharp-video-tutorialsBlogspot',
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {
                  'skip_download': True,
@@ -2227,6 +2253,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'CBS Mornings',
                  'uploader_url': 'https://www.youtube.com/@CBSMornings',
                  'uploader_id': '@CBSMornings',
+                'comment_count': int,
+                'channel_is_verified': True,
              }
          },
          {
@@ -2297,6 +2325,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'colinfurze',
                  'uploader_url': 'https://www.youtube.com/@colinfurze',
                  'uploader_id': '@colinfurze',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {
                  'format': '17',  # 3gp format available on android
@@ -2342,6 +2373,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'SciShow',
                  'uploader_url': 'https://www.youtube.com/@SciShow',
                  'uploader_id': '@SciShow',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              }, 'params': {'format': 'mhtml', 'skip_download': True}
          }, {
              # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939)
@@ -2370,6 +2404,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Leon Nguyen',
                  'uploader_url': 'https://www.youtube.com/@LeonNguyen',
                  'uploader_id': '@LeonNguyen',
+                'heatmap': 'count:100',
              }
          }, {
              # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date
@@ -2398,6 +2433,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Leon Nguyen',
                  'uploader_url': 'https://www.youtube.com/@LeonNguyen',
                  'uploader_id': '@LeonNguyen',
+                'heatmap': 'count:100',
              },
              'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']}
          }, {
@@ -2428,6 +2464,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Quackity',
                  'uploader_id': '@Quackity',
                  'uploader_url': 'https://www.youtube.com/@Quackity',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              }
          },
          {   # continuous livestream. Microformat upload date should be preferred.
@@ -2594,6 +2633,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'MrBeast',
                  'uploader_url': 'https://www.youtube.com/@MrBeast',
                  'uploader_id': '@MrBeast',
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {'extractor_args': {'youtube': {'player_client': ['ios']}}, 'format': '233-1'},
          }, {
@@ -2655,6 +2697,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'さなちゃんねる',
                  'uploader_url': 'https://www.youtube.com/@sana_natori',
                  'uploader_id': '@sana_natori',
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
          },
          {
@@ -2684,6 +2728,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'thumbnail': r're:^https?://.*\.webp',
                  'channel_url': 'https://www.youtube.com/channel/UCxzC4EngIsMrPmbm6Nxvb-A',
                  'playable_in_embed': True,
+                'comment_count': int,
+                'channel_is_verified': True,
+                'heatmap': 'count:100',
              },
              'params': {
                  'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}},
@@ -2720,6 +2767,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'uploader': 'Christopher Sykes',
                  'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries',
                  'uploader_id': '@ChristopherSykesDocumentaries',
+                'heatmap': 'count:100',
              },
              'params': {
                  'skip_download': True,
@@ -3092,7 +3140,7 @@ def _extract_n_function_name(self, jscode):
              return funcname
  
          return json.loads(js_to_json(self._search_regex(
-            rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
+            rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])[,;]', jscode,
              f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
  
      def _extract_n_function_code(self, video_id, player_url):
@@ -3308,14 +3356,13 @@ def _extract_comment(self, comment_renderer, parent=None):
              info['author_is_uploader'] = author_is_uploader
  
          comment_abr = traverse_obj(
-            comment_renderer, ('actionsButtons', 'commentActionButtonsRenderer'), expected_type=dict)
+            comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict)
          if comment_abr is not None:
              info['is_favorited'] = 'creatorHeart' in comment_abr
  
-        comment_ab_icontype = traverse_obj(
-            comment_renderer, ('authorCommentBadge', 'authorCommentBadgeRenderer', 'icon', 'iconType'))
-        if comment_ab_icontype is not None:
-            info['author_is_verified'] = comment_ab_icontype in ('CHECK_CIRCLE_THICK', 'OFFICIAL_ARTIST_BADGE')
+        badges = self._extract_badges([traverse_obj(comment_renderer, 'authorCommentBadge')])
+        if self._has_badge(badges, BadgeType.VERIFIED):
+            info['author_is_verified'] = True
  
          is_pinned = traverse_obj(comment_renderer, 'pinnedCommentBadge')
          if is_pinned:
@@ -3379,7 +3426,9 @@ def extract_thread(contents):
                          # Pinned comments may appear a second time in newest first sort
                          # See: https://github.com/yt-dlp/yt-dlp/issues/6712
                          continue
-                    self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.')
+                    self.report_warning(
+                        'Detected YouTube comments looping. Stopping comment extraction '
+                        f'{"for this thread" if parent else ""} as we probably cannot get any more.')
                      yield
                  else:
                      tracker['seen_comment_ids'].add(comment['id'])
@@ -3470,12 +3519,18 @@ def extract_thread(contents):
                  # Ignore incomplete data error for replies if retries didn't work.
                  # This is to allow any other parent comments and comment threads to be downloaded.
                  # See: https://github.com/yt-dlp/yt-dlp/issues/4669
-                if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True:
-                    self.report_warning(
-                        'Received incomplete data for a comment reply thread and retrying did not help. '
-                        'Ignoring to let other comments be downloaded.')
-                else:
-                    raise
+                if 'incomplete data' in str(e).lower() and parent:
+                    if self.get_param('ignoreerrors') in (True, 'only_download'):
+                        self.report_warning(
+                            'Received incomplete data for a comment reply thread and retrying did not help. '
+                            'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.')
+                        return
+                    else:
+                        raise ExtractorError(
+                            'Incomplete data received for comment reply thread. '
+                            'Pass --ignore-errors to ignore and allow rest of comments to download.',
+                            expected=True)
+                raise
              is_forced_continuation = False
              continuation = None
              for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]):
@@ -3552,7 +3607,7 @@ def _is_agegated(player_response):
      def _is_unplayable(player_response):
          return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
  
-    _STORY_PLAYER_PARAMS = '8AEB'
+    _PLAYER_PARAMS = 'CgIQBg=='
  
      def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
  
@@ -3566,7 +3621,7 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
              'videoId': video_id,
          }
          if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
-            yt_query['params'] = self._STORY_PLAYER_PARAMS
+            yt_query['params'] = self._PLAYER_PARAMS
  
          yt_query.update(self._generate_player_context(sts))
          return self._extract_response(
@@ -3578,7 +3633,7 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg,
  
      def _get_requested_clients(self, url, smuggled_data):
          requested_clients = []
-        default = ['android', 'web']
+        default = ['ios', 'android', 'web']
          allowed_clients = sorted(
              (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'),
              key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True)
@@ -3690,7 +3745,7 @@ def append_client(*client_names):
  
      def _needs_live_processing(self, live_status, duration):
          if (live_status == 'is_live' and self.get_param('live_from_start')
-                or live_status == 'post_live' and (duration or 0) > 4 * 3600):
+                or live_status == 'post_live' and (duration or 0) > 2 * 3600):
              return live_status
  
      def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration):
@@ -3705,7 +3760,12 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l
              'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres'
          ])
          streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...))
-        all_formats = self._configuration_arg('include_duplicate_formats')
+        format_types = self._configuration_arg('formats')
+        all_formats = 'duplicate' in format_types
+        if self._configuration_arg('include_duplicate_formats'):
+            all_formats = True
+            self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. '
+                                                'Use formats=duplicate extractor argument instead')
  
          def build_fragments(f):
              return LazyList({
@@ -3801,6 +3861,8 @@ def build_fragments(f):
                      f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True)
  
              client_name = fmt.get(STREAMING_DATA_CLIENT_NAME)
+            name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or ''
+            fps = int_or_none(fmt.get('fps')) or 0
              dct = {
                  'asr': int_or_none(fmt.get('audioSampleRate')),
                  'filesize': int_or_none(fmt.get('contentLength')),
@@ -3808,16 +3870,16 @@ def build_fragments(f):
                  'format_note': join_nonempty(
                      join_nonempty(audio_track.get('displayName'),
                                    language_preference > 0 and ' (default)', delim=''),
-                    fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
-                    fmt.get('isDrc') and 'DRC',
+                    name, fmt.get('isDrc') and 'DRC',
                      try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
                      try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
                      throttled and 'THROTTLED', is_damaged and 'DAMAGED',
                      (self.get_param('verbose') or all_formats) and client_name,
                      delim=', '),
                  # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
-                'source_preference': -10 if throttled else -5 if itag == '22' else -1,
-                'fps': int_or_none(fmt.get('fps')) or None,
+                'source_preference': ((-10 if throttled else -5 if itag == '22' else -1)
+                                      + (100 if 'Premium' in name else 0)),
+                'fps': fps if fps > 1 else None,  # For some formats, fps is wrongly returned as 1
                  'audio_channels': fmt.get('audioChannels'),
                  'height': height,
                  'quality': q(quality) - bool(fmt.get('isDrc')) / 2,
@@ -3843,18 +3905,23 @@ def build_fragments(f):
              if single_stream and dct.get('ext'):
                  dct['container'] = dct['ext'] + '_dash'
  
-            if all_formats and dct['filesize']:
+            if (all_formats or 'dashy' in format_types) and dct['filesize']:
                  yield {
                      **dct,
                      'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'],
                      'protocol': 'http_dash_segments',
                      'fragments': build_fragments(dct),
                  }
-            dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
-            yield dct
+            if all_formats or 'dashy' not in format_types:
+                dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE}
+                yield dct
  
          needs_live_processing = self._needs_live_processing(live_status, duration)
-        skip_bad_formats = not self._configuration_arg('include_incomplete_formats')
+        skip_bad_formats = 'incomplete' not in format_types
+        if self._configuration_arg('include_incomplete_formats'):
+            skip_bad_formats = False
+            self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. '
+                                                'Use formats=incomplete extractor argument instead')
  
          skip_manifests = set(self._configuration_arg('skip'))
          if (not self.get_param('youtube_include_hls_manifest', True)
@@ -3866,7 +3933,7 @@ def build_fragments(f):
              skip_manifests.add('dash')
          if self._configuration_arg('include_live_dash'):
              self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. '
-                                                'Use include_incomplete_formats extractor argument instead')
+                                                'Use formats=incomplete extractor argument instead')
          elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live':
              skip_manifests.add('dash')
  
@@ -3883,11 +3950,17 @@ def process_manifest_format(f, proto, client_name, itag):
              elif itag:
                  f['format_id'] = itag
  
+            if itag in ('616', '235'):
+                f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ')
+                f['source_preference'] = (f.get('source_preference') or -1) + 100
+
              f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
              if f['quality'] == -1 and f.get('height'):
                  f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
-            if self.get_param('verbose'):
+            if self.get_param('verbose') or all_formats:
                  f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ')
+            if f.get('fps') and f['fps'] <= 1:
+                del f['fps']
              return True
  
          subtitles = {}
@@ -3960,8 +4033,8 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
          webpage = None
          if 'webpage' not in self._configuration_arg('player_skip'):
              query = {'bpctr': '9999999999', 'has_verified': '1'}
-            if smuggled_data.get('is_story'):
-                query['pp'] = self._STORY_PLAYER_PARAMS
+            if smuggled_data.get('is_story'):  # XXX: Deprecated
+                query['pp'] = self._PLAYER_PARAMS
              webpage = self._download_webpage(
                  webpage_url, video_id, fatal=False, query=query)
  
@@ -4173,7 +4246,7 @@ def is_bad_format(fmt):
  
          for fmt in filter(is_bad_format, formats):
              fmt['preference'] = (fmt.get('preference') or -1) - 10
-            fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ')
+            fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ')
  
          if needs_live_processing:
              self._prepare_live_from_start_formats(
@@ -4265,9 +4338,13 @@ def process_language(container, base_url, lang_code, sub_name, query):
                              continue
                          trans_code += f'-{lang_code}'
                          trans_name += format_field(lang_name, None, ' from %s')
-                    # Add an "-orig" label to the original language so that it can be distinguished.
-                    # The subs are returned without "-orig" as well for compatibility
                      if lang_code == f'a-{orig_trans_code}':
+                        # Set audio language based on original subtitles
+                        for f in formats:
+                            if f.get('acodec') != 'none' and not f.get('language'):
+                                f['language'] = orig_trans_code
+                        # Add an "-orig" label to the original language so that it can be distinguished.
+                        # The subs are returned without "-orig" as well for compatibility
                          process_language(
                              automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {})
                      # Setting tlang=lang returns damaged subtitles.
@@ -4287,15 +4364,21 @@ def process_language(container, base_url, lang_code, sub_name, query):
                          info[d_k] = parse_duration(query[k][0])
  
          # Youtube Music Auto-generated description
-        if video_description:
+        if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'):
+            # XXX: Causes catastrophic backtracking if description has "·"
+            # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI
+            # Simulating atomic groups:  (?P<a>[^xy]+)x  =>  (?=(?P<a>[^xy]+))(?P=a)x
+            # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2
              mobj = re.search(
                  r'''(?xs)
-                    (?P<track>[^·\n]+)·(?P<artist>[^\n]+)\n+
-                    (?P<album>[^\n]+)
+                    (?=(?P<track>[^\n·]+))(?P=track)·
+                    (?=(?P<artist>[^\n]+))(?P=artist)\n+
+                    (?=(?P<album>[^\n]+))(?P=album)\n
                      (?:.+?℗\s*(?P<release_year>\d{4})(?!\d))?
                      (?:.+?Released on\s*:\s*(?P<release_date>\d{4}-\d{2}-\d{2}))?
-                    (.+?\nArtist\s*:\s*(?P<clean_artist>[^\n]+))?
-                    .+\nAuto-generated\ by\ YouTube\.\s*$
+                    (.+?\nArtist\s*:\s*
+                        (?=(?P<clean_artist>[^\n]+))(?P=clean_artist)\n
+                    )?.+\nAuto-generated\ by\ YouTube\.\s*$
                  ''', video_description)
              if mobj:
                  release_year = mobj.group('release_year')
@@ -4456,6 +4539,9 @@ def process_language(container, base_url, lang_code, sub_name, query):
                          info['artist'] = mrr_contents_text
                      elif mrr_title == 'Song':
                          info['track'] = mrr_contents_text
+            owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges')))
+            if self._has_badge(owner_badges, BadgeType.VERIFIED):
+                info['channel_is_verified'] = True
  
          info.update({
              'uploader': info.get('channel'),
@@ -4473,7 +4559,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
              and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
          ):
              upload_date = strftime_or_none(
-                self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
+                self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date
          info['upload_date'] = upload_date
  
          for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]:
@@ -4481,7 +4567,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
              if v:
                  info[d_k] = v
  
-        badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
+        badges = self._extract_badges(traverse_obj(vpir, 'badges'))
  
          is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
                        or get_first(video_details, 'isPrivate', expected_type=bool))
@@ -4554,13 +4640,14 @@ def _extract_channel_renderer(self, renderer):
          channel_id = self.ucid_or_none(renderer['channelId'])
          title = self._get_text(renderer, 'title')
          channel_url = format_field(channel_id, None, 'https://www.youtube.com/channel/%s', default=None)
-        # As of 2023-03-01 YouTube doesn't use the channel handles on these renderers yet.
-        # However we can expect them to change that in the future.
          channel_handle = self.handle_from_url(
              traverse_obj(renderer, (
                  'navigationEndpoint', (('commandMetadata', 'webCommandMetadata', 'url'),
                                         ('browseEndpoint', 'canonicalBaseUrl')),
                  {str}), get_all=False))
+        if not channel_handle:
+            # As of 2023-06-01, YouTube sets subscriberCountText to the handle in search
+            channel_handle = self.handle_or_none(self._get_text(renderer, 'subscriberCountText'))
          return {
              '_type': 'url',
              'url': channel_url,
@@ -4573,10 +4660,18 @@ def _extract_channel_renderer(self, renderer):
              'title': title,
              'uploader_id': channel_handle,
              'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
-            'channel_follower_count': self._get_count(renderer, 'subscriberCountText'),
+            # See above. In search channel renderers, YouTube sets videoCountText to the subscriber count text.
+            # However, in feed/channels, subscriberCountText is set correctly to the subscriber count
+            'channel_follower_count': traverse_obj(
+                renderer, 'subscriberCountText', 'videoCountText', expected_type=self._get_count),
              'thumbnails': self._extract_thumbnails(renderer, 'thumbnail'),
-            'playlist_count': self._get_count(renderer, 'videoCountText'),
+            'playlist_count': (
+                # videoCountText may be the subscriber count
+                self._get_count(renderer, 'videoCountText')
+                if self._get_count(renderer, 'subscriberCountText') is not None else None),
              'description': self._get_text(renderer, 'descriptionSnippet'),
+            'channel_is_verified': True if self._has_badge(
+                self._extract_badges(traverse_obj(renderer, 'ownerBadges')), BadgeType.VERIFIED) else None,
          }
  
      def _grid_entries(self, grid_renderer):
@@ -4811,7 +4906,8 @@ def _extract_entries(self, parent_renderer, continuation_list):
                      'videoRenderer': lambda x: [self._video_entry(x)],
                      'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}),
                      'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}),
-                    'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)]
+                    'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)],
+                    'richGridRenderer': lambda x: self._extract_entries(x, continuation_list),
                  }
                  for key, renderer in isr_content.items():
                      if key not in known_renderers:
@@ -4992,6 +5088,10 @@ def _get_uncropped(url):
                  'uploader_id': channel_handle,
                  'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None),
              })
+
+        channel_badges = self._extract_badges(traverse_obj(data, ('header', ..., 'badges'), get_all=False))
+        if self._has_badge(channel_badges, BadgeType.VERIFIED):
+            info['channel_is_verified'] = True
          # Playlist stats is a text runs array containing [video count, view count, last updated].
          # last updated or (view count and last updated) may be missing.
          playlist_stats = get_first(
@@ -5000,7 +5100,7 @@ def _get_uncropped(url):
          last_updated_unix = self._parse_time_text(
              self._get_text(playlist_stats, 2)  # deprecated, remove when old layout discontinued
              or self._get_text(playlist_header_renderer, ('byline', 1, 'playlistBylineRenderer', 'text')))
-        info['modified_date'] = strftime_or_none(last_updated_unix, '%Y%m%d')
+        info['modified_date'] = strftime_or_none(last_updated_unix)
  
          info['view_count'] = self._get_count(playlist_stats, 1)
          if info['view_count'] is None:  # 0 is allowed
@@ -5100,7 +5200,7 @@ def _extract_availability(self, data):
          playlist_header_renderer = traverse_obj(data, ('header', 'playlistHeaderRenderer')) or {}
          player_header_privacy = playlist_header_renderer.get('privacy')
  
-        badges = self._extract_badges(sidebar_renderer)
+        badges = self._extract_badges(traverse_obj(sidebar_renderer, 'badges'))
  
          # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
          privacy_setting_icon = get_first(
@@ -5350,7 +5450,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/@3blue1brown',
              'uploader': '3Blue1Brown',
              'tags': ['Mathematics'],
-            'channel_follower_count': int
+            'channel_follower_count': int,
+            'channel_is_verified': True,
          },
      }, {
          'note': 'playlists, singlepage',
@@ -5527,6 +5628,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/@3blue1brown',
              'uploader_id': '@3blue1brown',
              'uploader': '3Blue1Brown',
+            'channel_is_verified': True,
          },
      }, {
          'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
@@ -5690,7 +5792,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
      }, {
          'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live',
          'info_dict': {
-            'id': 'AlTsmyW4auo',  # This will keep changing
+            'id': 'hGkQjiJLjWQ',  # This will keep changing
              'ext': 'mp4',
              'title': str,
              'upload_date': r're:\d{8}',
@@ -5714,6 +5816,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/@SkyNews',
              'uploader_id': '@SkyNews',
              'uploader': 'Sky News',
+            'channel_is_verified': True,
          },
          'params': {
              'skip_download': True,
@@ -6202,7 +6305,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                  'channel': str,
                  'uploader': str,
                  'uploader_url': str,
-                'uploader_id': str
+                'uploader_id': str,
+                'channel_is_verified': bool,  # this will keep changing
              }
          }],
          'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -6238,6 +6342,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
                  'uploader': 'PewDiePie',
                  'uploader_url': 'https://www.youtube.com/@PewDiePie',
                  'uploader_id': '@PewDiePie',
+                'channel_is_verified': True,
              }
          }],
          'params': {'extract_flat': True},
@@ -6256,6 +6361,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/@3blue1brown',
              'uploader_id': '@3blue1brown',
              'uploader': '3Blue1Brown',
+            'channel_is_verified': True,
          },
          'playlist_count': 0,
      }, {
@@ -6290,8 +6396,31 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'description': 'I make music',
              'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A',
              'channel_follower_count': int,
+            'channel_is_verified': True,
          },
          'playlist_mincount': 10,
+    }, {
+        # Playlist with only shorts, shown as reel renderers
+        # FIXME: future: YouTube currently doesn't provide a continuation for this,
+        # but may do so in the future.
+        'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg',
+        'info_dict': {
+            'id': 'UUxqPAgubo4coVn9Lx1FuKcg',
+            'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg',
+            'view_count': int,
+            'uploader_id': '@BangyShorts',
+            'description': '',
+            'uploader_url': 'https://www.youtube.com/@BangyShorts',
+            'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg',
+            'channel': 'Bangy Shorts',
+            'uploader': 'Bangy Shorts',
+            'tags': [],
+            'availability': 'public',
+            'modified_date': '20230626',
+            'title': 'Uploads from Bangy Shorts',
+        },
+        'playlist_mincount': 100,
+        'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
      }]
  
      @classmethod
@@ -6865,12 +6994,15 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
                  'description': 'md5:4ae48dfa9505ffc307dad26342d06bfc',
                  'title': 'Kurzgesagt – In a Nutshell',
                  'channel_id': 'UCsXVk37bltHxD1rDPwtNM8Q',
-                'playlist_count': int,  # XXX: should have a way of saying > 1
+                # No longer available in search: subscriberCountText is set to the handle there, so videoCountText is not a video count.
+                # 'playlist_count': int,
                  'channel_url': 'https://www.youtube.com/channel/UCsXVk37bltHxD1rDPwtNM8Q',
                  'thumbnails': list,
                  'uploader_id': '@kurzgesagt',
                  'uploader_url': 'https://www.youtube.com/@kurzgesagt',
                  'uploader': 'Kurzgesagt – In a Nutshell',
+                'channel_is_verified': True,
+                'channel_follower_count': int,
              }
          }],
          'params': {'extract_flat': True, 'playlist_items': '1'},
@@ -7134,6 +7266,8 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor):
              'live_status': 'not_live',
              'channel_follower_count': int,
              'chapters': 'count:20',
+            'comment_count': int,
+            'heatmap': 'count:100',
          }
      }]
  
@@ -7194,6 +7328,8 @@ class YoutubeConsentRedirectIE(YoutubeBaseInfoExtractor):
              'channel': 'さなちゃんねる',
              'description': 'md5:6aebf95cc4a1d731aebc01ad6cc9806d',
              'uploader': 'さなちゃんねる',
+            'channel_is_verified': True,
+            'heatmap': 'count:100',
          },
          'add_ie': ['Youtube'],
          'params': {'skip_download': 'Youtube'},