[extractor/youtube] Support changing extraction language (#4470)

author coletdjnz <redacted>

Fri, 9 Sep 2022 05:16:46 +0000 (05:16 +0000)

committer GitHub <redacted>

Fri, 9 Sep 2022 05:16:46 +0000 (05:16 +0000)
author coletdjnz <redacted>
Fri, 9 Sep 2022 05:16:46 +0000 (05:16 +0000)
committer GitHub <redacted>
Fri, 9 Sep 2022 05:16:46 +0000 (05:16 +0000)
diff --git a/README.md b/README.md

index 77e597ba0141accea2c083f82dd44da71e49a2d8..62c83e721e447e3ee9e8641195de82258d3f677b 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1705,6 +1705,8 @@ #### youtube
      * E.g. `all,all,1000,10` will get a maximum of 1000 replies total, with up to 10 replies per thread. `1000,all,100` will get a maximum of 1000 comments, with a maximum of 100 replies total
  * `innertube_host`: Innertube API host to use for all API requests; e.g. `studio.youtube.com`, `youtubei.googleapis.com`. Note that cookies exported from one subdomain will not work on others
  * `innertube_key`: Innertube API key to use for all API requests
+* `lang`: Supported content language code to prefer translated metadata of this language (case-sensitive). By default, video primary language metadata is preferred, with a fallback to `en` translated.
+  * See youtube.py for list of supported content language codes.
  
  #### youtubetab (YouTube playlists, channels, feeds, etc.)
  * `skip`: One or more of `webpage` (skip initial webpage download), `authcheck` (allow the download of playlists requiring authentication when no initial webpage is downloaded. This may cause unwanted behavior, see [#1122](https://github.com/yt-dlp/yt-dlp/pull/1122) for more details)
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index 6c4e995b8c63bf26544491b23d9d3f1efce77f00..ac1a5f210907b425e72c9e00c0a533234269905a 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2,6 +2,7 @@
  import calendar
  import copy
  import datetime
+import enum
  import hashlib
  import itertools
  import json
@@ -275,6 +276,15 @@ def build_innertube_clients():
  build_innertube_clients()
  
  
+class BadgeType(enum.Enum):
+    AVAILABILITY_UNLISTED = enum.auto()
+    AVAILABILITY_PRIVATE = enum.auto()
+    AVAILABILITY_PUBLIC = enum.auto()
+    AVAILABILITY_PREMIUM = enum.auto()
+    AVAILABILITY_SUBSCRIPTION = enum.auto()
+    LIVE_NOW = enum.auto()
+
+
  class YoutubeBaseInfoExtractor(InfoExtractor):
      """Provide base functions for Youtube extractors"""
  
@@ -367,6 +377,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
          r'(?:www\.)?piped\.privacy\.com\.de',
      )
  
+    # extracted from account/account_menu ep
+    # XXX: These are the supported YouTube UI and API languages,
+    # which is slightly different from languages supported for translation in YouTube studio
+    _SUPPORTED_LANG_CODES = [
+        'af', 'az', 'id', 'ms', 'bs', 'ca', 'cs', 'da', 'de', 'et', 'en-IN', 'en-GB', 'en', 'es',
+        'es-419', 'es-US', 'eu', 'fil', 'fr', 'fr-CA', 'gl', 'hr', 'zu', 'is', 'it', 'sw', 'lv',
+        'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi',
+        'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw',
+        'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml',
+        'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko'
+    ]
+
+    @functools.cached_property
+    def _preferred_lang(self):
+        """
+        Returns a language code supported by YouTube for the user preferred language.
+        Returns None if no preferred language set.
+        """
+        preferred_lang = self._configuration_arg('lang', ie_key='Youtube', casesense=True, default=[''])[0]
+        if not preferred_lang:
+            return
+        if preferred_lang not in self._SUPPORTED_LANG_CODES:
+            raise ExtractorError(
+                f'Unsupported language code: {preferred_lang}. Supported language codes (case-sensitive): {join_nonempty(*self._SUPPORTED_LANG_CODES, delim=", ")}.',
+                expected=True)
+        elif preferred_lang != 'en':
+            self.report_warning(
+                f'Preferring "{preferred_lang}" translated fields. Note that some metadata extraction may fail or be incorrect.')
+        return preferred_lang
+
      def _initialize_consent(self):
          cookies = self._get_cookies('https://www.youtube.com/')
          if cookies.get('__Secure-3PSID'):
@@ -391,7 +431,7 @@ def _initialize_pref(self):
                  pref = dict(urllib.parse.parse_qsl(pref_cookie.value))
              except ValueError:
                  self.report_warning('Failed to parse user PREF cookie' + bug_reports_message())
-        pref.update({'hl': 'en', 'tz': 'UTC'})
+        pref.update({'hl': self._preferred_lang or 'en', 'tz': 'UTC'})
          self._set_cookie('.youtube.com', name='PREF', value=urllib.parse.urlencode(pref))
  
      def _real_initialize(self):
@@ -439,7 +479,7 @@ def _extract_context(self, ytcfg=None, default_client='web'):
              (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict)
          # Enforce language and tz for extraction
          client_context = traverse_obj(context, 'client', expected_type=dict, default={})
-        client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
+        client_context.update({'hl': self._preferred_lang or 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0})
          return context
  
      _SAPISID = None
@@ -678,13 +718,49 @@ def _extract_and_report_alerts(self, data, *args, **kwargs):
          return self._report_alerts(self._extract_alerts(data), *args, **kwargs)
  
      def _extract_badges(self, renderer: dict):
-        badges = set()
-        for badge in try_get(renderer, lambda x: x['badges'], list) or []:
-            label = try_get(badge, lambda x: x['metadataBadgeRenderer']['label'], str)
-            if label:
-                badges.add(label.lower())
+        privacy_icon_map = {
+            'PRIVACY_UNLISTED': BadgeType.AVAILABILITY_UNLISTED,
+            'PRIVACY_PRIVATE': BadgeType.AVAILABILITY_PRIVATE,
+            'PRIVACY_PUBLIC': BadgeType.AVAILABILITY_PUBLIC
+        }
+
+        badge_style_map = {
+            'BADGE_STYLE_TYPE_MEMBERS_ONLY': BadgeType.AVAILABILITY_SUBSCRIPTION,
+            'BADGE_STYLE_TYPE_PREMIUM': BadgeType.AVAILABILITY_PREMIUM,
+            'BADGE_STYLE_TYPE_LIVE_NOW': BadgeType.LIVE_NOW
+        }
+
+        label_map = {
+            'unlisted': BadgeType.AVAILABILITY_UNLISTED,
+            'private': BadgeType.AVAILABILITY_PRIVATE,
+            'members only': BadgeType.AVAILABILITY_SUBSCRIPTION,
+            'live': BadgeType.LIVE_NOW,
+            'premium': BadgeType.AVAILABILITY_PREMIUM
+        }
+
+        badges = []
+        for badge in traverse_obj(renderer, ('badges', ..., 'metadataBadgeRenderer'), default=[]):
+            badge_type = (
+                privacy_icon_map.get(traverse_obj(badge, ('icon', 'iconType'), expected_type=str))
+                or badge_style_map.get(traverse_obj(badge, 'style'))
+            )
+            if badge_type:
+                badges.append({'type': badge_type})
+                continue
+
+            # fallback, won't work in some languages
+            label = traverse_obj(badge, 'label', expected_type=str, default='')
+            for match, label_badge_type in label_map.items():
+                if match in label.lower():
+                    badges.append({'type': badge_type})
+                    continue
+
          return badges
  
+    @staticmethod
+    def _has_badge(badges, badge_type):
+        return bool(traverse_obj(badges, lambda _, v: v['type'] == badge_type))
+
      @staticmethod
      def _get_text(data, *path_list, max_runs=None):
          for path in path_list or [None]:
@@ -755,9 +831,9 @@ def extract_relative_time(relative_time_text):
              except ValueError:
                  return None
  
-    def _extract_time_text(self, renderer, *path_list):
-        """@returns (timestamp, time_text)"""
-        text = self._get_text(renderer, *path_list) or ''
+    def _parse_time_text(self, text):
+        if not text:
+            return
          dt = self.extract_relative_time(text)
          timestamp = None
          if isinstance(dt, datetime.datetime):
@@ -770,9 +846,10 @@ def _extract_time_text(self, renderer, *path_list):
                          (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'),
                          text.lower(), 'time text', default=None)))
  
-        if text and timestamp is None:
-            self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True)
-        return timestamp, text
+        if text and timestamp is None and self._preferred_lang in (None, 'en'):
+            self.report_warning(
+                f'Cannot parse localized time text "{text}"', only_once=True)
+        return timestamp
  
      def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None,
                            ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None,
@@ -848,7 +925,7 @@ def _extract_video(self, renderer):
          channel_id = traverse_obj(
              renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'),
              expected_type=str, get_all=False)
-        timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText')
+        time_text = self._get_text(renderer, 'publishedTimeText') or ''
          scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False))
          overlay_style = traverse_obj(
              renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'),
@@ -874,15 +951,21 @@ def _extract_video(self, renderer):
              'uploader': uploader,
              'channel_id': channel_id,
              'thumbnails': thumbnails,
-            'upload_date': (strftime_or_none(timestamp, '%Y%m%d')
+            'upload_date': (strftime_or_none(self._parse_time_text(time_text), '%Y%m%d')
                              if self._configuration_arg('approximate_date', ie_key='youtubetab')
                              else None),
              'live_status': ('is_upcoming' if scheduled_timestamp is not None
                              else 'was_live' if 'streamed' in time_text.lower()
-                            else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
+                            else 'is_live' if overlay_style == 'LIVE' or self._has_badge(badges, BadgeType.LIVE_NOW)
                              else None),
              'release_timestamp': scheduled_timestamp,
-            'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
+            'availability':
+                'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+                else self._availability(
+                    is_private=self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE) or None,
+                    needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
+                    needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
+                    is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None)
          }
  
  
@@ -2306,6 +2389,61 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                  'tags': [],
                  'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
              }
+        }, {
+            # Prefer primary title+description language metadata by default
+            # Do not prefer translated description if primary is empty
+            'url': 'https://www.youtube.com/watch?v=el3E4MbxRqQ',
+            'info_dict': {
+                'id': 'el3E4MbxRqQ',
+                'ext': 'mp4',
+                'title': 'dlp test video 2 - primary sv no desc',
+                'description': '',
+                'channel': 'cole-dlp-test-acc',
+                'tags': [],
+                'view_count': int,
+                'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+                'like_count': int,
+                'playable_in_embed': True,
+                'availability': 'unlisted',
+                'thumbnail': 'https://i.ytimg.com/vi_webp/el3E4MbxRqQ/maxresdefault.webp',
+                'age_limit': 0,
+                'duration': 5,
+                'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+                'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+                'live_status': 'not_live',
+                'upload_date': '20220908',
+                'categories': ['People & Blogs'],
+                'uploader': 'cole-dlp-test-acc',
+                'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            },
+            'params': {'skip_download': True}
+        }, {
+            # Extractor argument: prefer translated title+description
+            'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng',
+            'info_dict': {
+                'id': 'gHKT4uU8Zng',
+                'ext': 'mp4',
+                'channel': 'cole-dlp-test-acc',
+                'tags': [],
+                'duration': 5,
+                'live_status': 'not_live',
+                'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+                'upload_date': '20220728',
+                'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+                'view_count': int,
+                'categories': ['People & Blogs'],
+                'thumbnail': 'https://i.ytimg.com/vi_webp/gHKT4uU8Zng/maxresdefault.webp',
+                'title': 'dlp test video title translated (fr)',
+                'availability': 'public',
+                'uploader': 'cole-dlp-test-acc',
+                'age_limit': 0,
+                'description': 'dlp test video description translated (fr)',
+                'playable_in_embed': True,
+                'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+                'uploader_url': 'http://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            },
+            'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}},
+            'expected_warnings': [r'Preferring "fr" translated fields'],
          }, {
              'note': '6 channel audio',
              'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
@@ -2907,8 +3045,10 @@ def _extract_comment(self, comment_renderer, parent=None):
  
          text = self._get_text(comment_renderer, 'contentText')
  
-        # note: timestamp is an estimate calculated from the current time and time_text
-        timestamp, time_text = self._extract_time_text(comment_renderer, 'publishedTimeText')
+        # Timestamp is an estimate calculated from the current time and time_text
+        time_text = self._get_text(comment_renderer, 'publishedTimeText') or ''
+        timestamp = self._parse_time_text(time_text)
+
          author = self._get_text(comment_renderer, 'authorText')
          author_id = try_get(comment_renderer,
                              lambda x: x['authorEndpoint']['browseEndpoint']['browseId'], str)
@@ -3554,11 +3694,19 @@ def _real_extract(self, url):
          microformats = traverse_obj(
              player_responses, (..., 'microformat', 'playerMicroformatRenderer'),
              expected_type=dict, default=[])
-        video_title = (
-            get_first(video_details, 'title')
-            or self._get_text(microformats, (..., 'title'))
-            or search_meta(['og:title', 'twitter:title', 'title']))
-        video_description = get_first(video_details, 'shortDescription')
+
+        translated_title = self._get_text(microformats, (..., 'title'))
+        video_title = (self._preferred_lang and translated_title
+                       or get_first(video_details, 'title')  # primary
+                       or translated_title
+                       or search_meta(['og:title', 'twitter:title', 'title']))
+        translated_description = self._get_text(microformats, (..., 'description'))
+        original_description = get_first(video_details, 'shortDescription')
+        video_description = (
+            self._preferred_lang and translated_description
+            # If original description is blank, it will be an empty string.
+            # Do not prefer translated description in this case.
+            or original_description if original_description is not None else translated_description)
  
          multifeed_metadata_list = get_first(
              player_responses,
@@ -3988,7 +4136,8 @@ def process_language(container, base_url, lang_code, sub_name, query):
              and info.get('live_status') != 'is_upcoming'
              and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', [])
          ):
-            upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') or upload_date
+            upload_date = strftime_or_none(
+                self._parse_time_text(self._get_text(vpir, 'dateText')), '%Y%m%d') or upload_date
          info['upload_date'] = upload_date
  
          for to, frm in fallbacks.items():
@@ -4000,33 +4149,25 @@ def process_language(container, base_url, lang_code, sub_name, query):
              if v:
                  info[d_k] = v
  
-        is_private = get_first(video_details, 'isPrivate', expected_type=bool)
-        is_unlisted = get_first(microformats, 'isUnlisted', expected_type=bool)
-        is_membersonly = None
-        is_premium = None
-        if initial_data and is_private is not None:
-            is_membersonly = False
-            is_premium = False
-            contents = try_get(initial_data, lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], list) or []
-            badge_labels = set()
-            for content in contents:
-                if not isinstance(content, dict):
-                    continue
-                badge_labels.update(self._extract_badges(content.get('videoPrimaryInfoRenderer')))
-            for badge_label in badge_labels:
-                if badge_label.lower() == 'members only':
-                    is_membersonly = True
-                elif badge_label.lower() == 'premium':
-                    is_premium = True
-                elif badge_label.lower() == 'unlisted':
-                    is_unlisted = True
-
-        info['availability'] = self._availability(
-            is_private=is_private,
-            needs_premium=is_premium,
-            needs_subscription=is_membersonly,
-            needs_auth=info['age_limit'] >= 18,
-            is_unlisted=None if is_private is None else is_unlisted)
+        badges = self._extract_badges(traverse_obj(contents, (..., 'videoPrimaryInfoRenderer'), get_all=False))
+
+        is_private = (self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
+                      or get_first(video_details, 'isPrivate', expected_type=bool))
+
+        info['availability'] = (
+            'public' if self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+            else self._availability(
+                is_private=is_private,
+                needs_premium=(
+                    self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM)
+                    or False if initial_data and is_private is not None else None),
+                needs_subscription=(
+                    self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION)
+                    or False if initial_data and is_private is not None else None),
+                needs_auth=info['age_limit'] >= 18,
+                is_unlisted=None if is_private is None else (
+                    self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
+                    or get_first(microformats, 'isUnlisted', expected_type=bool))))
  
          info['__post_extractor'] = self.extract_comments(master_ytcfg, video_id, contents, webpage)
  
@@ -4472,7 +4613,7 @@ def _get_uncropped(url):
              playlist_id = item_id
  
          playlist_stats = traverse_obj(primary_sidebar_renderer, 'stats')
-        last_updated_unix, _ = self._extract_time_text(playlist_stats, 2)
+        last_updated_unix = self._parse_time_text(self._get_text(playlist_stats, 2))
          if title is None:
              title = self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or playlist_id
          title += format_field(selected_tab, 'title', ' - %s')
@@ -4566,31 +4707,37 @@ def _extract_availability(self, data):
          Note: Unless YouTube tells us explicitly, we do not assume it is public
          @param data: response
          """
-        is_private = is_unlisted = None
          renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarPrimaryInfoRenderer') or {}
-        badge_labels = self._extract_badges(renderer)
+
+        player_header_privacy = traverse_obj(
+            data, ('header', 'playlistHeaderRenderer', 'privacy'), expected_type=str)
+
+        badges = self._extract_badges(renderer)
  
          # Personal playlists, when authenticated, have a dropdown visibility selector instead of a badge
-        privacy_dropdown_entries = try_get(
-            renderer, lambda x: x['privacyForm']['dropdownFormFieldRenderer']['dropdown']['dropdownRenderer']['entries'], list) or []
-        for renderer_dict in privacy_dropdown_entries:
-            is_selected = try_get(
-                renderer_dict, lambda x: x['privacyDropdownItemRenderer']['isSelected'], bool) or False
-            if not is_selected:
-                continue
-            label = self._get_text(renderer_dict, ('privacyDropdownItemRenderer', 'label'))
-            if label:
-                badge_labels.add(label.lower())
-                break
+        privacy_setting_icon = traverse_obj(
+            renderer, (
+                'privacyForm', 'dropdownFormFieldRenderer', 'dropdown', 'dropdownRenderer', 'entries',
+                lambda _, v: v['privacyDropdownItemRenderer']['isSelected'], 'privacyDropdownItemRenderer', 'icon', 'iconType'),
+            get_all=False, expected_type=str)
  
-        for badge_label in badge_labels:
-            if badge_label == 'unlisted':
-                is_unlisted = True
-            elif badge_label == 'private':
-                is_private = True
-            elif badge_label == 'public':
-                is_unlisted = is_private = False
-        return self._availability(is_private, False, False, False, is_unlisted)
+        return (
+            'public' if (
+                self._has_badge(badges, BadgeType.AVAILABILITY_PUBLIC)
+                or player_header_privacy == 'PUBLIC'
+                or privacy_setting_icon == 'PRIVACY_PUBLIC')
+            else self._availability(
+                is_private=(
+                    self._has_badge(badges, BadgeType.AVAILABILITY_PRIVATE)
+                    or player_header_privacy == 'PRIVATE' if player_header_privacy is not None
+                    else privacy_setting_icon == 'PRIVACY_PRIVATE' if privacy_setting_icon is not None else None),
+                is_unlisted=(
+                    self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED)
+                    or player_header_privacy == 'UNLISTED' if player_header_privacy is not None
+                    else privacy_setting_icon == 'PRIVACY_UNLISTED' if privacy_setting_icon is not None else None),
+                needs_subscription=self._has_badge(badges, BadgeType.AVAILABILITY_SUBSCRIPTION) or None,
+                needs_premium=self._has_badge(badges, BadgeType.AVAILABILITY_PREMIUM) or None,
+                needs_auth=False))
  
      @staticmethod
      def _extract_sidebar_info_renderer(data, info_renderer, expected_type=dict):
@@ -4866,6 +5013,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
              'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
              'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+            'availability': 'public',
          },
          'playlist_count': 1,
      }, {
@@ -4883,6 +5031,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA',
              'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
              'uploader_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA',
+            'availability': 'public',
          },
          'playlist_count': 0,
      }, {
@@ -5029,6 +5178,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UCEPzS1rYsrkqzSLNp76nrcg',
              'channel_url': 'https://www.youtube.com/c/ChRiStIaAn008',
              'channel': 'Christiaan008',
+            'availability': 'public',
          },
          'playlist_count': 96,
      }, {
@@ -5047,6 +5197,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'view_count': int,
              'description': '',
              'channel_id': 'UCBABnxM4Ar9ten8Mdjj1j0Q',
+            'availability': 'public',
          },
          'playlist_mincount': 1123,
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5070,6 +5221,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel': 'Interstellar Movie',
              'description': '',
              'modified_date': r're:\d{8}',
+            'availability': 'public',
          },
          'playlist_mincount': 21,
      }, {
@@ -5088,6 +5240,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_url': 'https://www.youtube.com/channel/UCTYLiWFZy8xtPwxFwX9rV7Q',
              'channel_id': 'UCTYLiWFZy8xtPwxFwX9rV7Q',
              'modified_date': r're:\d{8}',
+            'availability': 'public',
          },
          'playlist_mincount': 200,
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5107,6 +5260,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/c/blanktv',
              'modified_date': r're:\d{8}',
              'description': '',
+            'availability': 'public',
          },
          'playlist_mincount': 1000,
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5125,6 +5279,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UC9-y-6csu5WGm29I7JiwpnA',
              'channel_url': 'https://www.youtube.com/user/Computerphile',
              'channel': 'Computerphile',
+            'availability': 'public',
          },
          'playlist_mincount': 11,
      }, {
@@ -5290,6 +5445,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel_id': 'UC_aEa8K-EOJ3D6gOs7HcyNg',
              'tags': [],
              'channel': 'NoCopyrightSounds',
+            'availability': 'public',
          },
          'playlist_mincount': 166,
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
@@ -5310,6 +5466,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'modified_date': r're:\d{8}',
              'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
              'description': '',
+            'availability': 'public',
          },
          'expected_warnings': [
              'The URL does not have a videos tab',
@@ -5410,6 +5567,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'channel': 'Royalty Free Music - Topic',
              'view_count': int,
              'uploader_url': 'https://www.youtube.com/channel/UC9ALqqC4aIeG5iDs7i90Bfw',
+            'availability': 'public',
          },
          'expected_warnings': [
              'does not have a videos tab',
@@ -5443,6 +5601,45 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor):
              'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q',
          },
          'playlist_mincount': 2
+    }, {
+        'note': 'translated tab name',
+        'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists',
+        'info_dict': {
+            'id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'tags': [],
+            'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            'description': '',
+            'title': 'cole-dlp-test-acc - 再生リスト',
+            'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            'uploader': 'cole-dlp-test-acc',
+            'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'channel': 'cole-dlp-test-acc',
+        },
+        'playlist_mincount': 1,
+        'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
+        'expected_warnings': ['Preferring "ja"'],
+    }, {
+        # XXX: this should really check flat playlist entries, but the test suite doesn't support that
+        'note': 'preferred lang set with playlist with translated video titles',
+        'url': 'https://www.youtube.com/playlist?list=PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
+        'info_dict': {
+            'id': 'PLt5yu3-wZAlQAaPZ5Z-rJoTdbT-45Q7c0',
+            'tags': [],
+            'view_count': int,
+            'channel_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            'uploader': 'cole-dlp-test-acc',
+            'uploader_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'channel': 'cole-dlp-test-acc',
+            'channel_id': 'UCiu-3thuViMebBjw_5nWYrA',
+            'description': 'test',
+            'uploader_url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA',
+            'title': 'dlp test playlist',
+            'availability': 'public',
+        },
+        'playlist_mincount': 1,
+        'params': {'extractor_args': {'youtube': {'lang': ['ja']}}},
+        'expected_warnings': ['Preferring "ja"'],
      }]
  
      @classmethod
@@ -5527,10 +5724,20 @@ def get_mobj(url):
          tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
          if tabs:
              selected_tab = self._extract_selected_tab(tabs)
-            selected_tab_name = selected_tab.get('title', '').lower()
+            selected_tab_url = urljoin(
+                url, traverse_obj(selected_tab, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url')))
+            translated_tab_name = selected_tab.get('title', '').lower()
+
+            # Prefer tab name from tab url as it is always in en,
+            # but only when preferred lang is set as it may not extract reliably in all cases.
+            selected_tab_name = (self._preferred_lang in (None, 'en') and translated_tab_name
+                                 or selected_tab_url and get_mobj(selected_tab_url)['tab'][1:]  # primary
+                                 or translated_tab_name)
+
              if selected_tab_name == 'home':
                  selected_tab_name = 'featured'
              requested_tab_name = mobj['tab'][1:]
+
              if 'no-youtube-channel-redirect' not in compat_opts:
                  if requested_tab_name == 'live':  # Live tab should have redirected to the video
                      raise UserNotLive(video_id=mobj['id'])
@@ -5642,6 +5849,7 @@ class YoutubePlaylistIE(InfoExtractor):
              'channel': 'milan',
              'channel_id': 'UCEI1-PVPcYXjB73Hfelbmaw',
              'uploader_url': 'https://www.youtube.com/channel/UCEI1-PVPcYXjB73Hfelbmaw',
+            'availability': 'public',
          },
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
      }, {
@@ -5660,6 +5868,7 @@ class YoutubePlaylistIE(InfoExtractor):
              'uploader_url': 'https://www.youtube.com/c/愛低音的國王',
              'channel_id': 'UC21nz3_MesPLqtDqwdvnoxA',
              'modified_date': r're:\d{8}',
+            'availability': 'public',
          },
          'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'],
      }, {
@@ -5848,7 +6057,7 @@ def _extract_notification_renderer(self, notification):
          title = self._search_regex(
              rf'{re.escape(channel or "")}[^:]+: (.+)', notification_title,
              'video title', default=None)
-        upload_date = (strftime_or_none(self._extract_time_text(notification, 'sentTimeText')[0], '%Y%m%d')
+        upload_date = (strftime_or_none(self._parse_time_text(self._get_text(notification, 'sentTimeText')), '%Y%m%d')
                         if self._configuration_arg('approximate_date', ie_key=YoutubeTabIE.ie_key())
                         else None)
          return {
author	coletdjnz <redacted>
	Fri, 9 Sep 2022 05:16:46 +0000 (05:16 +0000)
committer	GitHub <redacted>
	Fri, 9 Sep 2022 05:16:46 +0000 (05:16 +0000)
README.md		patch \| blob \| blame \| history
yt_dlp/extractor/youtube.py		patch \| blob \| blame \| history