X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/596379e26045e7ff250d18c03ea564b6c94ab007..a25bca9f89f77e6e5153c3400c4a27020d8cba9d:/yt_dlp/extractor/youtube.py diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index fd0ede189..4ee09ad9a 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -39,9 +39,11 @@ ExtractorError, float_or_none, format_field, + get_first, int_or_none, is_html, join_nonempty, + js_to_json, mimetype2ext, network_exceptions, NO_DEFAULT, @@ -71,10 +73,6 @@ ) -def get_first(obj, keys, **kwargs): - return traverse_obj(obj, (..., *variadic(keys)), **kwargs, get_all=False) - - # any clients starting with _ cannot be explicity requested by the user INNERTUBE_CLIENTS = { 'web': { @@ -219,31 +217,53 @@ def get_first(obj, keys, **kwargs): } }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 2 - } + }, + # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) + # See: https://github.com/zerodytrash/YouTube-Internal-Clients + 'tv_embedded': { + 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'TVHTML5_SIMPLY_EMBEDDED_PLAYER', + 'clientVersion': '2.0', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 85 + }, } +def _split_innertube_client(client_name): + variant, *base = client_name.rsplit('.', 1) + if base: + return variant, base[0], variant + base, *variant = client_name.split('_', 1) + return client_name, base, variant[0] if variant else None + + def build_innertube_clients(): - third_party = { - 'embedUrl': 'https://google.com', # Can be any valid URL + THIRD_PARTY = { + 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL } - base_clients = ('android', 'web', 'ios', 'mweb') - priority = qualities(base_clients[::-1]) + BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb') + priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): 
ytcfg.setdefault('INNERTUBE_API_KEY', 'AIzaSyDCU8hByM-4DrUqRUYnGn-3llEO78bcxq8') ytcfg.setdefault('INNERTUBE_HOST', 'www.youtube.com') ytcfg.setdefault('REQUIRE_JS_PLAYER', True) ytcfg['INNERTUBE_CONTEXT']['client'].setdefault('hl', 'en') - ytcfg['priority'] = 10 * priority(client.split('_', 1)[0]) - - if client in base_clients: - INNERTUBE_CLIENTS[f'{client}_agegate'] = agegate_ytcfg = copy.deepcopy(ytcfg) - agegate_ytcfg['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' - agegate_ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party - agegate_ytcfg['priority'] -= 1 - elif client.endswith('_embedded'): - ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = third_party + + _, base_client, variant = _split_innertube_client(client) + ytcfg['priority'] = 10 * priority(base_client) + + if not variant: + INNERTUBE_CLIENTS[f'{client}_embedscreen'] = embedscreen = copy.deepcopy(ytcfg) + embedscreen['INNERTUBE_CONTEXT']['client']['clientScreen'] = 'EMBED' + embedscreen['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY + embedscreen['priority'] -= 3 + elif variant == 'embedded': + ytcfg['INNERTUBE_CONTEXT']['thirdParty'] = THIRD_PARTY ytcfg['priority'] -= 2 else: ytcfg['priority'] -= 3 @@ -257,13 +277,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): _RESERVED_NAMES = ( r'channel|c|user|playlist|watch|w|v|embed|e|watch_popup|clip|' - r'shorts|movies|results|shared|hashtag|trending|feed|feeds|' + r'shorts|movies|results|search|shared|hashtag|trending|explore|feed|feeds|' r'browse|oembed|get_video_info|iframe_api|s/player|' r'storefront|oops|index|account|reporthistory|t/terms|about|upload|signin|logout') _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM|WL|LL|LM)' - _NETRC_MACHINE = 'youtube' + # _NETRC_MACHINE = 'youtube' # If True it will raise an error if no login info is provided _LOGIN_REQUIRED = False @@ -334,21 +354,6 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 
r'(?:www\.)?hpniueoejy4opn7bc4ftgazyqjoeqwlvh2uiku2xqku6zpoa4bf5ruid\.onion', ) - def _login(self): - """ - Attempt to log in to YouTube. - If _LOGIN_REQUIRED is set and no authentication was provided, an error is raised. - """ - - if (self._LOGIN_REQUIRED - and self.get_param('cookiefile') is None - and self.get_param('cookiesfrombrowser') is None): - self.raise_login_required( - 'Login details are needed to download this content', method='cookies') - username, password = self._get_login_info() - if username: - self.report_warning(f'Cannot login to YouTube using username and password. {self._LOGIN_HINTS["cookies"]}') - def _initialize_consent(self): cookies = self._get_cookies('https://www.youtube.com/') if cookies.get('__Secure-3PSID'): @@ -373,13 +378,19 @@ def _initialize_pref(self): pref = dict(compat_urlparse.parse_qsl(pref_cookie.value)) except ValueError: self.report_warning('Failed to parse user PREF cookie' + bug_reports_message()) - pref.update({'hl': 'en'}) + pref.update({'hl': 'en', 'tz': 'UTC'}) self._set_cookie('.youtube.com', name='PREF', value=compat_urllib_parse_urlencode(pref)) def _real_initialize(self): self._initialize_pref() self._initialize_consent() - self._login() + self._check_login_required() + + def _check_login_required(self): + if (self._LOGIN_REQUIRED + and self.get_param('cookiefile') is None + and self.get_param('cookiesfrombrowser') is None): + self.raise_login_required('Login details are needed to download this content', method='cookies') _YT_INITIAL_DATA_RE = r'(?:window\s*\[\s*["\']ytInitialData["\']\s*\]|ytInitialData)\s*=\s*({.+?})\s*;' _YT_INITIAL_PLAYER_RESPONSE_RE = r'ytInitialPlayerResponse\s*=\s*({.+?})\s*;' @@ -412,8 +423,9 @@ def _extract_api_key(self, ytcfg=None, default_client='web'): def _extract_context(self, ytcfg=None, default_client='web'): context = get_first( (ytcfg, self._get_default_ytcfg(default_client)), 'INNERTUBE_CONTEXT', expected_type=dict) - # Enforce language for extraction - traverse_obj(context, 
'client', expected_type=dict, default={})['hl'] = 'en' + # Enforce language and tz for extraction + client_context = traverse_obj(context, 'client', expected_type=dict, default={}) + client_context.update({'hl': 'en', 'timeZone': 'UTC', 'utcOffsetMinutes': 0}) return context _SAPISID = None @@ -457,7 +469,7 @@ def _call_api(self, ep, query, video_id, fatal=True, headers=None, 'https://%s/youtubei/v1/%s' % (api_hostname or self._get_innertube_host(default_client), ep), video_id=video_id, fatal=fatal, note=note, errnote=errnote, data=json.dumps(data).encode('utf8'), headers=real_headers, - query={'key': api_key or self._extract_api_key()}) + query={'key': api_key or self._extract_api_key(), 'prettyPrint': 'false'}) def extract_yt_initial_data(self, item_id, webpage, fatal=True): data = self._search_regex( @@ -514,7 +526,7 @@ def _extract_visitor_data(*args): Appears to be used to track session state """ return get_first( - args, (('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))), + args, [('VISITOR_DATA', ('INNERTUBE_CONTEXT', 'client', 'visitorData'), ('responseContext', 'visitorData'))], expected_type=str) @property @@ -554,6 +566,18 @@ def generate_api_headers( headers['X-Origin'] = origin return {h: v for h, v in headers.items() if v is not None} + def _download_ytcfg(self, client, video_id): + url = { + 'web': 'https://www.youtube.com', + 'web_music': 'https://music.youtube.com', + 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' + }.get(client) + if not url: + return {} + webpage = self._download_webpage( + url, video_id, fatal=False, note=f'Downloading {client.replace("_", " ").strip()} client config') + return self.extract_ytcfg(video_id, webpage) or {} + @staticmethod def _build_api_continuation_query(continuation, ctp=None): query = { @@ -719,6 +743,7 @@ def extract_relative_time(relative_time_text): return None def _extract_time_text(self, renderer, *path_list): + """@returns 
(timestamp, time_text)""" text = self._get_text(renderer, *path_list) or '' dt = self.extract_relative_time(text) timestamp = None @@ -729,10 +754,11 @@ def _extract_time_text(self, renderer, *path_list): timestamp = ( unified_timestamp(text) or unified_timestamp( self._search_regex( - (r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*on)?\s*(.+\d)', r'\w+[\s,\.-]*\w+[\s,\.-]+20\d{2}'), text.lower(), 'time text', default=None))) + (r'([a-z]+\s*\d{1,2},?\s*20\d{2})', r'(?:.+|^)(?:live|premieres|ed|ing)(?:\s*(?:on|for))?\s*(.+\d)'), + text.lower(), 'time text', default=None))) if text and timestamp is None: - self.report_warning('Cannot parse localized time text' + bug_reports_message(), only_once=True) + self.report_warning(f"Cannot parse localized time text '{text}'" + bug_reports_message(), only_once=True) return timestamp, text def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, @@ -758,13 +784,15 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers note='%s%s' % (note, ' (retry #%d)' % count if count else '')) except ExtractorError as e: if isinstance(e.cause, network_exceptions): - if isinstance(e.cause, compat_HTTPError) and not is_html(e.cause.read(512)): - e.cause.seek(0) - yt_error = try_get( - self._parse_json(e.cause.read().decode(), item_id, fatal=False), - lambda x: x['error']['message'], compat_str) - if yt_error: - self._report_alerts([('ERROR', yt_error)], fatal=False) + if isinstance(e.cause, compat_HTTPError): + first_bytes = e.cause.read(512) + if not is_html(first_bytes): + yt_error = try_get( + self._parse_json( + self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False), + lambda x: x['error']['message'], compat_str) + if yt_error: + self._report_alerts([('ERROR', yt_error)], fatal=False) # Downloading page may result in intermittent 5xx HTTP error # Sometimes a 404 is also recieved. 
See: https://github.com/ytdl-org/youtube-dl/issues/28289 # We also want to catch all other network exceptions since errors in later pages can be troublesome @@ -815,23 +843,37 @@ def _extract_video(self, renderer): description = self._get_text(renderer, 'descriptionSnippet') duration = parse_duration(self._get_text( renderer, 'lengthText', ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'text'))) + if duration is None: + duration = parse_duration(self._search_regex( + r'(?i)(ago)(?!.*\1)\s+(?P[a-z0-9 ,]+?)(?:\s+[\d,]+\s+views)?(?:\s+-\s+play\s+short)?$', + traverse_obj(renderer, ('title', 'accessibility', 'accessibilityData', 'label'), default='', expected_type=str), + video_id, default=None, group='duration')) + view_count = self._get_count(renderer, 'viewCountText') uploader = self._get_text(renderer, 'ownerText', 'shortBylineText') channel_id = traverse_obj( - renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), expected_type=str, get_all=False) + renderer, ('shortBylineText', 'runs', ..., 'navigationEndpoint', 'browseEndpoint', 'browseId'), + expected_type=str, get_all=False) timestamp, time_text = self._extract_time_text(renderer, 'publishedTimeText') scheduled_timestamp = str_to_int(traverse_obj(renderer, ('upcomingEventData', 'startTime'), get_all=False)) overlay_style = traverse_obj( - renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), get_all=False, expected_type=str) + renderer, ('thumbnailOverlays', ..., 'thumbnailOverlayTimeStatusRenderer', 'style'), + get_all=False, expected_type=str) badges = self._extract_badges(renderer) thumbnails = self._extract_thumbnails(renderer, 'thumbnail') + navigation_url = urljoin('https://www.youtube.com/', traverse_obj( + renderer, ('navigationEndpoint', 'commandMetadata', 'webCommandMetadata', 'url'), + expected_type=str)) or '' + url = f'https://www.youtube.com/watch?v={video_id}' + if overlay_style == 'SHORTS' or '/shorts/' 
in navigation_url: + url = f'https://www.youtube.com/shorts/{video_id}' return { '_type': 'url', 'ie_key': YoutubeIE.ie_key(), 'id': video_id, - 'url': f'https://www.youtube.com/watch?v={video_id}', + 'url': url, 'title': title, 'description': description, 'duration': duration, @@ -839,7 +881,9 @@ def _extract_video(self, renderer): 'uploader': uploader, 'channel_id': channel_id, 'thumbnails': thumbnails, - 'upload_date': strftime_or_none(timestamp, '%Y%m%d'), + 'upload_date': (strftime_or_none(timestamp, '%Y%m%d') + if self._configuration_arg('approximate_date', ie_key='youtubetab') + else None), 'live_status': ('is_upcoming' if scheduled_timestamp is not None else 'was_live' if 'streamed' in time_text.lower() else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges @@ -1032,6 +1076,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'age_limit': 0, 'start_time': 1, 'end_time': 9, + 'channel_follower_count': int } }, { @@ -1075,6 +1120,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'thumbnail': 'https://i.ytimg.com/vi/BaW_jenozKc/maxresdefault.jpg', 'live_status': 'not_live', 'age_limit': 0, + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1127,6 +1173,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Music'], 'age_limit': 0, 'alt_title': 'The Spark', + 'channel_follower_count': int }, 'params': { 'youtube_include_dash_manifest': True, @@ -1159,6 +1206,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_id': 'UCzybXLxv08IApdjdN0mJhEg', 'playable_in_embed': True, 'view_count': int, + 'channel_follower_count': int }, }, { @@ -1186,6 +1234,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'duration': 177, 'playable_in_embed': True, + 'channel_follower_count': int }, }, { @@ -1195,7 +1244,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'id': 'Tq92D6wQ1mg', 'title': '[MMD] Adios - EVERGLOW [+Motion DL]', 'ext': 'mp4', - 'upload_date': '20191227', + 'upload_date': 
'20191228', 'uploader_id': 'UC1yoRdFoFJaCY-AGfD9W0wQ', 'uploader': 'Projekt Melody', 'description': 'md5:17eccca93a786d51bc67646756894066', @@ -1213,6 +1262,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'categories': ['Entertainment'], 'duration': 106, 'channel_url': 'https://www.youtube.com/channel/UC1yoRdFoFJaCY-AGfD9W0wQ', + 'channel_follower_count': int }, }, { @@ -1244,6 +1294,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCdR3RSDPqub28LjZx0v9-aA', 'live_status': 'not_live', 'artist': 'OOMPH!', + 'channel_follower_count': int }, }, { @@ -1282,6 +1333,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCYEK6xds6eo-3tr4xRdflmQ', 'categories': ['Music'], 'album': 'Some Chords', + 'channel_follower_count': int }, 'expected_warnings': [ 'DASH manifest missing', @@ -1314,6 +1366,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'live_status': 'was_live', 'view_count': int, 'channel_url': 'https://www.youtube.com/channel/UCTl3QQTvqHFjurroKxexy2Q', + 'channel_follower_count': int }, 'params': { 'skip_download': 'requires avconv', @@ -1345,6 +1398,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'live_status': 'not_live', 'availability': 'unlisted', + 'channel_follower_count': int }, }, # url_encoded_fmt_stream_map is empty string @@ -1513,6 +1567,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': True, 'like_count': int, 'age_limit': 0, + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1554,7 +1609,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'md5:e41008789470fc2533a3252216f1c1d1', 'description': 'md5:a677553cf0840649b731a3024aeff4cc', 'duration': 721, - 'upload_date': '20150127', + 'upload_date': '20150128', 'uploader_id': 'BerkmanCenter', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/user/BerkmanCenter', 'uploader': 'The Berkman Klein Center for Internet & Society', @@ -1571,6 +1626,7 @@ class 
YoutubeIE(YoutubeBaseInfoExtractor): 'thumbnail': 'https://i.ytimg.com/vi_webp/M4gD1WSo5mA/maxresdefault.webp', 'live_status': 'not_live', 'playable_in_embed': True, + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1585,7 +1641,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Democratic Socialism and Foreign Policy | Bernie Sanders', 'description': 'md5:13a2503d7b5904ef4b223aa101628f39', 'duration': 4060, - 'upload_date': '20151119', + 'upload_date': '20151120', 'uploader': 'Bernie Sanders', 'uploader_id': 'UCH1dpzjCEiGAt8CXkryhkZg', 'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', @@ -1602,6 +1658,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'live_status': 'not_live', 'channel_url': 'https://www.youtube.com/channel/UCH1dpzjCEiGAt8CXkryhkZg', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1665,6 +1722,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'playable_in_embed': True, 'live_status': 'not_live', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1774,6 +1832,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', 'tags': 'count:11', 'live_status': 'not_live', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1829,6 +1888,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': True, 'live_status': 'not_live', 'channel': 'ElevageOrVert', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1862,6 +1922,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'duration': 522, 'channel': 'kudvenkat', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -1906,6 +1967,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'thumbnail': 'https://i.ytimg.com/vi/OtqTfy26tG0/maxresdefault.jpg', 'categories': ['Music'], 'playable_in_embed': True, + 'channel_follower_count': int }, 'params': { 
'skip_download': True, @@ -1941,6 +2003,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'live_status': 'not_live', 'playable_in_embed': True, + 'channel_follower_count': int } }, { @@ -1967,6 +2030,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', 'live_status': 'not_live', 'playable_in_embed': True, + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -2008,6 +2072,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'like_count': int, 'live_status': 'not_live', 'playable_in_embed': True, + 'channel_follower_count': int }, 'params': { 'format': '17', # 3gp format available on android @@ -2051,8 +2116,95 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 248, 'categories': ['Education'], 'age_limit': 0, + 'channel_follower_count': int }, 'params': {'format': 'mhtml', 'skip_download': True} - } + }, { + # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) + 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', + 'info_dict': { + 'id': '2NUZ8W2llS4', + 'ext': 'mp4', + 'title': 'The NP that test your phone performance 🙂', + 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d', + 'uploader': 'Leon Nguyen', + 'uploader_id': 'VNSXIII', + 'uploader_url': 'http://www.youtube.com/user/VNSXIII', + 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA', + 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA', + 'duration': 21, + 'view_count': int, + 'age_limit': 0, + 'categories': ['Gaming'], + 'tags': 'count:23', + 'playable_in_embed': True, + 'live_status': 'not_live', + 'upload_date': '20220103', + 'like_count': int, + 'availability': 'public', + 'channel': 'Leon Nguyen', + 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp', + 'channel_follower_count': int + } + }, { + # date text is premiered video, ensure upload date in UTC (published 1641172509) + 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM', + 
'info_dict': { + 'id': 'mzZzzBU6lrM', + 'ext': 'mp4', + 'title': 'I Met GeorgeNotFound In Real Life...', + 'description': 'md5:cca98a355c7184e750f711f3a1b22c84', + 'uploader': 'Quackity', + 'uploader_id': 'QuackityHQ', + 'uploader_url': 'http://www.youtube.com/user/QuackityHQ', + 'channel_id': 'UC_8NknAFiyhOUaZqHR3lq3Q', + 'channel_url': 'https://www.youtube.com/channel/UC_8NknAFiyhOUaZqHR3lq3Q', + 'duration': 955, + 'view_count': int, + 'age_limit': 0, + 'categories': ['Entertainment'], + 'tags': 'count:26', + 'playable_in_embed': True, + 'live_status': 'not_live', + 'release_timestamp': 1641172509, + 'release_date': '20220103', + 'upload_date': '20220103', + 'like_count': int, + 'availability': 'public', + 'channel': 'Quackity', + 'thumbnail': 'https://i.ytimg.com/vi/mzZzzBU6lrM/maxresdefault.jpg', + 'channel_follower_count': int + } + }, + { # continuous livestream. Microformat upload date should be preferred. + # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27 + 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU', + 'info_dict': { + 'id': 'kgx4WGK0oNU', + 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'ext': 'mp4', + 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA', + 'availability': 'public', + 'age_limit': 0, + 'release_timestamp': 1637975704, + 'upload_date': '20210619', + 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA', + 'live_status': 'is_live', + 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg', + 'uploader': '阿鲍Abao', + 'uploader_url': 'http://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA', + 'channel': 'Abao in Tokyo', + 'channel_follower_count': int, + 'release_date': '20211127', + 'tags': 'count:39', + 'categories': ['People & Blogs'], + 'like_count': int, + 'uploader_id': 'UC84whx2xxsiA1gXHXXqKGOA', + 'view_count': int, + 'playable_in_embed': True, + 'description': 'md5:2ef1d002cad520f65825346e2084e49d', + }, + 
'params': {'skip_download': True} + }, ] @classmethod @@ -2108,6 +2260,7 @@ def mpd_feed(format_id, delay): return f['manifest_url'], f['manifest_stream_number'], is_live for f in formats: + f['is_live'] = True f['protocol'] = 'http_dash_segments_generator' f['fragments'] = functools.partial( self._live_dash_fragments, f['format_id'], live_start_time, mpd_feed) @@ -2130,12 +2283,12 @@ def _live_dash_fragments(self, format_id, live_start_time, mpd_feed, ctx): known_idx, no_fragment_score, last_segment_url = begin_index, 0, None fragments, fragment_base_url = None, None - def _extract_sequence_from_mpd(refresh_sequence): + def _extract_sequence_from_mpd(refresh_sequence, immediate): nonlocal mpd_url, stream_number, is_live, no_fragment_score, fragments, fragment_base_url # Obtain from MPD's maximum seq value old_mpd_url = mpd_url last_error = ctx.pop('last_error', None) - expire_fast = last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403 + expire_fast = immediate or last_error and isinstance(last_error, compat_HTTPError) and last_error.code == 403 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000) or (mpd_url, stream_number, False)) if not refresh_sequence: @@ -2149,7 +2302,7 @@ def _extract_sequence_from_mpd(refresh_sequence): except ExtractorError: fmts = None if not fmts: - no_fragment_score += 1 + no_fragment_score += 2 return False, last_seq fmt_info = next(x for x in fmts if x['manifest_stream_number'] == stream_number) fragments = fmt_info['fragments'] @@ -2172,11 +2325,12 @@ def _extract_sequence_from_mpd(refresh_sequence): urlh = None last_seq = try_get(urlh, lambda x: int_or_none(x.headers['X-Head-Seqnum'])) if last_seq is None: - no_fragment_score += 1 + no_fragment_score += 2 last_segment_url = None continue else: - should_continue, last_seq = _extract_sequence_from_mpd(True) + should_continue, last_seq = _extract_sequence_from_mpd(True, no_fragment_score > 15) + no_fragment_score += 2 if not 
should_continue: continue @@ -2194,7 +2348,7 @@ def _extract_sequence_from_mpd(refresh_sequence): try: for idx in range(known_idx, last_seq): # do not update sequence here or you'll get skipped some part of it - should_continue, _ = _extract_sequence_from_mpd(False) + should_continue, _ = _extract_sequence_from_mpd(False, False) if not should_continue: known_idx = idx - 1 raise ExtractorError('breaking out of outer loop') @@ -2218,12 +2372,7 @@ def _extract_player_url(self, *ytcfgs, webpage=None): get_all=False, expected_type=compat_str) if not player_url: return - if player_url.startswith('//'): - player_url = 'https:' + player_url - elif not re.match(r'https?://', player_url): - player_url = compat_urlparse.urljoin( - 'https://www.youtube.com', player_url) - return player_url + return urljoin('https://www.youtube.com', player_url) def _download_player_url(self, video_id, fatal=False): res = self._download_webpage( @@ -2372,11 +2521,7 @@ def _decrypt_nsig(self, s, video_id, player_url): """Turn the encrypted n field into a working signature""" if player_url is None: raise ExtractorError('Cannot decrypt nsig without player_url') - if player_url.startswith('//'): - player_url = 'https:' + player_url - elif not re.match(r'https?://', player_url): - player_url = compat_urlparse.urljoin( - 'https://www.youtube.com', player_url) + player_url = urljoin('https://www.youtube.com', player_url) sig_id = ('nsig_value', s) if sig_id in self._player_cache: @@ -2394,9 +2539,14 @@ def _decrypt_nsig(self, s, video_id, player_url): raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id) def _extract_n_function_name(self, jscode): - return self._search_regex( - (r'\.get\("n"\)\)&&\(b=(?P[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',), - jscode, 'Initial JS player n function name', group='nfunc') + nfunc, idx = self._search_regex( + r'\.get\("n"\)\)&&\(b=(?P[a-zA-Z0-9$]+)(?:\[(?P\d+)\])?\([a-zA-Z0-9]\)', + jscode, 'Initial JS player n function name', group=('nfunc', 'idx')) + if 
not idx: + return nfunc + return json.loads(js_to_json(self._search_regex( + rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode, + f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)] def _extract_n_function(self, video_id, player_url): player_id = self._extract_player_info(player_url) @@ -2825,16 +2975,6 @@ def _get_requested_clients(self, url, smuggled_data): return orderedSet(requested_clients) - def _extract_player_ytcfg(self, client, video_id): - url = { - 'web_music': 'https://music.youtube.com', - 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' - }.get(client) - if not url: - return {} - webpage = self._download_webpage(url, video_id, fatal=False, note='Downloading %s config' % client.replace('_', ' ').strip()) - return self.extract_ytcfg(video_id, webpage) or {} - def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg): initial_pr = None if webpage: @@ -2842,13 +2982,19 @@ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg): webpage, self._YT_INITIAL_PLAYER_RESPONSE_RE, video_id, 'initial player response') - original_clients = clients + all_clients = set(clients) clients = clients[::-1] prs = [] - def append_client(client_name): - if client_name in INNERTUBE_CLIENTS and client_name not in original_clients: - clients.append(client_name) + def append_client(*client_names): + """ Append the first client name that exists but not already used """ + for client_name in client_names: + actual_client = _split_innertube_client(client_name)[0] + if actual_client in INNERTUBE_CLIENTS: + if actual_client not in all_clients: + clients.append(client_name) + all_clients.add(actual_client) + return # Android player_response does not have microFormats which are needed for # extraction of some data. 
So we return the initial_pr with formats @@ -2863,10 +3009,10 @@ def append_client(client_name): tried_iframe_fallback = False player_url = None while clients: - client = clients.pop() + client, base_client, variant = _split_innertube_client(clients.pop()) player_ytcfg = master_ytcfg if client == 'web' else {} - if 'configs' not in self._configuration_arg('player_skip'): - player_ytcfg = self._extract_player_ytcfg(client, video_id) or player_ytcfg + if 'configs' not in self._configuration_arg('player_skip') and client != 'web': + player_ytcfg = self._download_ytcfg(client, video_id) or player_ytcfg player_url = player_url or self._extract_player_url(master_ytcfg, player_ytcfg, webpage=webpage) require_js_player = self._get_default_ytcfg(client).get('REQUIRE_JS_PLAYER') @@ -2891,10 +3037,13 @@ def append_client(client_name): prs.append(pr) # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in - if client.endswith('_agegate') and self._is_unplayable(pr) and self.is_authenticated: - append_client(client.replace('_agegate', '_creator')) + if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: + append_client(f'{base_client}_creator') elif self._is_agegated(pr): - append_client(f'{client}_agegate') + if variant == 'tv_embedded': + append_client(f'{base_client}_embedded') + elif not variant: + append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded') if last_error: if not len(prs): @@ -2902,7 +3051,7 @@ def append_client(client_name): self.report_warning(last_error) return prs, player_url - def _extract_formats(self, streaming_data, video_id, player_url, is_live): + def _extract_formats(self, streaming_data, video_id, player_url, is_live, duration): itags, stream_ids = {}, [] itag_qualities, res_qualities = {}, {} q = qualities([ @@ -2915,7 +3064,7 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live): streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...), 
default=[]) for fmt in streaming_formats: - if fmt.get('targetDurationSec') or fmt.get('drmFamilies'): + if fmt.get('targetDurationSec'): continue itag = str_or_none(fmt.get('itag')) @@ -2972,26 +3121,40 @@ def _extract_formats(self, streaming_data, video_id, player_url, is_live): itags[itag] = 'https' stream_ids.append(stream_id) - tbr = float_or_none( - fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) + tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) + language_preference = ( + 10 if audio_track.get('audioIsDefault') and 10 + else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10 + else -1) + # Some formats may have much smaller duration than others (possibly damaged during encoding) + # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 + # Make sure to avoid false positives with small duration differences. + # Eg: __2ABJjxzNo, ySuUZEjARPY + is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500) + if is_damaged: + self.report_warning(f'{video_id}: Some formats are possibly damaged. 
They will be deprioritized', only_once=True) dct = { 'asr': int_or_none(fmt.get('audioSampleRate')), 'filesize': int_or_none(fmt.get('contentLength')), 'format_id': itag, 'format_note': join_nonempty( '%s%s' % (audio_track.get('displayName') or '', - ' (default)' if audio_track.get('audioIsDefault') else ''), + ' (default)' if language_preference > 0 else ''), fmt.get('qualityLabel') or quality.replace('audio_quality_', ''), - throttled and 'THROTTLED', delim=', '), + throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '), 'source_preference': -10 if throttled else -1, 'fps': int_or_none(fmt.get('fps')) or None, 'height': height, 'quality': q(quality), + 'has_drm': bool(fmt.get('drmFamilies')), 'tbr': tbr, 'url': fmt_url, 'width': int_or_none(fmt.get('width')), - 'language': audio_track.get('id', '').split('.')[0], - 'language_preference': 1 if audio_track.get('audioIsDefault') else -1, + 'language': join_nonempty(audio_track.get('id', '').split('.')[0], + 'desc' if language_preference < -1 else ''), + 'language_preference': language_preference, + # Strictly de-prioritize damaged and 3gp formats + 'preference': -10 if is_damaged else -2 if itag == '17' else None, } mime_mobj = re.match( r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') @@ -3085,7 +3248,7 @@ def _extract_storyboard(self, player_responses, duration): 'width': width, 'height': height, 'fragments': [{ - 'path': url.replace('$M', str(j)), + 'url': url.replace('$M', str(j)), 'duration': min(fragment_duration, duration - (j * fragment_duration)), } for j in range(math.ceil(fragment_count))], } @@ -3104,14 +3267,14 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url): return webpage, master_ytcfg, player_responses, player_url - def _list_formats(self, video_id, microformats, video_details, player_responses, player_url): + def _list_formats(self, video_id, microformats, video_details, player_responses, player_url, duration=None): 
live_broadcast_details = traverse_obj(microformats, (..., 'liveBroadcastDetails')) is_live = get_first(video_details, 'isLive') if is_live is None: is_live = get_first(live_broadcast_details, 'isLiveNow') streaming_data = traverse_obj(player_responses, (..., 'streamingData'), default=[]) - formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live)) + formats = list(self._extract_formats(streaming_data, video_id, player_url, is_live, duration)) return live_broadcast_details, is_live, streaming_data, formats @@ -3192,7 +3355,13 @@ def feed_entry(name): return self.playlist_result( entries, video_id, video_title, video_description) - live_broadcast_details, is_live, streaming_data, formats = self._list_formats(video_id, microformats, video_details, player_responses, player_url) + duration = int_or_none( + get_first(video_details, 'lengthSeconds') + or get_first(microformats, 'lengthSeconds') + or parse_duration(search_meta('duration'))) or None + + live_broadcast_details, is_live, streaming_data, formats = self._list_formats( + video_id, microformats, video_details, player_responses, player_url, duration) if not formats: if not self.get_param('allow_unplayable_formats') and traverse_obj(streaming_data, (..., 'licenseInfos')): @@ -3264,10 +3433,6 @@ def feed_entry(name): get_first(video_details, 'channelId') or get_first(microformats, 'externalChannelId') or search_meta('channelId')) - duration = int_or_none( - get_first(video_details, 'lengthSeconds') - or get_first(microformats, 'lengthSeconds') - or parse_duration(search_meta('duration'))) or None owner_profile_url = get_first(microformats, 'ownerProfileUrl') live_content = get_first(video_details, 'isLiveContent') @@ -3300,14 +3465,11 @@ def feed_entry(name): # URL checking if user don't care about getting the best possible thumbnail 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')), 'description': video_description, - 'upload_date': unified_strdate( - get_first(microformats, 
'uploadDate') - or search_meta('uploadDate')), 'uploader': get_first(video_details, 'author'), 'uploader_id': self._search_regex(r'/(?:channel|user)/([^/?&#]+)', owner_profile_url, 'uploader id') if owner_profile_url else None, 'uploader_url': owner_profile_url, 'channel_id': channel_id, - 'channel_url': f'https://www.youtube.com/channel/{channel_id}' if channel_id else None, + 'channel_url': format_field(channel_id, template='https://www.youtube.com/channel/%s'), 'duration': duration, 'view_count': int_or_none( get_first((video_details, microformats), (..., 'viewCount')) @@ -3351,13 +3513,14 @@ def process_language(container, base_url, lang_code, sub_name, query): }) lang_subs.append({ 'ext': fmt, - 'url': update_url_query(base_url, query), + 'url': urljoin('https://www.youtube.com', update_url_query(base_url, query)), 'name': sub_name, }) subtitles, automatic_captions = {}, {} for lang_code, caption_track in captions.items(): base_url = caption_track.get('baseUrl') + orig_lang = parse_qs(base_url).get('lang', [None])[-1] if not base_url: continue lang_name = self._get_text(caption_track, 'name', max_runs=1) @@ -3371,11 +3534,20 @@ def process_language(container, base_url, lang_code, sub_name, query): for trans_code, trans_name in translation_languages.items(): if not trans_code: continue + orig_trans_code = trans_code if caption_track.get('kind') != 'asr': + if 'translated_subs' in self._configuration_arg('skip'): + continue trans_code += f'-{lang_code}' trans_name += format_field(lang_name, template=' from %s') - process_language( - automatic_captions, base_url, trans_code, trans_name, {'tlang': trans_code}) + # Add an "-orig" label to the original language so that it can be distinguished. + # The subs are returned without "-orig" as well for compatibility + if lang_code == f'a-{orig_trans_code}': + process_language( + automatic_captions, base_url, f'{trans_code}-orig', f'{trans_name} (Original)', {}) + # Setting tlang=lang returns damaged subtitles. 
+ process_language(automatic_captions, base_url, trans_code, trans_name, + {} if orig_lang == orig_trans_code else {'tlang': trans_code}) info['automatic_captions'] = automatic_captions info['subtitles'] = subtitles @@ -3438,87 +3610,101 @@ def process_language(container, base_url, lang_code, sub_name, query): or self._extract_chapters_from_engagement_panel(initial_data, duration) or None) - contents = try_get( - initial_data, - lambda x: x['contents']['twoColumnWatchNextResults']['results']['results']['contents'], - list) or [] - for content in contents: - vpir = content.get('videoPrimaryInfoRenderer') - if vpir: - stl = vpir.get('superTitleLink') - if stl: - stl = self._get_text(stl) - if try_get( - vpir, - lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN': - info['location'] = stl - else: - mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl) - if mobj: - info.update({ - 'series': mobj.group(1), - 'season_number': int(mobj.group(2)), - 'episode_number': int(mobj.group(3)), - }) - for tlb in (try_get( - vpir, - lambda x: x['videoActions']['menuRenderer']['topLevelButtons'], - list) or []): - tbr = tlb.get('toggleButtonRenderer') or {} - for getter, regex in [( - lambda x: x['defaultText']['accessibility']['accessibilityData'], - r'(?P[\d,]+)\s*(?P(?:dis)?like)'), ([ - lambda x: x['accessibility'], - lambda x: x['accessibilityData']['accessibilityData'], - ], r'(?P(?:dis)?like) this video along with (?P[\d,]+) other people')]: - label = (try_get(tbr, getter, dict) or {}).get('label') - if label: - mobj = re.match(regex, label) - if mobj: - info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count')) - break - sbr_tooltip = try_get( - vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip']) - if sbr_tooltip: - like_count, dislike_count = sbr_tooltip.split(' / ') + contents = traverse_obj( + initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'), + expected_type=list, default=[]) + + vpir = 
get_first(contents, 'videoPrimaryInfoRenderer') + if vpir: + stl = vpir.get('superTitleLink') + if stl: + stl = self._get_text(stl) + if try_get( + vpir, + lambda x: x['superTitleIcon']['iconType']) == 'LOCATION_PIN': + info['location'] = stl + else: + mobj = re.search(r'(.+?)\s*S(\d+)\s*•\s*E(\d+)', stl) + if mobj: info.update({ - 'like_count': str_to_int(like_count), - 'dislike_count': str_to_int(dislike_count), + 'series': mobj.group(1), + 'season_number': int(mobj.group(2)), + 'episode_number': int(mobj.group(3)), }) - vsir = content.get('videoSecondaryInfoRenderer') - if vsir: - info['channel'] = self._get_text(vsir, ('owner', 'videoOwnerRenderer', 'title')) - rows = try_get( - vsir, - lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'], - list) or [] - multiple_songs = False - for row in rows: - if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True: - multiple_songs = True + for tlb in (try_get( + vpir, + lambda x: x['videoActions']['menuRenderer']['topLevelButtons'], + list) or []): + tbr = tlb.get('toggleButtonRenderer') or {} + for getter, regex in [( + lambda x: x['defaultText']['accessibility']['accessibilityData'], + r'(?P[\d,]+)\s*(?P(?:dis)?like)'), ([ + lambda x: x['accessibility'], + lambda x: x['accessibilityData']['accessibilityData'], + ], r'(?P(?:dis)?like) this video along with (?P[\d,]+) other people')]: + label = (try_get(tbr, getter, dict) or {}).get('label') + if label: + mobj = re.match(regex, label) + if mobj: + info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count')) break - for row in rows: - mrr = row.get('metadataRowRenderer') or {} - mrr_title = mrr.get('title') - if not mrr_title: - continue - mrr_title = self._get_text(mrr, 'title') - mrr_contents_text = self._get_text(mrr, ('contents', 0)) - if mrr_title == 'License': - info['license'] = mrr_contents_text - elif not multiple_songs: - if mrr_title == 'Album': - info['album'] = mrr_contents_text - elif mrr_title == 
'Artist': - info['artist'] = mrr_contents_text - elif mrr_title == 'Song': - info['track'] = mrr_contents_text + sbr_tooltip = try_get( + vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip']) + if sbr_tooltip: + like_count, dislike_count = sbr_tooltip.split(' / ') + info.update({ + 'like_count': str_to_int(like_count), + 'dislike_count': str_to_int(dislike_count), + }) + vsir = get_first(contents, 'videoSecondaryInfoRenderer') + if vsir: + vor = traverse_obj(vsir, ('owner', 'videoOwnerRenderer')) + info.update({ + 'channel': self._get_text(vor, 'title'), + 'channel_follower_count': self._get_count(vor, 'subscriberCountText')}) + + rows = try_get( + vsir, + lambda x: x['metadataRowContainer']['metadataRowContainerRenderer']['rows'], + list) or [] + multiple_songs = False + for row in rows: + if try_get(row, lambda x: x['metadataRowRenderer']['hasDividerLine']) is True: + multiple_songs = True + break + for row in rows: + mrr = row.get('metadataRowRenderer') or {} + mrr_title = mrr.get('title') + if not mrr_title: + continue + mrr_title = self._get_text(mrr, 'title') + mrr_contents_text = self._get_text(mrr, ('contents', 0)) + if mrr_title == 'License': + info['license'] = mrr_contents_text + elif not multiple_songs: + if mrr_title == 'Album': + info['album'] = mrr_contents_text + elif mrr_title == 'Artist': + info['artist'] = mrr_contents_text + elif mrr_title == 'Song': + info['track'] = mrr_contents_text fallbacks = { 'channel': 'uploader', 'channel_id': 'uploader_id', 'channel_url': 'uploader_url', } + + # The upload date for scheduled, live and past live streams / premieres in microformats + # may be different from the stream date. Although not in UTC, we will prefer it in this case. 
+ # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139 + upload_date = ( + unified_strdate(get_first(microformats, 'uploadDate')) + or unified_strdate(search_meta('uploadDate'))) + if not upload_date or (not info.get('is_live') and not info.get('was_live') and info.get('live_status') != 'is_upcoming'): + upload_date = strftime_or_none(self._extract_time_text(vpir, 'dateText')[0], '%Y%m%d') + info['upload_date'] = upload_date + for to, frm in fallbacks.items(): if not info.get(to): info[to] = info.get(frm) @@ -3565,6 +3751,26 @@ def process_language(container, base_url, lang_code, sub_name, query): class YoutubeTabBaseInfoExtractor(YoutubeBaseInfoExtractor): + @staticmethod + def passthrough_smuggled_data(func): + def _smuggle(entries, smuggled_data): + for entry in entries: + # TODO: Convert URL to music.youtube instead. + # Do we need to passthrough any other smuggled_data? + entry['url'] = smuggle_url(entry['url'], smuggled_data) + yield entry + + @functools.wraps(func) + def wrapper(self, url): + url, smuggled_data = unsmuggle_url(url, {}) + if self.is_music_url(url): + smuggled_data['is_music_url'] = True + info_dict = func(self, url, smuggled_data) + if smuggled_data and info_dict.get('entries'): + info_dict['entries'] = _smuggle(info_dict['entries'], smuggled_data) + return info_dict + return wrapper + def _extract_channel_id(self, webpage): channel_id = self._html_search_meta( 'channelId', webpage, 'channel id', default=None) @@ -3582,7 +3788,7 @@ def _extract_channel_id(self, webpage): def _extract_basic_item_renderer(item): # Modified from _extract_grid_item_renderer known_basic_renderers = ( - 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer' + 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer' ) for key, renderer in item.items(): if not isinstance(renderer, dict): @@ -3632,6 +3838,24 @@ def _grid_entries(self, grid_renderer): ep_url, ie=ie.ie_key(), 
video_id=ie._match_id(ep_url), video_title=title) break + def _music_reponsive_list_entry(self, renderer): + video_id = traverse_obj(renderer, ('playlistItemData', 'videoId')) + if video_id: + return self.url_result(f'https://music.youtube.com/watch?v={video_id}', + ie=YoutubeIE.ie_key(), video_id=video_id) + playlist_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'playlistId')) + if playlist_id: + video_id = traverse_obj(renderer, ('navigationEndpoint', 'watchEndpoint', 'videoId')) + if video_id: + return self.url_result(f'https://music.youtube.com/watch?v={video_id}&list={playlist_id}', + ie=YoutubeTabIE.ie_key(), video_id=playlist_id) + return self.url_result(f'https://music.youtube.com/playlist?list={playlist_id}', + ie=YoutubeTabIE.ie_key(), video_id=playlist_id) + browse_id = traverse_obj(renderer, ('navigationEndpoint', 'browseEndpoint', 'browseId')) + if browse_id: + return self.url_result(f'https://music.youtube.com/browse/{browse_id}', + ie=YoutubeTabIE.ie_key(), video_id=browse_id) + def _shelf_entries_from_content(self, shelf_renderer): content = shelf_renderer.get('content') if not isinstance(content, dict): @@ -3690,6 +3914,13 @@ def _video_entry(self, video_renderer): if video_id: return self._extract_video(video_renderer) + def _hashtag_tile_entry(self, hashtag_tile_renderer): + url = urljoin('https://youtube.com', traverse_obj( + hashtag_tile_renderer, ('onTapCommand', 'commandMetadata', 'webCommandMetadata', 'url'))) + if url: + return self.url_result( + url, ie=YoutubeTabIE.ie_key(), title=self._get_text(hashtag_tile_renderer, 'hashtag')) + def _post_thread_entries(self, post_thread_renderer): post_renderer = try_get( post_thread_renderer, lambda x: x['post']['backstagePostRenderer'], dict) @@ -3746,6 +3977,7 @@ def _rich_grid_entries(self, contents): if entry: yield entry ''' + def _extract_entries(self, parent_renderer, continuation_list): # continuation_list is modified in-place with continuation_list = 
[continuation_token] continuation_list[:] = [None] @@ -3753,7 +3985,9 @@ def _extract_entries(self, parent_renderer, continuation_list): for content in contents: if not isinstance(content, dict): continue - is_renderer = try_get(content, lambda x: x['itemSectionRenderer'], dict) + is_renderer = traverse_obj( + content, 'itemSectionRenderer', 'musicShelfRenderer', 'musicShelfContinuation', + expected_type=dict) if not is_renderer: renderer = content.get('richItemRenderer') if renderer: @@ -3769,11 +4003,14 @@ def _extract_entries(self, parent_renderer, continuation_list): known_renderers = { 'playlistVideoListRenderer': self._playlist_entries, 'gridRenderer': self._grid_entries, - 'shelfRenderer': lambda x: self._shelf_entries(x), + 'reelShelfRenderer': self._grid_entries, + 'shelfRenderer': self._shelf_entries, + 'musicResponsiveListItemRenderer': lambda x: [self._music_reponsive_list_entry(x)], 'backstagePostThreadRenderer': self._post_thread_entries, 'videoRenderer': lambda x: [self._video_entry(x)], 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}), 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}), + 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)] } for key, renderer in isr_content.items(): if key not in known_renderers: @@ -3841,6 +4078,7 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): continue known_renderers = { + 'videoRenderer': (self._grid_entries, 'items'), # for membership tab 'gridPlaylistRenderer': (self._grid_entries, 'items'), 'gridVideoRenderer': (self._grid_entries, 'items'), 'gridChannelRenderer': (self._grid_entries, 'items'), @@ -3868,22 +4106,24 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): break @staticmethod - def _extract_selected_tab(tabs): + def _extract_selected_tab(tabs, fatal=True): for tab in tabs: renderer = dict_get(tab, ('tabRenderer', 'expandableTabRenderer')) or {} if 
renderer.get('selected') is True: return renderer else: - raise ExtractorError('Unable to find selected tab') + if fatal: + raise ExtractorError('Unable to find selected tab') - @classmethod - def _extract_uploader(cls, data): + def _extract_uploader(self, data): uploader = {} - renderer = cls._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} + renderer = self._extract_sidebar_info_renderer(data, 'playlistSidebarSecondaryInfoRenderer') or {} owner = try_get( renderer, lambda x: x['videoOwner']['videoOwnerRenderer']['title']['runs'][0], dict) if owner: - uploader['uploader'] = owner.get('text') + owner_text = owner.get('text') + uploader['uploader'] = self._search_regex( + r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text) uploader['uploader_id'] = try_get( owner, lambda x: x['navigationEndpoint']['browseEndpoint']['browseId'], compat_str) uploader['uploader_url'] = urljoin( @@ -3913,10 +4153,37 @@ def _extract_from_tabs(self, item_id, ytcfg, data, tabs): playlist_id = channel_id tags = renderer.get('keywords', '').split() - thumbnails = ( - self._extract_thumbnails(renderer, 'avatar') - or self._extract_thumbnails( - primary_sidebar_renderer, ('thumbnailRenderer', 'playlistVideoThumbnailRenderer', 'thumbnail'))) + # We can get the uncropped banner/avatar by replacing the crop params with '=s0' + # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714 + def _get_uncropped(url): + return url_or_none((url or '').split('=')[0] + '=s0') + + avatar_thumbnails = self._extract_thumbnails(renderer, 'avatar') + if avatar_thumbnails: + uncropped_avatar = _get_uncropped(avatar_thumbnails[0]['url']) + if uncropped_avatar: + avatar_thumbnails.append({ + 'url': uncropped_avatar, + 'id': 'avatar_uncropped', + 'preference': 1 + }) + + channel_banners = self._extract_thumbnails( + data, ('header', ..., ['banner', 'mobileBanner', 'tvBanner'])) + for banner in channel_banners: + banner['preference'] = -10 + + 
if channel_banners: + uncropped_banner = _get_uncropped(channel_banners[0]['url']) + if uncropped_banner: + channel_banners.append({ + 'url': uncropped_banner, + 'id': 'banner_uncropped', + 'preference': -5 + }) + + primary_thumbnails = self._extract_thumbnails( + primary_sidebar_renderer, ('thumbnailRenderer', ('playlistVideoThumbnailRenderer', 'playlistCustomThumbnailRenderer'), 'thumbnail')) if playlist_id is None: playlist_id = item_id @@ -3935,12 +4202,13 @@ def _extract_from_tabs(self, item_id, ytcfg, data, tabs): 'uploader': channel_name, 'uploader_id': channel_id, 'uploader_url': channel_url, - 'thumbnails': thumbnails, + 'thumbnails': primary_thumbnails + avatar_thumbnails + channel_banners, 'tags': tags, 'view_count': self._get_count(playlist_stats, 1), 'availability': self._extract_availability(data), 'modified_date': strftime_or_none(last_updated_unix, '%Y%m%d'), - 'playlist_count': self._get_count(playlist_stats, 0) + 'playlist_count': self._get_count(playlist_stats, 0), + 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), } if not channel_id: metadata.update(self._extract_uploader(data)) @@ -4085,6 +4353,10 @@ def _reload_with_unavailable_videos(self, item_id, data, ytcfg): check_get_keys='contents', fatal=False, ytcfg=ytcfg, note='Downloading API JSON with unavailable videos') + @property + def skip_webpage(self): + return 'webpage' in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) + def _extract_webpage(self, url, item_id, fatal=True): retries = self.get_param('extractor_retries', 3) count = -1 @@ -4119,7 +4391,7 @@ def _extract_webpage(self, url, item_id, fatal=True): self.report_warning(error_to_compat_str(e)) break - if dict_get(data, ('contents', 'currentVideoEndpoint')): + if dict_get(data, ('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')): break last_error = 'Incomplete yt initial data received' @@ -4131,20 +4403,35 @@ def _extract_webpage(self, url, item_id, fatal=True): 
return webpage, data + def _report_playlist_authcheck(self, ytcfg, fatal=True): + """Use if failed to extract ytcfg (and data) from initial webpage""" + if not ytcfg and self.is_authenticated: + msg = 'Playlists that require authentication may not extract correctly without a successful webpage download' + if 'authcheck' not in self._configuration_arg('skip', ie_key=YoutubeTabIE.ie_key()) and fatal: + raise ExtractorError( + f'{msg}. If you are not downloading private content, or ' + 'your cookies are only for the first account and channel,' + ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', + expected=True) + self.report_warning(msg, only_once=True) + def _extract_data(self, url, item_id, ytcfg=None, fatal=True, webpage_fatal=False, default_client='web'): data = None - if 'webpage' not in self._configuration_arg('skip'): + if not self.skip_webpage: webpage, data = self._extract_webpage(url, item_id, fatal=webpage_fatal) ytcfg = ytcfg or self.extract_ytcfg(item_id, webpage) - if not data: - if not ytcfg and self.is_authenticated: - msg = 'Playlists that require authentication may not extract correctly without a successful webpage download.' 
- if 'authcheck' not in self._configuration_arg('skip') and fatal: - raise ExtractorError( - msg + ' If you are not downloading private content, or your cookies are only for the first account and channel,' - ' pass "--extractor-args youtubetab:skip=authcheck" to skip this check', - expected=True) + # Reject webpage data if redirected to home page without explicitly requesting + selected_tab = self._extract_selected_tab(traverse_obj( + data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list, default=[]), fatal=False) or {} + if (url != 'https://www.youtube.com/feed/recommended' + and selected_tab.get('tabIdentifier') == 'FEwhat_to_watch' # Home page + and 'no-youtube-channel-redirect' not in self.get_param('compat_opts', [])): + msg = 'The channel/playlist does not exist and the URL redirected to youtube.com home page' + if fatal: + raise ExtractorError(msg, expected=True) self.report_warning(msg, only_once=True) + if not data: + self._report_playlist_authcheck(ytcfg, fatal=fatal) data = self._extract_tab_endpoint(url, item_id, ytcfg, fatal=fatal, default_client=default_client) return data, ytcfg @@ -4160,39 +4447,44 @@ def _extract_tab_endpoint(self, url, item_id, ytcfg=None, fatal=True, default_cl return self._extract_response( item_id=item_id, query=params, ep=ep, headers=headers, ytcfg=ytcfg, fatal=fatal, default_client=default_client, - check_get_keys=('contents', 'currentVideoEndpoint')) + check_get_keys=('contents', 'currentVideoEndpoint', 'onResponseReceivedActions')) err_note = 'Failed to resolve url (does the playlist exist?)' if fatal: raise ExtractorError(err_note, expected=True) self.report_warning(err_note, item_id) - @staticmethod - def _smuggle_data(entries, data): - for entry in entries: - if data: - entry['url'] = smuggle_url(entry['url'], data) - yield entry - _SEARCH_PARAMS = None - def _search_results(self, query, params=NO_DEFAULT): + def _search_results(self, query, params=NO_DEFAULT, default_client='web'): data = 
{'query': query} if params is NO_DEFAULT: params = self._SEARCH_PARAMS if params: data['params'] = params + + content_keys = ( + ('contents', 'twoColumnSearchResultsRenderer', 'primaryContents', 'sectionListRenderer', 'contents'), + ('onResponseReceivedCommands', 0, 'appendContinuationItemsAction', 'continuationItems'), + # ytmusic search + ('contents', 'tabbedSearchResultsRenderer', 'tabs', 0, 'tabRenderer', 'content', 'sectionListRenderer', 'contents'), + ('continuationContents', ), + ) + display_id = f'query "{query}"' + check_get_keys = tuple(set(keys[0] for keys in content_keys)) + ytcfg = self._download_ytcfg(default_client, display_id) if not self.skip_webpage else {} + self._report_playlist_authcheck(ytcfg, fatal=False) + continuation_list = [None] + search = None for page_num in itertools.count(1): data.update(continuation_list[0] or {}) + headers = self.generate_api_headers( + ytcfg=ytcfg, visitor_data=self._extract_visitor_data(search), default_client=default_client) search = self._extract_response( - item_id='query "%s" page %s' % (query, page_num), ep='search', query=data, - check_get_keys=('contents', 'onResponseReceivedCommands')) - slr_contents = try_get( - search, - (lambda x: x['contents']['twoColumnSearchResultsRenderer']['primaryContents']['sectionListRenderer']['contents'], - lambda x: x['onResponseReceivedCommands'][0]['appendContinuationItemsAction']['continuationItems']), - list) - yield from self._extract_entries({'contents': slr_contents}, continuation_list) + item_id=f'{display_id} page {page_num}', ep='search', query=data, + default_client=default_client, check_get_keys=check_get_keys, ytcfg=ytcfg, headers=headers) + slr_contents = traverse_obj(search, *content_keys) + yield from self._extract_entries({'contents': list(variadic(slr_contents))}, continuation_list) if not continuation_list[0]: break @@ -4236,6 +4528,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 
'математика', '"анализ', 'данных"'], 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', 'uploader_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', + 'channel_follower_count': int }, }, { 'note': 'playlists, multipage, different order', @@ -4252,6 +4545,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', 'channel': 'Igor Kleiner', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', + 'channel_follower_count': int }, }, { 'note': 'playlists, series', @@ -4268,6 +4562,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': '3Blue1Brown', 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', 'tags': ['Mathematics'], + 'channel_follower_count': int }, }, { 'note': 'playlists, singlepage', @@ -4284,6 +4579,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ', 'tags': 'count:13', 'channel': 'ThirstForScience', + 'channel_follower_count': int } }, { 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', @@ -4337,6 +4633,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', + 'channel_follower_count': int }, 'playlist_mincount': 2, }, { @@ -4353,6 +4650,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'uploader_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel': 'lex will', + 'channel_follower_count': int }, 'playlist_mincount': 975, }, { @@ -4369,6 +4667,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': 'lex will', 'tags': ['bible', 'history', 'prophesy'], 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', + 'channel_follower_count': int }, 'playlist_mincount': 199, }, { @@ -4385,6 +4684,7 @@ class 
YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'tags': ['bible', 'history', 'prophesy'], + 'channel_follower_count': int }, 'playlist_mincount': 17, }, { @@ -4401,6 +4701,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'tags': ['bible', 'history', 'prophesy'], + 'channel_follower_count': int }, 'playlist_mincount': 18, }, { @@ -4417,6 +4718,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'tags': ['bible', 'history', 'prophesy'], + 'channel_follower_count': int }, 'playlist_mincount': 12, }, { @@ -4434,6 +4736,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': ['Mathematics'], 'channel': '3Blue1Brown', 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', + 'channel_follower_count': int }, }, { 'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA', @@ -4593,7 +4896,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): }, { 'url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ/live', 'info_dict': { - 'id': 'zpsbVPFwsqk', # This will keep changing + 'id': 'GgL890LIznQ', # This will keep changing 'ext': 'mp4', 'title': str, 'uploader': 'Sky News', @@ -4604,17 +4907,18 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'categories': ['News & Politics'], 'tags': list, 'like_count': int, - 'release_timestamp': 1640164857, + 'release_timestamp': 1642502819, 'channel': 'Sky News', 'channel_id': 'UCoMdktPbSTixAyNGwb-UYkQ', 'age_limit': 0, 'view_count': int, - 'thumbnail': 'https://i.ytimg.com/vi/zpsbVPFwsqk/maxresdefault_live.jpg', + 'thumbnail': 'https://i.ytimg.com/vi/GgL890LIznQ/maxresdefault_live.jpg', 'playable_in_embed': True, - 'release_date': '20211222', + 'release_date': '20220118', 
'availability': 'public', 'live_status': 'is_live', 'channel_url': 'https://www.youtube.com/channel/UCoMdktPbSTixAyNGwb-UYkQ', + 'channel_follower_count': int }, 'params': { 'skip_download': True, @@ -4796,6 +5100,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'recommended', 'title': 'recommended', + 'tags': [], }, 'playlist_mincount': 50, 'params': { @@ -4816,6 +5121,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': [], 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw', 'uploader_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw', + 'channel_follower_count': int }, 'playlist_mincount': 650, 'params': { @@ -4848,6 +5154,28 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'skip_download': True, 'extractor_args': {'youtubetab': {'skip': ['webpage']}} }, + }, { + 'note': 'non-standard redirect to regional channel', + 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', + 'only_matching': True + }, { + 'note': 'collaborative playlist (uploader name in the form "by and x other(s)")', + 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', + 'info_dict': { + 'id': 'PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', + 'modified_date': '20220407', + 'channel_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', + 'tags': [], + 'uploader_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q', + 'uploader': 'pukkandan', + 'availability': 'unlisted', + 'channel_id': 'UCKcqXmCcyqnhgpA5P0oHH_Q', + 'channel': 'pukkandan', + 'description': 'Test for collaborative playlist', + 'title': 'yt-dlp test - collaborative playlist', + 'uploader_url': 'https://www.youtube.com/channel/UCKcqXmCcyqnhgpA5P0oHH_Q', + }, + 'playlist_mincount': 2 }] @classmethod @@ -4855,18 +5183,10 @@ def suitable(cls, url): return False if YoutubeIE.suitable(url) else super( YoutubeTabIE, cls).suitable(url) - def _real_extract(self, url): - url, smuggled_data = unsmuggle_url(url, {}) - if self.is_music_url(url): - 
smuggled_data['is_music_url'] = True - info_dict = self.__real_extract(url, smuggled_data) - if info_dict.get('entries'): - info_dict['entries'] = self._smuggle_data(info_dict['entries'], smuggled_data) - return info_dict - - _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(channel_type)(?P<tab>/\w+))?(?P<post>.*)$')
+    _URL_RE = re.compile(rf'(?P<pre>{_VALID_URL})(?(not_channel)|(?P<tab>/\w+))?(?P<post>.*)$')
 
-    def __real_extract(self, url, smuggled_data):
+    @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data
+    def _real_extract(self, url, smuggled_data):
         item_id = self._match_id(url)
         url = compat_urlparse.urlunparse(
             compat_urlparse.urlparse(url)._replace(netloc='www.youtube.com'))
@@ -4896,6 +5216,7 @@ def get_mobj(url):
                 elif mobj['channel_type'] == 'browse':  # Youtube music /browse/ should be changed to /channel/
                     pre = f'https://www.youtube.com/channel/{item_id}'
 
+        original_tab_name = tab
         if is_channel and not tab and 'no-youtube-channel-redirect' not in compat_opts:
             # Home URLs should redirect to /videos/
             redirect_warning = ('A channel/user page was given. All the channel\'s videos will be downloaded. '
@@ -4927,32 +5248,48 @@ def get_mobj(url):
 
         data, ytcfg = self._extract_data(url, item_id)
 
+        # YouTube may provide a non-standard redirect to the regional channel
+        # See: https://github.com/yt-dlp/yt-dlp/issues/2694
+        redirect_url = traverse_obj(
+            data, ('onResponseReceivedActions', ..., 'navigateAction', 'endpoint', 'commandMetadata', 'webCommandMetadata', 'url'), get_all=False)
+        if redirect_url and 'no-youtube-channel-redirect' not in compat_opts:
+            redirect_url = ''.join((
+                urljoin('https://www.youtube.com', redirect_url), mobj['tab'], mobj['post']))
+            self.to_screen(f'This playlist is likely not available in your region. Following redirect to regional playlist {redirect_url}')
+            return self.url_result(redirect_url, ie=YoutubeTabIE.ie_key())
+
         tabs = traverse_obj(data, ('contents', 'twoColumnBrowseResultsRenderer', 'tabs'), expected_type=list)
         if tabs:
             selected_tab = self._extract_selected_tab(tabs)
-            tab_name = selected_tab.get('title', '')
+            selected_tab_name = selected_tab.get('title', '').lower()
+            if selected_tab_name == 'home':
+                selected_tab_name = 'featured'
+            requested_tab_name = mobj['tab'][1:]
             if 'no-youtube-channel-redirect' not in compat_opts:
-                if mobj['tab'] == '/live':
+                if requested_tab_name == 'live':
                     # Live tab should have redirected to the video
                     raise ExtractorError('The channel is not currently live', expected=True)
-                if mobj['tab'] == '/videos' and tab_name.lower() != mobj['tab'][1:]:
-                    redirect_warning = f'The URL does not have a {mobj["tab"][1:]} tab'
-                    if not mobj['not_channel'] and item_id[:2] == 'UC':
-                        # Topic channels don't have /videos. Use the equivalent playlist instead
-                        pl_id = f'UU{item_id[2:]}'
-                        pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
-                        try:
-                            data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True)
-                        except ExtractorError:
-                            redirect_warning += ' and the playlist redirect gave error'
-                        else:
-                            item_id, url, tab_name = pl_id, pl_url, mobj['tab'][1:]
-                            redirect_warning += f'. Redirecting to playlist {pl_id} instead'
-                    if tab_name.lower() != mobj['tab'][1:]:
-                        redirect_warning += f'. {tab_name} tab is being downloaded instead'
+                if requested_tab_name not in ('', selected_tab_name):
+                    redirect_warning = f'The channel does not have a {requested_tab_name} tab'
+                    if not original_tab_name:
+                        if item_id[:2] == 'UC':
+                            # Topic channels don't have /videos. Use the equivalent playlist instead
+                            pl_id = f'UU{item_id[2:]}'
+                            pl_url = f'https://www.youtube.com/playlist?list={pl_id}'
+                            try:
+                                data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True)
+                            except ExtractorError:
+                                redirect_warning += ' and the playlist redirect gave error'
+                            else:
+                                item_id, url, selected_tab_name = pl_id, pl_url, requested_tab_name
+                                redirect_warning += f'. Redirecting to playlist {pl_id} instead'
+                        if selected_tab_name and selected_tab_name != requested_tab_name:
+                            redirect_warning += f'. {selected_tab_name} tab is being downloaded instead'
+                    else:
+                        raise ExtractorError(redirect_warning, expected=True)
 
         if redirect_warning:
-            self.report_warning(redirect_warning)
+            self.to_screen(redirect_warning)
         self.write_debug(f'Final URL: {url}')
 
         # YouTube sometimes provides a button to reload playlist with unavailable videos.
@@ -5194,7 +5531,14 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
     IE_NAME = 'youtube:search'
     _SEARCH_KEY = 'ytsearch'
     _SEARCH_PARAMS = 'EgIQAQ%3D%3D'  # Videos only
-    _TESTS = []
+    _TESTS = [{
+        'url': 'ytsearch5:youtube-dl test video',
+        'playlist_count': 5,
+        'info_dict': {
+            'id': 'youtube-dl test video',
+            'title': 'youtube-dl test video',
+        }
+    }]
 
 
 class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
@@ -5202,12 +5546,20 @@ class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor):
     _SEARCH_KEY = 'ytsearchdate'
     IE_DESC = 'YouTube search, newest videos first'
     _SEARCH_PARAMS = 'CAISAhAB'  # Videos only, sorted by date
+    _TESTS = [{
+        'url': 'ytsearchdate5:youtube-dl test video',
+        'playlist_count': 5,
+        'info_dict': {
+            'id': 'youtube-dl test video',
+            'title': 'youtube-dl test video',
+        }
+    }]
 
 
 class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
     IE_DESC = 'YouTube search URLs with sorting and filter support'
     IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
-    _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
+    _VALID_URL = r'https?://(?:www\.)?youtube\.com/(?:results|search)\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
     _TESTS = [{
         'url': 'https://www.youtube.com/results?baz=bar&search_query=youtube-dl+test+video&filters=video&lclk=video',
         'playlist_mincount': 5,
@@ -5222,7 +5574,17 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor):
             'id': 'python',
             'title': 'python',
         }
-
+    }, {
+        'url': 'https://www.youtube.com/results?search_query=%23cats',
+        'playlist_mincount': 1,
+        'info_dict': {
+            'id': '#cats',
+            'title': '#cats',
+            'entries': [{
+                'url': r're:https://(www\.)?youtube\.com/hashtag/cats',
+                'title': '#cats',
+            }],
+        },
     }, {
         'url': 'https://www.youtube.com/results?q=test&sp=EgQIBBgB',
         'only_matching': True,
@@ -5234,13 +5596,68 @@ def _real_extract(self, url):
         return self.playlist_result(self._search_results(query, qs.get('sp', (None,))[0]), query, query)
 
 
-class YoutubeFeedsInfoExtractor(YoutubeTabIE):
+class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
+    IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
+    IE_NAME = 'youtube:music:search_url'
+    _VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
+    _TESTS = [{
+        'url': 'https://music.youtube.com/search?q=royalty+free+music',
+        'playlist_count': 16,
+        'info_dict': {
+            'id': 'royalty free music',
+            'title': 'royalty free music',
+        }
+    }, {
+        'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D',
+        'playlist_mincount': 30,
+        'info_dict': {
+            'id': 'royalty free music - songs',
+            'title': 'royalty free music - songs',
+        },
+        'params': {'extract_flat': 'in_playlist'}
+    }, {
+        'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists',
+        'playlist_mincount': 30,
+        'info_dict': {
+            'id': 'royalty free music - community playlists',
+            'title': 'royalty free music - community playlists',
+        },
+        'params': {'extract_flat': 'in_playlist'}
+    }]
+
+    _SECTIONS = {
+        'albums': 'EgWKAQIYAWoKEAoQAxAEEAkQBQ==',
+        'artists': 'EgWKAQIgAWoKEAoQAxAEEAkQBQ==',
+        'community playlists': 'EgeKAQQoAEABagoQChADEAQQCRAF',
+        'featured playlists': 'EgeKAQQoADgBagwQAxAJEAQQDhAKEAU==',
+        'songs': 'EgWKAQIIAWoKEAoQAxAEEAkQBQ==',
+        'videos': 'EgWKAQIQAWoKEAoQAxAEEAkQBQ==',
+    }
+
+    def _real_extract(self, url):
+        qs = parse_qs(url)
+        query = (qs.get('search_query') or qs.get('q'))[0]
+        params = qs.get('sp', (None,))[0]
+        if params:
+            section = next((k for k, v in self._SECTIONS.items() if v == params), params)
+        else:
+            section = compat_urllib_parse_unquote_plus((url.split('#') + [''])[1]).lower()
+            params = self._SECTIONS.get(section)
+            if not params:
+                section = None
+        title = join_nonempty(query, section, delim=' - ')
+        return self.playlist_result(self._search_results(query, params, default_client='web_music'), title, title)
+
+
+class YoutubeFeedsInfoExtractor(InfoExtractor):
     """
     Base class for feed extractors
     Subclasses must define the _FEED_NAME property.
     """
     _LOGIN_REQUIRED = True
-    _TESTS = []
+
+    def _real_initialize(self):
+        YoutubeBaseInfoExtractor._check_login_required(self)
 
     @property
     def IE_NAME(self):
@@ -5248,8 +5665,7 @@ def IE_NAME(self):
 
     def _real_extract(self, url):
         return self.url_result(
-            'https://www.youtube.com/feed/%s' % self._FEED_NAME,
-            ie=YoutubeTabIE.ie_key())
+            f'https://www.youtube.com/feed/{self._FEED_NAME}', ie=YoutubeTabIE.ie_key())
 
 
 class YoutubeWatchLaterIE(InfoExtractor):