X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/81ca451480051d7ce1a31c017e005358345a9149..24f3097ea9a470a984d0454dc013cafa2325f5f8:/yt_dlp/extractor/youtube.py diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index a77a626ac..094b1e9a3 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -2,7 +2,7 @@ import calendar import collections import copy -import datetime +import datetime as dt import enum import hashlib import itertools @@ -11,17 +11,18 @@ import os.path import random import re +import shlex import sys import threading import time import traceback -import urllib.error import urllib.parse from .common import InfoExtractor, SearchInfoExtractor from .openload import PhantomJSwrapper from ..compat import functools from ..jsinterp import JSInterpreter +from ..networking.exceptions import HTTPError, network_exceptions from ..utils import ( NO_DEFAULT, ExtractorError, @@ -32,6 +33,7 @@ clean_html, datetime_from_str, dict_get, + filesize_from_tbr, filter_dict, float_or_none, format_field, @@ -41,7 +43,6 @@ join_nonempty, js_to_json, mimetype2ext, - network_exceptions, orderedSet, parse_codecs, parse_count, @@ -55,6 +56,7 @@ str_to_int, strftime_or_none, traverse_obj, + try_call, try_get, unescapeHTML, unified_strdate, @@ -75,9 +77,9 @@ 'client': { 'clientName': 'WEB', 'clientVersion': '2.20220801.00.00', - } + }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 1 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 1, }, 'web_embedded': { 'INNERTUBE_API_KEY': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8', @@ -87,7 +89,7 @@ 'clientVersion': '1.20220731.00.00', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 56 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 56, }, 'web_music': { 'INNERTUBE_API_KEY': 'AIzaSyC9XL3ZjWddXya6X74dJoCTL-WEYFDNX30', @@ -96,7 +98,7 @@ 'client': { 'clientName': 'WEB_REMIX', 'clientVersion': '1.20220727.01.00', - } + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 67, }, @@ -106,7 +108,7 @@ 'client': { 'clientName': 'WEB_CREATOR', 'clientVersion': '1.20220726.00.00', - } + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 62, }, @@ -115,39 +117,39 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID', - 'clientVersion': '17.31.35', + 'clientVersion': '19.09.37', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip' - } + 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 3, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'android_embedded': { 'INNERTUBE_API_KEY': 'AIzaSyCjc_pVEDi4qsv5MtC2dMXzpIaDoRFLsxw', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_EMBEDDED_PLAYER', - 'clientVersion': '17.31.35', + 'clientVersion': '19.09.37', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip' + 'userAgent': 'com.google.android.youtube/19.09.37 (Linux; U; Android 11) gzip', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 55, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'android_music': { 'INNERTUBE_API_KEY': 'AIzaSyAOghZGza2MQSZkY_zfZ370N-PUdXEo8AI', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'ANDROID_MUSIC', - 'clientVersion': '5.16.51', + 'clientVersion': '6.42.52', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip' - } + 'userAgent': 'com.google.android.apps.youtube.music/6.42.52 (Linux; U; Android 11) gzip', + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 21, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'android_creator': { 'INNERTUBE_API_KEY': 'AIzaSyD_qjV8zaaUMehtLkrKFgVeSX_Iqbtyws8', @@ -156,11 +158,11 @@ 'clientName': 'ANDROID_CREATOR', 'clientVersion': '22.30.100', 'androidSdkVersion': 30, - 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip' + 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 14, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, # iOS clients have HLS live streams. Setting device model to get 60fps formats. # See: https://github.com/TeamNewPipe/NewPipeExtractor/issues/680#issuecomment-1002724558 @@ -169,38 +171,38 @@ 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS', - 'clientVersion': '17.33.2', + 'clientVersion': '19.09.3', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' - } + 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', + }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 5, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'ios_embedded': { 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_MESSAGES_EXTENSION', - 'clientVersion': '17.33.2', + 'clientVersion': '19.09.3', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' + 'userAgent': 'com.google.ios.youtube/19.09.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 66, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'ios_music': { 'INNERTUBE_API_KEY': 'AIzaSyBAETezhkwP0ZWA02RsqT1zu78Fpt0bC_s', 'INNERTUBE_CONTEXT': { 'client': { 'clientName': 'IOS_MUSIC', - 'clientVersion': '5.21', + 'clientVersion': '6.33.3', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' + 'userAgent': 'com.google.ios.youtubemusic/6.33.3 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 26, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, 'ios_creator': { 'INNERTUBE_CONTEXT': { @@ -208,11 +210,11 @@ 'clientName': 'IOS_CREATOR', 'clientVersion': '22.33.101', 'deviceModel': 'iPhone14,3', - 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)' + 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)', }, }, 'INNERTUBE_CONTEXT_CLIENT_NAME': 15, - 'REQUIRE_JS_PLAYER': False + 'REQUIRE_JS_PLAYER': False, }, # mweb has 'ultralow' formats # See: https://github.com/yt-dlp/yt-dlp/pull/557 @@ -222,9 +224,9 @@ 'client': { 'clientName': 'MWEB', 'clientVersion': '2.20220801.00.00', - } + }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 2 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 2, }, # This client can access age restricted videos (unless the uploader has disabled the 'allow embedding' option) # See: https://github.com/zerodytrash/YouTube-Internal-Clients @@ -236,7 +238,17 @@ 'clientVersion': '2.0', }, }, - 'INNERTUBE_CONTEXT_CLIENT_NAME': 85 + 'INNERTUBE_CONTEXT_CLIENT_NAME': 85, + }, + # This client has pre-merged video+audio 720p/1080p streams + 'mediaconnect': { + 'INNERTUBE_CONTEXT': { + 'client': { + 'clientName': 'MEDIA_CONNECT_FRONTEND', + 'clientVersion': '0.1', + }, + }, + 'INNERTUBE_CONTEXT_CLIENT_NAME': 95, }, } @@ -258,7 +270,7 @@ def build_innertube_clients(): THIRD_PARTY = { 'embedUrl': 'https://www.youtube.com/', # Can be any valid URL } - BASE_CLIENTS = ('android', 'web', 'tv', 'ios', 'mweb') + BASE_CLIENTS = ('ios', 'android', 'web', 'tv', 'mweb') priority = qualities(BASE_CLIENTS[::-1]) for client, ytcfg in tuple(INNERTUBE_CLIENTS.items()): @@ -429,7 +441,7 @@ class YoutubeBaseInfoExtractor(InfoExtractor): r'(?:www\.)?piped\.adminforge\.de', r'(?:www\.)?watch\.whatevertinfoil\.de', r'(?:www\.)?piped\.qdi\.fi', - r'(?:www\.)?piped\.video', + r'(?:(?:www|cf)\.)?piped\.video', r'(?:www\.)?piped\.aeong\.one', r'(?:www\.)?piped\.moomoo\.me', r'(?:www\.)?piped\.chauvet\.pro', @@ -453,10 +465,13 @@ class YoutubeBaseInfoExtractor(InfoExtractor): 'lt', 'hu', 'nl', 'no', 'uz', 'pl', 'pt-PT', 'pt', 'ro', 'sq', 'sk', 'sl', 'sr-Latn', 'fi', 'sv', 'vi', 'tr', 'be', 'bg', 'ky', 'kk', 'mk', 'mn', 'ru', 'sr', 'uk', 'el', 'hy', 'iw', 'ur', 'ar', 'fa', 'ne', 'mr', 'hi', 'as', 'bn', 'pa', 'gu', 'or', 'ta', 'te', 'kn', 'ml', - 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko' + 'si', 'th', 'lo', 'my', 'ka', 'am', 'km', 'zh-CN', 'zh-TW', 'zh-HK', 'ja', 'ko', ] - _IGNORED_WARNINGS = {'Unavailable videos will be hidden during playback'} + _IGNORED_WARNINGS = { + 'Unavailable videos will be hidden during playback', + 'Unavailable videos are hidden', + } _YT_HANDLE_RE = r'@[\w.-]{3,30}' # https://support.google.com/youtube/answer/11585688?hl=en _YT_CHANNEL_UCID_RE = r'UC[\w-]{22}' @@ -497,16 +512,10 @@ def _initialize_consent(self): cookies = self._get_cookies('https://www.youtube.com/') if cookies.get('__Secure-3PSID'): return - consent_id = None - consent = cookies.get('CONSENT') - if consent: - if 'YES' in consent.value: - return - consent_id = self._search_regex( - r'PENDING\+(\d+)', consent.value, 'consent', default=None) - if not consent_id: - consent_id = random.randint(100, 999) - self._set_cookie('.youtube.com', 'CONSENT', 'YES+cb.20210328-17-p0.en+FX+%s' % consent_id) + socs = cookies.get('SOCS') + if socs and not socs.value.startswith('CAA'): # not consented + return + self._set_cookie('.youtube.com', 'SOCS', 'CAI', secure=True) # accept all (required for mixes) def _initialize_pref(self): cookies = self._get_cookies('https://www.youtube.com/') @@ -692,7 +701,7 @@ def generate_api_headers( 'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg), 'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg), 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg), - 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client) + 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client), } if session_index is None: session_index = self._extract_session_index(ytcfg) @@ -709,7 +718,7 @@ def _download_ytcfg(self, client, video_id): url = { 'web': 'https://www.youtube.com', 'web_music': 'https://music.youtube.com', - 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1' + 'web_embedded': f'https://www.youtube.com/embed/{video_id}?html5=1', }.get(client) if not url: return {} @@ -720,7 +729,7 @@ def _download_ytcfg(self, client, video_id): @staticmethod def _build_api_continuation_query(continuation, ctp=None): query = { - 'continuation': continuation + 'continuation': continuation, } # TODO: Inconsistency with clickTrackingParams. # Currently we have a fixed ctp contained within context (from ytcfg) @@ -760,7 +769,7 @@ def _extract_continuation(cls, renderer): return traverse_obj(renderer, ( ('contents', 'items', 'rows'), ..., 'continuationItemRenderer', - ('continuationEndpoint', ('button', 'buttonRenderer', 'command')) + ('continuationEndpoint', ('button', 'buttonRenderer', 'command')), ), get_all=False, expected_type=cls._extract_continuation_ep_data) @classmethod @@ -787,7 +796,7 @@ def _report_alerts(self, alerts, expected=True, fatal=True, only_once=False): for alert_type, alert_message in (warnings + errors[:-1]): self.report_warning(f'YouTube said: {alert_type} - {alert_message}', only_once=only_once) if errors: - raise ExtractorError('YouTube said: %s' % errors[-1][1], expected=expected) + raise ExtractorError(f'YouTube said: {errors[-1][1]}', expected=expected) def _extract_and_report_alerts(self, data, *args, **kwargs): return self._report_alerts(self._extract_alerts(data), *args, **kwargs) @@ -879,14 +888,14 @@ def _get_count(self, data, *path_list): return count @staticmethod - def _extract_thumbnails(data, *path_list): + def _extract_thumbnails(data, *path_list, final_key='thumbnails'): """ Extract thumbnails from thumbnails dict @param path_list: path list to level that contains 'thumbnails' key """ thumbnails = [] for path in path_list or [()]: - for thumbnail in traverse_obj(data, (*variadic(path), 'thumbnails', ...)): + for thumbnail in traverse_obj(data, (*variadic(path), final_key, ...)): thumbnail_url = url_or_none(thumbnail.get('url')) if not thumbnail_url: continue @@ -909,7 +918,7 @@ def extract_relative_time(relative_time_text): e.g. 'streamed 6 days ago', '5 seconds ago (edited)', 'updated today', '8 yr ago' """ - # XXX: this could be moved to a general function in utils.py + # XXX: this could be moved to a general function in utils/_utils.py # The relative time text strings are roughly the same as what # Javascript's Intl.RelativeTimeFormat function generates. # See: https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Intl/RelativeTimeFormat @@ -921,17 +930,17 @@ def extract_relative_time(relative_time_text): if start: return datetime_from_str(start) try: - return datetime_from_str('now-%s%s' % (mobj.group('time'), mobj.group('unit'))) + return datetime_from_str('now-{}{}'.format(mobj.group('time'), mobj.group('unit'))) except ValueError: return None def _parse_time_text(self, text): if not text: return - dt = self.extract_relative_time(text) + dt_ = self.extract_relative_time(text) timestamp = None - if isinstance(dt, datetime.datetime): - timestamp = calendar.timegm(dt.timetuple()) + if isinstance(dt_, dt.datetime): + timestamp = calendar.timegm(dt_.timetuple()) if timestamp is None: timestamp = ( @@ -948,7 +957,16 @@ def _parse_time_text(self, text): def _extract_response(self, item_id, query, note='Downloading API JSON', headers=None, ytcfg=None, check_get_keys=None, ep='browse', fatal=True, api_hostname=None, default_client='web'): - for retry in self.RetryManager(): + raise_for_incomplete = bool(self._configuration_arg('raise_incomplete_data', ie_key=YoutubeIE)) + # Incomplete Data should be a warning by default when retries are exhausted, while other errors should be fatal. + icd_retries = iter(self.RetryManager(fatal=raise_for_incomplete)) + icd_rm = next(icd_retries) + main_retries = iter(self.RetryManager()) + main_rm = next(main_retries) + # Manual retry loop for multiple RetryManagers + # The proper RetryManager MUST be advanced after an error + # and its result MUST be checked if the manager is non fatal + while True: try: response = self._call_api( ep=ep, fatal=True, headers=headers, @@ -959,40 +977,46 @@ def _extract_response(self, item_id, query, note='Downloading API JSON', headers except ExtractorError as e: if not isinstance(e.cause, network_exceptions): return self._error_or_warning(e, fatal=fatal) - elif not isinstance(e.cause, urllib.error.HTTPError): - retry.error = e + elif not isinstance(e.cause, HTTPError): + main_rm.error = e + next(main_retries) continue - first_bytes = e.cause.read(512) + first_bytes = e.cause.response.read(512) if not is_html(first_bytes): yt_error = try_get( self._parse_json( - self._webpage_read_content(e.cause, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False), + self._webpage_read_content(e.cause.response, None, item_id, prefix=first_bytes) or '{}', item_id, fatal=False), lambda x: x['error']['message'], str) if yt_error: self._report_alerts([('ERROR', yt_error)], fatal=False) # Downloading page may result in intermittent 5xx HTTP error - # Sometimes a 404 is also recieved. See: https://github.com/ytdl-org/youtube-dl/issues/28289 + # Sometimes a 404 is also received. See: https://github.com/ytdl-org/youtube-dl/issues/28289 # We also want to catch all other network exceptions since errors in later pages can be troublesome # See https://github.com/yt-dlp/yt-dlp/issues/507#issuecomment-880188210 - if e.cause.code not in (403, 429): - retry.error = e + if e.cause.status not in (403, 429): + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) try: self._extract_and_report_alerts(response, only_once=True) except ExtractorError as e: - # YouTube servers may return errors we want to retry on in a 200 OK response + # YouTube's servers may return errors we want to retry on in a 200 OK response # See: https://github.com/yt-dlp/yt-dlp/issues/839 if 'unknown error' in e.msg.lower(): - retry.error = e + main_rm.error = e + next(main_retries) continue return self._error_or_warning(e, fatal=fatal) # Youtube sometimes sends incomplete data # See: https://github.com/ytdl-org/youtube-dl/issues/28194 if not traverse_obj(response, *variadic(check_get_keys)): - retry.error = ExtractorError('Incomplete data received', expected=True) + icd_rm.error = ExtractorError('Incomplete data received', expected=True) + should_retry = next(icd_retries, None) + if not should_retry: + return None continue return response @@ -1093,13 +1117,13 @@ def _extract_video(self, renderer): is_unlisted=self._has_badge(badges, BadgeType.AVAILABILITY_UNLISTED) or None), view_count_field: view_count, 'live_status': live_status, - 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None + 'channel_is_verified': True if self._has_badge(owner_badges, BadgeType.VERIFIED) else None, } class YoutubeIE(YoutubeBaseInfoExtractor): IE_DESC = 'YouTube' - _VALID_URL = r"""(?x)^ + _VALID_URL = r'''(?x)^ ( (?:https?://|//) # http(s):// or protocol-independent URL (?:(?:(?:(?:\w+\.)?[yY][oO][uU][tT][uU][bB][eE](?:-nocookie|kids)?\.com| @@ -1108,7 +1132,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): (?:www\.)?hooktube\.com| (?:www\.)?yourepeat\.com| tube\.majestyc\.net| - %(invidious)s| + {invidious}| youtube\.googleapis\.com)/ # the various hostnames, with wildcard subdomains (?:.*?\#/)? # handle anchor (#/) redirect urls (?: # the various things that can precede the ID: @@ -1124,16 +1148,16 @@ class YoutubeIE(YoutubeBaseInfoExtractor): youtu\.be| # just youtu.be/xxxx vid\.plus| # or vid.plus/xxxx zwearz\.com/watch| # or zwearz.com/watch/xxxx - %(invidious)s + {invidious} )/ |(?:www\.)?cleanvideosearch\.com/media/action/yt/watch\?videoId= ) )? # all until now is optional -> you can pass the naked ID - (?P[0-9A-Za-z_-]{11}) # here is it! the YouTube video ID + (?P[0-9A-Za-z_-]{{11}}) # here is it! the YouTube video ID (?(1).+)? # if we found the ID, everything can follow - (?:\#|$)""" % { - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } + (?:\#|$)'''.format( + invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + ) _EMBED_REGEX = [ r'''(?x) (?: @@ -1160,7 +1184,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): r'/(?P[a-zA-Z0-9_-]{8,})/player(?:_ias\.vflset(?:/[a-zA-Z]{2,3}_[a-zA-Z]{2,3})?|-plasma-ias-(?:phone|tablet)-[a-z]{2}_[A-Z]{2}\.vflset)/base\.js$', r'\b(?Pvfl[a-zA-Z0-9_-]+)\b.*?\.js$', ) - _formats = { + _formats = { # NB: Used in YoutubeWebArchiveIE and GoogleDriveIE '5': {'ext': 'flv', 'width': 400, 'height': 240, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, '6': {'ext': 'flv', 'width': 450, 'height': 270, 'acodec': 'mp3', 'abr': 64, 'vcodec': 'h263'}, '13': {'ext': '3gp', 'acodec': 'aac', 'vcodec': 'mp4v'}, @@ -1304,7 +1328,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', - } + 'timestamp': 1349198244, + }, }, { 'url': '//www.YouTube.com/watch?v=yZIXLfi8CZQ', @@ -1347,6 +1372,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader_id': '@PhilippHagemeister', 'heatmap': 'count:100', + 'timestamp': 1349198244, }, 'params': { 'skip_download': True, @@ -1360,7 +1386,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'ext': 'm4a', 'upload_date': '20121002', 'description': '', - 'title': 'UHDTV TEST 8K VIDEO.mp4' + 'title': 'UHDTV TEST 8K VIDEO.mp4', }, 'params': { 'youtube_include_dash_manifest': True, @@ -1433,6 +1459,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1401991663, }, }, { @@ -1492,6 +1519,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', 'uploader_id': '@ProjektMelody', + 'timestamp': 1577508724, }, }, { @@ -1566,7 +1594,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, 'expected_warnings': [ 'DASH manifest missing', - ] + ], }, # Olympics (https://github.com/ytdl-org/youtube-dl/issues/4431) { @@ -1597,10 +1625,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@Olympics', 'uploader_id': '@Olympics', 'channel_is_verified': True, + 'timestamp': 1440707674, }, 'params': { 'skip_download': 'requires avconv', - } + }, }, # Non-square pixels { @@ -1630,6 +1659,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': '孫ᄋᄅ', 'uploader_url': 'https://www.youtube.com/@AllenMeow', 'uploader_id': '@AllenMeow', + 'timestamp': 1299776999, }, }, # url_encoded_fmt_stream_map is empty string @@ -1773,6 +1803,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, }], 'params': {'skip_download': True}, + 'skip': 'Not multifeed anymore', }, { # Multifeed video with comma in title (see https://github.com/ytdl-org/youtube-dl/issues/8536) @@ -1822,7 +1853,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'playable_in_embed': True, 'like_count': int, 'age_limit': 0, - 'channel_follower_count': int + 'channel_follower_count': int, }, 'params': { 'skip_download': True, @@ -1881,6 +1912,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'The Berkman Klein Center for Internet & Society', 'uploader_id': '@BKCHarvard', 'uploader_url': 'https://www.youtube.com/@BKCHarvard', + 'timestamp': 1422422076, }, 'params': { 'skip_download': True, @@ -1916,6 +1948,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@BernieSanders', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1447987198, }, 'params': { 'skip_download': True, @@ -1979,6 +2012,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Vsauce', 'comment_count': int, 'channel_is_verified': True, + 'timestamp': 1484761047, }, 'params': { 'skip_download': True, @@ -2060,11 +2094,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'title': 'Voyeur Girl', 'description': 'md5:7ae382a65843d6df2685993e90a8628f', 'upload_date': '20190312', - 'artist': 'Stephen', + 'artists': ['Stephen'], + 'creators': ['Stephen'], 'track': 'Voyeur Girl', 'album': 'it\'s too much love to know my dear', 'release_date': '20190313', - 'release_year': 2019, 'alt_title': 'Voyeur Girl', 'view_count': int, 'playable_in_embed': True, @@ -2074,14 +2108,13 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'channel': 'Stephen', # TODO: should be "Stephen - Topic" 'uploader': 'Stephen', 'availability': 'public', - 'creator': 'Stephen', 'duration': 169, 'thumbnail': 'https://i.ytimg.com/vi_webp/MgNrAu2pzNs/maxresdefault.webp', 'age_limit': 0, 'channel_id': 'UC-pWHpBjdGG69N9mM2auIAA', 'tags': 'count:11', 'live_status': 'not_live', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'params': { 'skip_download': True, @@ -2135,6 +2168,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'l\'Or Vert asbl', 'uploader_url': 'https://www.youtube.com/@ElevageOrVert', 'uploader_id': '@ElevageOrVert', + 'timestamp': 1497343210, }, 'params': { 'skip_download': True, @@ -2173,6 +2207,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@Csharp-video-tutorialsBlogspot', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1377976349, }, 'params': { 'skip_download': True, @@ -2255,7 +2290,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@CBSMornings', 'comment_count': int, 'channel_is_verified': True, - } + 'timestamp': 1405513526, + }, }, { # restricted location, https://github.com/ytdl-org/youtube-dl/issues/28685 @@ -2272,7 +2308,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'view_count': int, 'channel': 'Walk around Japan', 'tags': ['Ueno Tokyo', 'Okachimachi Tokyo', 'Ameyoko Street', 'Tokyo attraction', 'Travel in Tokyo'], - 'thumbnail': 'https://i.ytimg.com/vi_webp/cBvYw8_A0vQ/hqdefault.webp', + 'thumbnail': 'https://i.ytimg.com/vi/cBvYw8_A0vQ/hqdefault.jpg', 'age_limit': 0, 'availability': 'public', 'channel_url': 'https://www.youtube.com/channel/UC3o_t8PzBmXf5S9b7GLx1Mw', @@ -2282,6 +2318,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Walk around Japan', 'uploader_url': 'https://www.youtube.com/@walkaroundjapan7124', 'uploader_id': '@walkaroundjapan7124', + 'timestamp': 1605884416, }, 'params': { 'skip_download': True, @@ -2289,11 +2326,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): }, { # Has multiple audio streams 'url': 'WaOKSUlf4TM', - 'only_matching': True + 'only_matching': True, }, { # Requires Premium: has format 141 when requested using YTM url 'url': 'https://music.youtube.com/watch?v=XclachpHxis', - 'only_matching': True + 'only_matching': True, }, { # multiple subtitles with same lang_code 'url': 'https://www.youtube.com/watch?v=wsQiKKfKxug', @@ -2333,6 +2370,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'format': '17', # 3gp format available on android 'extractor_args': {'youtube': {'player_client': ['android']}}, }, + 'skip': 'android client broken', }, { # Skip download of additional client configs (remix client config in this case) @@ -2376,7 +2414,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', - }, 'params': {'format': 'mhtml', 'skip_download': True} + 'timestamp': 1395685455, + }, 'params': {'format': 'mhtml', 'skip_download': True}, }, { # Ensure video upload_date is in UTC timezone (video was uploaded 1641170939) 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', @@ -2405,37 +2444,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@LeonNguyen', 'uploader_id': '@LeonNguyen', 'heatmap': 'count:100', - } - }, { - # Same video as above, but with --compat-opt no-youtube-prefer-utc-upload-date - 'url': 'https://www.youtube.com/watch?v=2NUZ8W2llS4', - 'info_dict': { - 'id': '2NUZ8W2llS4', - 'ext': 'mp4', - 'title': 'The NP that test your phone performance 🙂', - 'description': 'md5:144494b24d4f9dfacb97c1bbef5de84d', - 'channel_id': 'UCRqNBSOHgilHfAczlUmlWHA', - 'channel_url': 'https://www.youtube.com/channel/UCRqNBSOHgilHfAczlUmlWHA', - 'duration': 21, - 'view_count': int, - 'age_limit': 0, - 'categories': ['Gaming'], - 'tags': 'count:23', - 'playable_in_embed': True, - 'live_status': 'not_live', - 'upload_date': '20220102', - 'like_count': int, - 'availability': 'public', - 'channel': 'Leon Nguyen', - 'thumbnail': 'https://i.ytimg.com/vi_webp/2NUZ8W2llS4/maxresdefault.webp', - 'comment_count': int, - 'channel_follower_count': int, - 'uploader': 'Leon Nguyen', - 'uploader_url': 'https://www.youtube.com/@LeonNguyen', - 'uploader_id': '@LeonNguyen', - 'heatmap': 'count:100', + 'timestamp': 1641170939, }, - 'params': {'compat_opts': ['no-youtube-prefer-utc-upload-date']} }, { # date text is premiered video, ensure upload date in UTC (published 1641172509) 'url': 'https://www.youtube.com/watch?v=mzZzzBU6lrM', @@ -2467,61 +2477,41 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'comment_count': int, 'channel_is_verified': True, 'heatmap': 'count:100', - } + 'timestamp': 1641172509, + }, }, - { # continuous livestream. Microformat upload date should be preferred. - # Upload date was 2021-06-19 (not UTC), while stream start is 2021-11-27 - 'url': 'https://www.youtube.com/watch?v=kgx4WGK0oNU', + { # continuous livestream. + # Upload date was 2022-07-12T05:12:29-07:00, while stream start is 2022-07-12T15:59:30+00:00 + 'url': 'https://www.youtube.com/watch?v=jfKfPfyJRdk', 'info_dict': { - 'id': 'kgx4WGK0oNU', - 'title': r're:jazz\/lofi hip hop radio🌱chill beats to relax\/study to \[LIVE 24\/7\] \d{4}-\d{2}-\d{2} \d{2}:\d{2}', + 'id': 'jfKfPfyJRdk', 'ext': 'mp4', - 'channel_id': 'UC84whx2xxsiA1gXHXXqKGOA', - 'availability': 'public', - 'age_limit': 0, - 'release_timestamp': 1637975704, - 'upload_date': '20210619', - 'channel_url': 'https://www.youtube.com/channel/UC84whx2xxsiA1gXHXXqKGOA', - 'live_status': 'is_live', - 'thumbnail': 'https://i.ytimg.com/vi/kgx4WGK0oNU/maxresdefault.jpg', - 'channel': 'Abao in Tokyo', - 'channel_follower_count': int, - 'release_date': '20211127', - 'tags': 'count:39', - 'categories': ['People & Blogs'], + 'channel_id': 'UCSJ4gkVC6NrvII8umztf0Ow', 'like_count': int, - 'view_count': int, - 'playable_in_embed': True, - 'description': 'md5:2ef1d002cad520f65825346e2084e49d', + 'uploader': 'Lofi Girl', + 'categories': ['Music'], 'concurrent_view_count': int, - 'uploader': 'Abao in Tokyo', - 'uploader_url': 'https://www.youtube.com/@abaointokyo', - 'uploader_id': '@abaointokyo', - }, - 'params': {'skip_download': True} - }, { - # Story. Requires specific player params to work. - 'url': 'https://www.youtube.com/watch?v=vv8qTUWmulI', - 'info_dict': { - 'id': 'vv8qTUWmulI', - 'ext': 'mp4', - 'availability': 'unlisted', - 'view_count': int, - 'channel_id': 'UCzIZ8HrzDgc-pNQDUG6avBA', - 'upload_date': '20220526', - 'categories': ['Education'], - 'title': 'Story', - 'channel': 'IT\'S HISTORY', - 'description': '', - 'duration': 12, 'playable_in_embed': True, + 'timestamp': 1657627949, + 'release_date': '20220712', + 'channel_url': 'https://www.youtube.com/channel/UCSJ4gkVC6NrvII8umztf0Ow', + 'description': 'md5:13a6f76df898f5674f9127139f3df6f7', 'age_limit': 0, - 'live_status': 'not_live', - 'tags': [], - 'thumbnail': 'https://i.ytimg.com/vi_webp/vv8qTUWmulI/maxresdefault.webp', - 'channel_url': 'https://www.youtube.com/channel/UCzIZ8HrzDgc-pNQDUG6avBA', + 'thumbnail': 'https://i.ytimg.com/vi/jfKfPfyJRdk/maxresdefault.jpg', + 'release_timestamp': 1657641570, + 'uploader_url': 'https://www.youtube.com/@LofiGirl', + 'channel_follower_count': int, + 'channel_is_verified': True, + 'title': r're:^lofi hip hop radio 📚 - beats to relax/study to', + 'view_count': int, + 'live_status': 'is_live', + 'tags': 'count:32', + 'channel': 'Lofi Girl', + 'availability': 'public', + 'upload_date': '20220712', + 'uploader_id': '@LofiGirl', }, - 'skip': 'stories get removed after some period of time', + 'params': {'skip_download': True}, }, { 'url': 'https://www.youtube.com/watch?v=tjjjtzRLHvA', 'info_dict': { @@ -2547,7 +2537,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@lesmiscore', 'uploader': 'Lesmiscore', 'uploader_url': 'https://www.youtube.com/@lesmiscore', - } + 'timestamp': 1648005313, + }, }, { # Prefer primary title+description language metadata by default # Do not prefer translated description if primary is empty @@ -2574,8 +2565,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1662677394, }, - 'params': {'skip_download': True} + 'params': {'skip_download': True}, }, { # Extractor argument: prefer translated title+description 'url': 'https://www.youtube.com/watch?v=gHKT4uU8Zng', @@ -2587,7 +2579,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'duration': 5, 'live_status': 'not_live', 'channel_id': 'UCiu-3thuViMebBjw_5nWYrA', - 'upload_date': '20220728', + 'upload_date': '20220729', 'view_count': int, 'categories': ['People & Blogs'], 'thumbnail': r're:^https?://.*\.jpg', @@ -2600,6 +2592,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@coletdjnz', 'uploader_id': '@coletdjnz', 'uploader': 'cole-dlp-test-acc', + 'timestamp': 1659073275, + 'like_count': int, }, 'params': {'skip_download': True, 'extractor_args': {'youtube': {'lang': ['fr']}}}, 'expected_warnings': [r'Preferring "fr" translated fields'], @@ -2665,6 +2659,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader': 'Projekt Melody', 'uploader_id': '@ProjektMelody', 'uploader_url': 'https://www.youtube.com/@ProjektMelody', + 'timestamp': 1577508724, }, 'params': {'extractor_args': {'youtube': {'player_client': ['tv_embedded']}}, 'format': '251-drc'}, }, @@ -2699,6 +2694,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_id': '@sana_natori', 'channel_is_verified': True, 'heatmap': 'count:100', + 'timestamp': 1671798112, }, }, { @@ -2733,7 +2729,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'heatmap': 'count:100', }, 'params': { - 'extractor_args': {'youtube': {'player_client': ['android'], 'player_skip': ['webpage']}}, + 'extractor_args': {'youtube': {'player_client': ['ios'], 'player_skip': ['webpage']}}, }, }, ] @@ -2768,10 +2764,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@ChristopherSykesDocumentaries', 'uploader_id': '@ChristopherSykesDocumentaries', 'heatmap': 'count:100', + 'timestamp': 1211825920, }, 'params': { 'skip_download': True, - } + }, }, ] @@ -2860,7 +2857,7 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate): # Obtain from MPD's maximum seq value old_mpd_url = mpd_url last_error = ctx.pop('last_error', None) - expire_fast = immediate or last_error and isinstance(last_error, urllib.error.HTTPError) and last_error.code == 403 + expire_fast = immediate or last_error and isinstance(last_error, HTTPError) and last_error.status == 403 mpd_url, stream_number, is_live = (mpd_feed(format_id, 5 if expire_fast else 18000) or (mpd_url, stream_number, False)) if not refresh_sequence: @@ -2928,7 +2925,7 @@ def _extract_sequence_from_mpd(refresh_sequence, immediate): if not should_continue: known_idx = idx - 1 raise ExtractorError('breaking out of outer loop') - last_segment_url = urljoin(fragment_base_url, 'sq/%d' % idx) + last_segment_url = urljoin(fragment_base_url, f'sq/{idx}') yield { 'url': last_segment_url, 'fragment_count': last_seq, @@ -2977,7 +2974,7 @@ def _extract_player_info(cls, player_url): if id_m: break else: - raise ExtractorError('Cannot identify player %r' % player_url) + raise ExtractorError(f'Cannot identify player {player_url!r}') return id_m.group('id') def _load_player(self, video_id, player_url, fatal=True): @@ -2986,7 +2983,7 @@ def _load_player(self, video_id, player_url, fatal=True): code = self._download_webpage( player_url, video_id, fatal=fatal, note='Downloading player ' + player_id, - errnote='Download of %s failed' % player_url) + errnote=f'Download of {player_url} failed') if code: self._code_cache[player_id] = code return self._code_cache.get(player_id) @@ -3047,10 +3044,9 @@ def _genslice(start, end, step): cache_res = func(test_string) cache_spec = [ord(c) for c in cache_res] expr_code = ' + '.join(gen_sig_code(cache_spec)) - signature_id_tuple = '(%s)' % ( - ', '.join(str(len(p)) for p in example_sig.split('.'))) - code = ('if tuple(len(p) for p in s.split(\'.\')) == %s:\n' - ' return %s\n') % (signature_id_tuple, expr_code) + signature_id_tuple = '({})'.format(', '.join(str(len(p)) for p in example_sig.split('.'))) + code = (f'if tuple(len(p) for p in s.split(\'.\')) == {signature_id_tuple}:\n' + f' return {expr_code}\n') self.to_screen('Extracted signature function:\n' + code) def _parse_sig_js(self, jscode): @@ -3140,7 +3136,7 @@ def _extract_n_function_name(self, jscode): return funcname return json.loads(js_to_json(self._search_regex( - rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode, + rf'var {re.escape(funcname)}\s*=\s*(\[.+?\])\s*[,;]', jscode, f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)] def _extract_n_function_code(self, video_id, player_url): @@ -3156,9 +3152,9 @@ def _extract_n_function_code(self, video_id, player_url): # For redundancy func_code = self._search_regex( - r'''(?xs)%s\s*=\s*function\s*\((?P[\w$]+)\)\s* + rf'''(?xs){func_name}\s*=\s*function\s*\((?P[\w$]+)\)\s* # NB: The end of the regex is intentionally kept strict - {(?P.+?}\s*return\ [\w$]+.join\(""\))};''' % func_name, + {{(?P.+?}}\s*return\ [\w$]+.join\(""\))}};''', jscode, 'nsig function', group=('var', 'code'), default=None) if func_code: func_code = ([func_code[0]], func_code[1]) @@ -3224,7 +3220,7 @@ def _mark_watched(self, video_id, player_responses): # cpn generation algorithm is reverse engineered from base.js. # In fact it works even with dummy cpn. CPN_ALPHABET = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_' - cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(0, 16)) + cpn = ''.join(CPN_ALPHABET[random.randint(0, 256) & 63] for _ in range(16)) # # more consistent results setting it to right before the end video_length = [str(float((qs.get('len') or ['1.5'])[0]) - 1)] @@ -3261,7 +3257,7 @@ def _extract_from_webpage(cls, url, webpage): webpage) if mobj: yield cls.url_result(mobj.group('url'), cls) - raise cls.StopExtraction() + raise cls.StopExtraction yield from super()._extract_from_webpage(url, webpage) @@ -3286,7 +3282,7 @@ def _extract_chapters_from_json(self, data, duration): chapter_list = traverse_obj( data, ( 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', - 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters' + 'decoratedPlayerBarRenderer', 'playerBar', 'chapteredPlayerBarRenderer', 'chapters', ), expected_type=list) return self._extract_chapters_helper( @@ -3310,18 +3306,46 @@ def _extract_chapters_from_engagement_panel(self, data, duration): chapter_time, chapter_title, duration) for contents in content_list)), []) - def _extract_heatmap_from_player_overlay(self, data): - content_list = traverse_obj(data, ( - 'playerOverlays', 'playerOverlayRenderer', 'decoratedPlayerBarRenderer', 'decoratedPlayerBarRenderer', 'playerBar', - 'multiMarkersPlayerBarRenderer', 'markersMap', ..., 'value', 'heatmap', 'heatmapRenderer', 'heatMarkers', {list})) - return next(filter(None, ( - traverse_obj(contents, (..., 'heatMarkerRenderer', { - 'start_time': ('timeRangeStartMillis', {functools.partial(float_or_none, scale=1000)}), - 'end_time': {lambda x: (x['timeRangeStartMillis'] + x['markerDurationMillis']) / 1000}, - 'value': ('heatMarkerIntensityScoreNormalized', {float_or_none}), - })) for contents in content_list)), None) + def _extract_heatmap(self, data): + return traverse_obj(data, ( + 'frameworkUpdates', 'entityBatchUpdate', 'mutations', + lambda _, v: v['payload']['macroMarkersListEntity']['markersList']['markerType'] == 'MARKER_TYPE_HEATMAP', + 'payload', 'macroMarkersListEntity', 'markersList', 'markers', ..., { + 'start_time': ('startMillis', {functools.partial(float_or_none, scale=1000)}), + 'end_time': {lambda x: (int(x['startMillis']) + int(x['durationMillis'])) / 1000}, + 'value': ('intensityScoreNormalized', {float_or_none}), + })) or None + + def _extract_comment(self, entities, parent=None): + comment_entity_payload = get_first(entities, ('payload', 'commentEntityPayload', {dict})) + if not (comment_id := traverse_obj(comment_entity_payload, ('properties', 'commentId', {str}))): + return - def _extract_comment(self, comment_renderer, parent=None): + toolbar_entity_payload = get_first(entities, ('payload', 'engagementToolbarStateEntityPayload', {dict})) + time_text = traverse_obj(comment_entity_payload, ('properties', 'publishedTime', {str})) or '' + + return { + 'id': comment_id, + 'parent': parent or 'root', + **traverse_obj(comment_entity_payload, { + 'text': ('properties', 'content', 'content', {str}), + 'like_count': ('toolbar', 'likeCountA11y', {parse_count}), + 'author_id': ('author', 'channelId', {self.ucid_or_none}), + 'author': ('author', 'displayName', {str}), + 'author_thumbnail': ('author', 'avatarThumbnailUrl', {url_or_none}), + 'author_is_uploader': ('author', 'isCreator', {bool}), + 'author_is_verified': ('author', 'isVerified', {bool}), + 'author_url': ('author', 'channelCommand', 'innertubeCommand', ( + ('browseEndpoint', 'canonicalBaseUrl'), ('commandMetadata', 'webCommandMetadata', 'url'), + ), {lambda x: urljoin('https://www.youtube.com', x)}), + }, get_all=False), + 'is_favorited': (None if toolbar_entity_payload is None else + toolbar_entity_payload.get('heartState') == 'TOOLBAR_HEART_STATE_HEARTED'), + '_time_text': time_text, # FIXME: non-standard, but we need a way of showing that it is an estimate. + 'timestamp': self._parse_time_text(time_text), + } + + def _extract_comment_old(self, comment_renderer, parent=None): comment_id = comment_renderer.get('commentId') if not comment_id: return @@ -3356,7 +3380,7 @@ def _extract_comment(self, comment_renderer, parent=None): info['author_is_uploader'] = author_is_uploader comment_abr = traverse_obj( - comment_renderer, ('actionsButtons', 'commentActionButtonsRenderer'), expected_type=dict) + comment_renderer, ('actionButtons', 'commentActionButtonsRenderer'), expected_type=dict) if comment_abr is not None: info['is_favorited'] = 'creatorHeart' in comment_abr @@ -3398,25 +3422,43 @@ def extract_header(contents): sort_text = str_or_none(sort_menu_item.get('title')) if not sort_text: sort_text = 'top comments' if comment_sort_index == 0 else 'newest first' - self.to_screen('Sorting comments by %s' % sort_text.lower()) + self.to_screen(f'Sorting comments by {sort_text.lower()}') break return _continuation - def extract_thread(contents): + def extract_thread(contents, entity_payloads): if not parent: tracker['current_page_thread'] = 0 for content in contents: if not parent and tracker['total_parent_comments'] >= max_parents: yield comment_thread_renderer = try_get(content, lambda x: x['commentThreadRenderer']) - comment_renderer = get_first( - (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]], - expected_type=dict, default={}) - comment = self._extract_comment(comment_renderer, parent) + # old comment format + if not entity_payloads: + comment_renderer = get_first( + (comment_thread_renderer, content), [['commentRenderer', ('comment', 'commentRenderer')]], + expected_type=dict, default={}) + + comment = self._extract_comment_old(comment_renderer, parent) + + # new comment format + else: + view_model = ( + traverse_obj(comment_thread_renderer, ('commentViewModel', 'commentViewModel', {dict})) + or traverse_obj(content, ('commentViewModel', {dict}))) + comment_keys = traverse_obj(view_model, (('commentKey', 'toolbarStateKey'), {str})) + if not comment_keys: + continue + entities = traverse_obj(entity_payloads, lambda _, v: v['entityKey'] in comment_keys) + comment = self._extract_comment(entities, parent) + if comment: + comment['is_pinned'] = traverse_obj(view_model, ('pinnedText', {str})) is not None + if not comment: continue comment_id = comment['id'] + if comment.get('is_pinned'): tracker['pinned_comment_ids'].add(comment_id) # Sometimes YouTube may break and give us infinite looping comments. @@ -3426,7 +3468,9 @@ def extract_thread(contents): # Pinned comments may appear a second time in newest first sort # See: https://github.com/yt-dlp/yt-dlp/issues/6712 continue - self.report_warning('Detected YouTube comments looping. Stopping comment extraction as we probably cannot get any more.') + self.report_warning( + 'Detected YouTube comments looping. Stopping comment extraction ' + f'{"for this thread" if parent else ""} as we probably cannot get any more.') yield else: tracker['seen_comment_ids'].add(comment['id']) @@ -3449,15 +3493,15 @@ def extract_thread(contents): # Keeps track of counts across recursive calls if not tracker: - tracker = dict( - running_total=0, - est_total=None, - current_page_thread=0, - total_parent_comments=0, - total_reply_comments=0, - seen_comment_ids=set(), - pinned_comment_ids=set() - ) + tracker = { + 'running_total': 0, + 'est_total': None, + 'current_page_thread': 0, + 'total_parent_comments': 0, + 'total_reply_comments': 0, + 'seen_comment_ids': set(), + 'pinned_comment_ids': set(), + } # TODO: Deprecated # YouTube comments have a max depth of 2 @@ -3468,8 +3512,8 @@ def extract_thread(contents): if max_depth == 1 and parent: return - max_comments, max_parents, max_replies, max_replies_per_thread, *_ = map( - lambda p: int_or_none(p, default=sys.maxsize), self._configuration_arg('max_comments', ) + [''] * 4) + max_comments, max_parents, max_replies, max_replies_per_thread, *_ = ( + int_or_none(p, default=sys.maxsize) for p in self._configuration_arg('max_comments') + [''] * 4) continuation = self._extract_continuation(root_continuation_data) @@ -3498,7 +3542,7 @@ def extract_thread(contents): note_prefix = ' Downloading comment API JSON reply thread %d %s' % ( tracker['current_page_thread'], comment_prog_str) else: - note_prefix = '%sDownloading comment%s API JSON page %d %s' % ( + note_prefix = '{}Downloading comment{} API JSON page {} {}'.format( ' ' if parent else '', ' replies' if parent else '', page_num, comment_prog_str) @@ -3507,7 +3551,7 @@ def extract_thread(contents): check_get_keys = None if not is_forced_continuation and not (tracker['est_total'] == 0 and tracker['running_total'] == 0): check_get_keys = [[*continuation_items_path, ..., ( - 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentRenderer'))]] + 'commentsHeaderRenderer' if is_first_continuation else ('commentThreadRenderer', 'commentViewModel', 'commentRenderer'))]] try: response = self._extract_response( item_id=None, query=continuation, @@ -3517,14 +3561,21 @@ def extract_thread(contents): # Ignore incomplete data error for replies if retries didn't work. # This is to allow any other parent comments and comment threads to be downloaded. # See: https://github.com/yt-dlp/yt-dlp/issues/4669 - if 'incomplete data' in str(e).lower() and parent and self.get_param('ignoreerrors') is True: - self.report_warning( - 'Received incomplete data for a comment reply thread and retrying did not help. ' - 'Ignoring to let other comments be downloaded.') - else: - raise + if 'incomplete data' in str(e).lower() and parent: + if self.get_param('ignoreerrors') in (True, 'only_download'): + self.report_warning( + 'Received incomplete data for a comment reply thread and retrying did not help. ' + 'Ignoring to let other comments be downloaded. Pass --no-ignore-errors to not ignore.') + return + else: + raise ExtractorError( + 'Incomplete data received for comment reply thread. ' + 'Pass --ignore-errors to ignore and allow rest of comments to download.', + expected=True) + raise is_forced_continuation = False continuation = None + mutations = traverse_obj(response, ('frameworkUpdates', 'entityBatchUpdate', 'mutations', ..., {dict})) for continuation_items in traverse_obj(response, continuation_items_path, expected_type=list, default=[]): if is_first_continuation: continuation = extract_header(continuation_items) @@ -3533,7 +3584,7 @@ def extract_thread(contents): break continue - for entry in extract_thread(continuation_items): + for entry in extract_thread(continuation_items, mutations): if not entry: return yield entry @@ -3578,9 +3629,9 @@ def _generate_player_context(cls, sts=None): context['signatureTimestamp'] = sts return { 'playbackContext': { - 'contentPlaybackContext': context + 'contentPlaybackContext': context, }, - **cls._get_checkok_params() + **cls._get_checkok_params(), } @staticmethod @@ -3599,8 +3650,6 @@ def _is_agegated(player_response): def _is_unplayable(player_response): return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE' - _PLAYER_PARAMS = 'CgIQBg==' - def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data): session_index = self._extract_session_index(player_ytcfg, master_ytcfg) @@ -3612,32 +3661,39 @@ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, yt_query = { 'videoId': video_id, } - if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android': - yt_query['params'] = self._PLAYER_PARAMS + + pp_arg = self._configuration_arg('player_params', [None], casesense=True)[0] + if pp_arg: + yt_query['params'] = pp_arg yt_query.update(self._generate_player_context(sts)) return self._extract_response( item_id=video_id, ep='player', query=yt_query, ytcfg=player_ytcfg, headers=headers, fatal=True, default_client=client, - note='Downloading %s player API JSON' % client.replace('_', ' ').strip() + note='Downloading {} player API JSON'.format(client.replace('_', ' ').strip()), ) or None def _get_requested_clients(self, url, smuggled_data): requested_clients = [] - default = ['android', 'web'] + android_clients = [] + default = ['ios', 'web'] allowed_clients = sorted( - (client for client in INNERTUBE_CLIENTS.keys() if client[:1] != '_'), + (client for client in INNERTUBE_CLIENTS if client[:1] != '_'), key=lambda client: INNERTUBE_CLIENTS[client]['priority'], reverse=True) for client in self._configuration_arg('player_client'): - if client in allowed_clients: - requested_clients.append(client) - elif client == 'default': + if client == 'default': requested_clients.extend(default) elif client == 'all': requested_clients.extend(allowed_clients) - else: + elif client not in allowed_clients: self.report_warning(f'Skipping unsupported client {client}') + elif client.startswith('android'): + android_clients.append(client) + else: + requested_clients.append(client) + # Force deprioritization of broken Android clients for format de-duplication + requested_clients.extend(android_clients) if not requested_clients: requested_clients = default @@ -3647,15 +3703,28 @@ def _get_requested_clients(self, url, smuggled_data): return orderedSet(requested_clients) + def _invalid_player_response(self, pr, video_id): + # YouTube may return a different video player response than expected. + # See: https://github.com/TeamNewPipe/NewPipe/issues/8713 + if (pr_id := traverse_obj(pr, ('videoDetails', 'videoId'))) != video_id: + return pr_id + def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data): initial_pr = None if webpage: initial_pr = self._search_json( self._YT_INITIAL_PLAYER_RESPONSE_RE, webpage, 'initial player response', video_id, fatal=False) + prs = [] + if initial_pr and not self._invalid_player_response(initial_pr, video_id): + # Android player_response does not have microFormats which are needed for + # extraction of some data. So we return the initial_pr with formats + # stripped out even if not requested by the user + # See: https://github.com/yt-dlp/yt-dlp/issues/501 + prs.append({**initial_pr, 'streamingData': None}) + all_clients = set(clients) clients = clients[::-1] - prs = [] def append_client(*client_names): """ Append the first client name that exists but not already used """ @@ -3667,18 +3736,9 @@ def append_client(*client_names): all_clients.add(actual_client) return - # Android player_response does not have microFormats which are needed for - # extraction of some data. So we return the initial_pr with formats - # stripped out even if not requested by the user - # See: https://github.com/yt-dlp/yt-dlp/issues/501 - if initial_pr: - pr = dict(initial_pr) - pr['streamingData'] = None - prs.append(pr) - - last_error = None tried_iframe_fallback = False player_url = None + skipped_clients = {} while clients: client, base_client, variant = _split_innertube_client(clients.pop()) player_ytcfg = master_ytcfg if client == 'web' else {} @@ -3699,26 +3759,19 @@ def append_client(*client_names): pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response( client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data) except ExtractorError as e: - if last_error: - self.report_warning(last_error) - last_error = e + self.report_warning(e) continue - if pr: - # YouTube may return a different video player response than expected. - # See: https://github.com/TeamNewPipe/NewPipe/issues/8713 - pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId')) - if pr_video_id and pr_video_id != video_id: - self.report_warning( - f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message()) - else: - # Save client name for introspection later - name = short_client_name(client) - sd = traverse_obj(pr, ('streamingData', {dict})) or {} - sd[STREAMING_DATA_CLIENT_NAME] = name - for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})): - f[STREAMING_DATA_CLIENT_NAME] = name - prs.append(pr) + if pr_id := self._invalid_player_response(pr, video_id): + skipped_clients[client] = pr_id + elif pr: + # Save client name for introspection later + name = short_client_name(client) + sd = traverse_obj(pr, ('streamingData', {dict})) or {} + sd[STREAMING_DATA_CLIENT_NAME] = name + for f in traverse_obj(sd, (('formats', 'adaptiveFormats'), ..., {dict})): + f[STREAMING_DATA_CLIENT_NAME] = name + prs.append(pr) # creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated: @@ -3729,19 +3782,26 @@ def append_client(*client_names): elif not variant: append_client(f'tv_embedded.{base_client}', f'{base_client}_embedded') - if last_error: - if not len(prs): - raise last_error - self.report_warning(last_error) + if skipped_clients: + self.report_warning( + f'Skipping player responses from {"/".join(skipped_clients)} clients ' + f'(got player responses for video "{"/".join(set(skipped_clients.values()))}" instead of "{video_id}")') + if not prs: + raise ExtractorError( + 'All player responses are invalid. Your IP is likely being blocked by Youtube', expected=True) + elif not prs: + raise ExtractorError('Failed to extract any player response') return prs, player_url def _needs_live_processing(self, live_status, duration): if (live_status == 'is_live' and self.get_param('live_from_start') - or live_status == 'post_live' and (duration or 0) > 4 * 3600): + or live_status == 'post_live' and (duration or 0) > 2 * 3600): return live_status def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, live_status, duration): CHUNK_SIZE = 10 << 20 + PREFERRED_LANG_VALUE = 10 + original_language = None itags, stream_ids = collections.defaultdict(set), [] itag_qualities, res_qualities = {}, {0: None} q = qualities([ @@ -3749,16 +3809,21 @@ def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, l # audio-only formats with unknown quality may get tagged as tiny 'tiny', 'audio_quality_ultralow', 'audio_quality_low', 'audio_quality_medium', 'audio_quality_high', # Audio only formats - 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres' + 'small', 'medium', 'large', 'hd720', 'hd1080', 'hd1440', 'hd2160', 'hd2880', 'highres', ]) streaming_formats = traverse_obj(streaming_data, (..., ('formats', 'adaptiveFormats'), ...)) - all_formats = self._configuration_arg('include_duplicate_formats') + format_types = self._configuration_arg('formats') + all_formats = 'duplicate' in format_types + if self._configuration_arg('include_duplicate_formats'): + all_formats = True + self._downloader.deprecated_feature('[youtube] include_duplicate_formats extractor argument is deprecated. ' + 'Use formats=duplicate extractor argument instead') def build_fragments(f): return LazyList({ 'url': update_url_query(f['url'], { - 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}' - }) + 'range': f'{range_start}-{min(range_start + CHUNK_SIZE - 1, f["filesize"])}', + }), } for range_start in range(0, f['filesize'], CHUNK_SIZE)) for fmt in streaming_formats: @@ -3785,6 +3850,13 @@ def build_fragments(f): itag_qualities[itag] = quality if height: res_qualities[height] = quality + + is_default = audio_track.get('audioIsDefault') + is_descriptive = 'descriptive' in (audio_track.get('displayName') or '').lower() + language_code = audio_track.get('id', '').split('.')[0] + if language_code and is_default: + original_language = language_code + # FORMAT_STREAM_TYPE_OTF(otf=1) requires downloading the init fragment # (adding `&sq=0` to the URL) and parsing emsg box to determine the # number of fragment that would subsequently requested with (`&sq=N`) @@ -3799,9 +3871,9 @@ def build_fragments(f): if not all((sc, fmt_url, player_url, encrypted_sig)): continue try: - fmt_url += '&%s=%s' % ( + fmt_url += '&{}={}'.format( traverse_obj(sc, ('sp', -1)) or 'signature', - self._decrypt_signature(encrypted_sig, video_id, player_url) + self._decrypt_signature(encrypted_sig, video_id, player_url), ) except ExtractorError as e: self.report_warning('Signature extraction failed: Some formats may be missing', @@ -3810,12 +3882,11 @@ def build_fragments(f): continue query = parse_qs(fmt_url) - throttled = False if query.get('n'): try: decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0]) fmt_url = update_url_query(fmt_url, { - 'n': decrypt_nsig(query['n'][0], video_id, player_url) + 'n': decrypt_nsig(query['n'][0], video_id, player_url), }) except ExtractorError as e: phantomjs_hint = '' @@ -3824,30 +3895,35 @@ def build_fragments(f): f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n') if player_url: self.report_warning( - f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}' + f'nsig extraction failed: Some formats may be missing\n{phantomjs_hint}' f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True) self.write_debug(e, only_once=True) else: self.report_warning( - 'Cannot decrypt nsig without player_url: You may experience throttling for some formats', + 'Cannot decrypt nsig without player_url: Some formats may be missing', video_id=video_id, only_once=True) - throttled = True + continue tbr = float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) - language_preference = ( - 10 if audio_track.get('audioIsDefault') and 10 - else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10 - else -1) + format_duration = traverse_obj(fmt, ('approxDurationMs', {lambda x: float_or_none(x, 1000)})) # Some formats may have much smaller duration than others (possibly damaged during encoding) # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823 # Make sure to avoid false positives with small duration differences. # E.g. __2ABJjxzNo, ySuUZEjARPY - is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500) + is_damaged = try_call(lambda: format_duration < duration // 2) if is_damaged: self.report_warning( f'{video_id}: Some formats are possibly damaged. They will be deprioritized', only_once=True) client_name = fmt.get(STREAMING_DATA_CLIENT_NAME) + # Android client formats are broken due to integrity check enforcement + # Ref: https://github.com/yt-dlp/yt-dlp/issues/9554 + is_broken = client_name and client_name.startswith(short_client_name('android')) + if is_broken: + self.report_warning( + f'{video_id}: Android client formats are broken and may yield HTTP Error 403. ' + 'They will be deprioritized', only_once=True) + name = fmt.get('qualityLabel') or quality.replace('audio_quality_', '') or '' fps = int_or_none(fmt.get('fps')) or 0 dct = { @@ -3855,30 +3931,28 @@ def build_fragments(f): 'filesize': int_or_none(fmt.get('contentLength')), 'format_id': f'{itag}{"-drc" if fmt.get("isDrc") else ""}', 'format_note': join_nonempty( - join_nonempty(audio_track.get('displayName'), - language_preference > 0 and ' (default)', delim=''), + join_nonempty(audio_track.get('displayName'), is_default and ' (default)', delim=''), name, fmt.get('isDrc') and 'DRC', try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()), try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()), - throttled and 'THROTTLED', is_damaged and 'DAMAGED', + is_damaged and 'DAMAGED', is_broken and 'BROKEN', (self.get_param('verbose') or all_formats) and client_name, delim=', '), # Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372 - 'source_preference': ((-10 if throttled else -5 if itag == '22' else -1) - + (100 if 'Premium' in name else 0)), + 'source_preference': (-5 if itag == '22' else -1) + (100 if 'Premium' in name else 0), 'fps': fps if fps > 1 else None, # For some formats, fps is wrongly returned as 1 'audio_channels': fmt.get('audioChannels'), 'height': height, 'quality': q(quality) - bool(fmt.get('isDrc')) / 2, 'has_drm': bool(fmt.get('drmFamilies')), 'tbr': tbr, + 'filesize_approx': filesize_from_tbr(tbr, format_duration), 'url': fmt_url, 'width': int_or_none(fmt.get('width')), - 'language': join_nonempty(audio_track.get('id', '').split('.')[0], - 'desc' if language_preference < -1 else '') or None, - 'language_preference': language_preference, - # Strictly de-prioritize damaged and 3gp formats - 'preference': -10 if is_damaged else -2 if itag == '17' else None, + 'language': join_nonempty(language_code, 'desc' if is_descriptive else '') or None, + 'language_preference': PREFERRED_LANG_VALUE if is_default else -10 if is_descriptive else -1, + # Strictly de-prioritize broken, damaged and 3gp formats + 'preference': -20 if is_broken else -10 if is_damaged else -2 if itag == '17' else None, } mime_mobj = re.match( r'((?:[^/]+)/(?:[^;]+))(?:;\s*codecs="([^"]+)")?', fmt.get('mimeType') or '') @@ -3892,18 +3966,23 @@ def build_fragments(f): if single_stream and dct.get('ext'): dct['container'] = dct['ext'] + '_dash' - if all_formats and dct['filesize']: + if (all_formats or 'dashy' in format_types) and dct['filesize']: yield { **dct, 'format_id': f'{dct["format_id"]}-dashy' if all_formats else dct['format_id'], 'protocol': 'http_dash_segments', 'fragments': build_fragments(dct), } - dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE} - yield dct + if all_formats or 'dashy' not in format_types: + dct['downloader_options'] = {'http_chunk_size': CHUNK_SIZE} + yield dct needs_live_processing = self._needs_live_processing(live_status, duration) - skip_bad_formats = not self._configuration_arg('include_incomplete_formats') + skip_bad_formats = 'incomplete' not in format_types + if self._configuration_arg('include_incomplete_formats'): + skip_bad_formats = False + self._downloader.deprecated_feature('[youtube] include_incomplete_formats extractor argument is deprecated. ' + 'Use formats=incomplete extractor argument instead') skip_manifests = set(self._configuration_arg('skip')) if (not self.get_param('youtube_include_hls_manifest', True) @@ -3915,7 +3994,7 @@ def build_fragments(f): skip_manifests.add('dash') if self._configuration_arg('include_live_dash'): self._downloader.deprecated_feature('[youtube] include_live_dash extractor argument is deprecated. ' - 'Use include_incomplete_formats extractor argument instead') + 'Use formats=incomplete extractor argument instead') elif skip_bad_formats and live_status == 'is_live' and needs_live_processing != 'is_live': skip_manifests.add('dash') @@ -3932,6 +4011,17 @@ def process_manifest_format(f, proto, client_name, itag): elif itag: f['format_id'] = itag + if original_language and f.get('language') == original_language: + f['format_note'] = join_nonempty(f.get('format_note'), '(default)', delim=' ') + f['language_preference'] = PREFERRED_LANG_VALUE + + if f.get('source_preference') is None: + f['source_preference'] = -1 + + if itag in ('616', '235'): + f['format_note'] = join_nonempty(f.get('format_note'), 'Premium', delim=' ') + f['source_preference'] += 100 + f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1)) if f['quality'] == -1 and f.get('height'): f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))]) @@ -3939,6 +4029,10 @@ def process_manifest_format(f, proto, client_name, itag): f['format_note'] = join_nonempty(f.get('format_note'), client_name, delim=', ') if f.get('fps') and f['fps'] <= 1: del f['fps'] + + if proto == 'hls' and f.get('has_drm'): + f['has_drm'] = 'maybe' + f['source_preference'] -= 5 return True subtitles = {} @@ -4011,8 +4105,9 @@ def _download_player_responses(self, url, smuggled_data, video_id, webpage_url): webpage = None if 'webpage' not in self._configuration_arg('player_skip'): query = {'bpctr': '9999999999', 'has_verified': '1'} - if smuggled_data.get('is_story'): # XXX: Deprecated - query['pp'] = self._PLAYER_PARAMS + pp = self._configuration_arg('player_params', [None], casesense=True)[0] + if pp: + query['pp'] = pp webpage = self._download_webpage( webpage_url, video_id, fatal=False, query=query) @@ -4040,6 +4135,10 @@ def _list_formats(self, video_id, microformats, video_details, player_responses, else None) streaming_data = traverse_obj(player_responses, (..., 'streamingData')) *formats, subtitles = self._extract_formats_and_subtitles(streaming_data, video_id, player_url, live_status, duration) + if all(f.get('has_drm') for f in formats): + # If there are no formats that definitely don't have DRM, all have DRM + for f in formats: + f['has_drm'] = True return live_broadcast_details, live_status, streaming_data, formats, subtitles @@ -4090,7 +4189,7 @@ def _real_extract(self, url): expected_type=str) if multifeed_metadata_list and not smuggled_data.get('force_singlefeed'): if self.get_param('noplaylist'): - self.to_screen('Downloading just video %s because of --no-playlist' % video_id) + self.to_screen(f'Downloading just video {video_id} because of --no-playlist') else: entries = [] feed_ids = [] @@ -4111,19 +4210,19 @@ def feed_entry(name): feed_title = feed_entry('title') title = video_title if feed_title: - title += ' (%s)' % feed_title + title += f' ({feed_title})' entries.append({ '_type': 'url_transparent', 'ie_key': 'Youtube', 'url': smuggle_url( - '%swatch?v=%s' % (base_url, feed_data['id'][0]), + '{}watch?v={}'.format(base_url, feed_data['id'][0]), {'force_singlefeed': True}), 'title': title, }) feed_ids.append(feed_id) self.to_screen( - 'Downloading multifeed video (%s) - add --no-playlist to just download video %s' - % (', '.join(feed_ids), video_id)) + 'Downloading multifeed video ({}) - add --no-playlist to just download video {}'.format( + ', '.join(feed_ids), video_id)) return self.playlist_result( entries, video_id, video_title, video_description) @@ -4187,7 +4286,7 @@ def feed_entry(name): # While the *1,*2,*3 thumbnails are just below their corresponding "*default" variants # in resolution, these are not the custom thumbnail. So de-prioritize them 'maxresdefault', 'hq720', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', - 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3' + 'sd1', 'sd2', 'sd3', 'hq1', 'hq2', 'hq3', 'mq1', 'mq2', 'mq3', '1', '2', '3', ] n_thumbnail_names = len(thumbnail_names) thumbnails.extend({ @@ -4224,7 +4323,7 @@ def is_bad_format(fmt): for fmt in filter(is_bad_format, formats): fmt['preference'] = (fmt.get('preference') or -1) - 10 - fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 4 hours)', delim=' ') + fmt['format_note'] = join_nonempty(fmt.get('format_note'), '(Last 2 hours)', delim=' ') if needs_live_processing: self._prepare_live_from_start_formats( @@ -4260,8 +4359,8 @@ def is_bad_format(fmt): 'playable_in_embed': get_first(playability_statuses, 'playableInEmbed'), 'live_status': live_status, 'release_timestamp': live_start_time, - '_format_sort_fields': ( # source_preference is lower for throttled/potentially damaged formats - 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto') + '_format_sort_fields': ( # source_preference is lower for potentially damaged formats + 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'), } subtitles = {} @@ -4339,18 +4438,24 @@ def process_language(container, base_url, lang_code, sub_name, query): for d_k, s_ks in [('start', ('start', 't')), ('end', ('end',))]: d_k += '_time' if d_k not in info and k in s_ks: - info[d_k] = parse_duration(query[k][0]) + info[d_k] = parse_duration(v[0]) # Youtube Music Auto-generated description - if video_description: + if (video_description or '').strip().endswith('\nAuto-generated by YouTube.'): + # XXX: Causes catastrophic backtracking if description has "·" + # E.g. https://www.youtube.com/watch?v=DoPaAxMQoiI + # Simulating atomic groups: (?P[^xy]+)x => (?=(?P[^xy]+))(?P=a)x + # reduces it, but does not fully fix it. https://regex101.com/r/8Ssf2h/2 mobj = re.search( r'''(?xs) - (?P[^·\n]+)·(?P[^\n]+)\n+ - (?P[^\n]+) + (?=(?P[^\n·]+))(?P=track)· + (?=(?P[^\n]+))(?P=artist)\n+ + (?=(?P[^\n]+))(?P=album)\n (?:.+?℗\s*(?P\d{4})(?!\d))? (?:.+?Released on\s*:\s*(?P\d{4}-\d{2}-\d{2}))? - (.+?\nArtist\s*:\s*(?P[^\n]+))? - .+\nAuto-generated\ by\ YouTube\.\s*$ + (.+?\nArtist\s*:\s* + (?=(?P[^\n]+))(?P=clean_artist)\n + )?.+\nAuto-generated\ by\ YouTube\.\s*$ ''', video_description) if mobj: release_year = mobj.group('release_year') @@ -4361,7 +4466,8 @@ def process_language(container, base_url, lang_code, sub_name, query): release_year = release_date[:4] info.update({ 'album': mobj.group('album'.strip()), - 'artist': mobj.group('clean_artist') or ', '.join(a.strip() for a in mobj.group('artist').split('·')), + 'artists': ([a] if (a := mobj.group('clean_artist')) + else [a.strip() for a in mobj.group('artist').split('·')]), 'track': mobj.group('track').strip(), 'release_date': release_date, 'release_year': int_or_none(release_year), @@ -4384,10 +4490,10 @@ def process_language(container, base_url, lang_code, sub_name, query): info['comment_count'] = traverse_obj(initial_data, ( 'contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents', ..., 'itemSectionRenderer', - 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount' + 'contents', ..., 'commentsEntryPointHeaderRenderer', 'commentCount', ), ( 'engagementPanels', lambda _, v: v['engagementPanelSectionListRenderer']['panelIdentifier'] == 'comment-item-section', - 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo' + 'engagementPanelSectionListRenderer', 'header', 'engagementPanelTitleHeaderRenderer', 'contextualInfo', ), expected_type=self._get_count, get_all=False) try: # This will error if there is no livechat @@ -4411,7 +4517,7 @@ def process_language(container, base_url, lang_code, sub_name, query): or self._extract_chapters_from_description(video_description, duration) or None) - info['heatmap'] = self._extract_heatmap_from_player_overlay(initial_data) + info['heatmap'] = self._extract_heatmap(initial_data) contents = traverse_obj( initial_data, ('contents', 'twoColumnWatchNextResults', 'results', 'results', 'contents'), @@ -4455,14 +4561,13 @@ def process_language(container, base_url, lang_code, sub_name, query): if mobj: info[mobj.group('type') + '_count'] = str_to_int(mobj.group('count')) break - sbr_tooltip = try_get( - vpir, lambda x: x['sentimentBar']['sentimentBarRenderer']['tooltip']) - if sbr_tooltip: - like_count, dislike_count = sbr_tooltip.split(' / ') - info.update({ - 'like_count': str_to_int(like_count), - 'dislike_count': str_to_int(dislike_count), - }) + + info['like_count'] = traverse_obj(vpir, ( + 'videoActions', 'menuRenderer', 'topLevelButtons', ..., + 'segmentedLikeDislikeButtonViewModel', 'likeButtonViewModel', 'likeButtonViewModel', + 'toggleButtonViewModel', 'toggleButtonViewModel', 'defaultButtonViewModel', + 'buttonViewModel', 'accessibilityText', {parse_count}), get_all=False) + vcr = traverse_obj(vpir, ('viewCount', 'videoViewCountRenderer')) if vcr: vc = self._get_count(vcr, 'viewCount') @@ -4508,7 +4613,7 @@ def process_language(container, base_url, lang_code, sub_name, query): if mrr_title == 'Album': info['album'] = mrr_contents_text elif mrr_title == 'Artist': - info['artist'] = mrr_contents_text + info['artists'] = [mrr_contents_text] if mrr_contents_text else None elif mrr_title == 'Song': info['track'] = mrr_contents_text owner_badges = self._extract_badges(traverse_obj(vsir, ('owner', 'videoOwnerRenderer', 'badges'))) @@ -4520,21 +4625,41 @@ def process_language(container, base_url, lang_code, sub_name, query): 'uploader_id': channel_handle, 'uploader_url': format_field(channel_handle, None, 'https://www.youtube.com/%s', default=None), }) + + # We only want timestamp IF it has time precision AND a timezone + # Currently the uploadDate in microformats appears to be in US/Pacific timezone. + timestamp = ( + parse_iso8601(get_first(microformats, 'uploadDate'), timezone=NO_DEFAULT) + or parse_iso8601(search_meta('uploadDate'), timezone=NO_DEFAULT) + ) + upload_date = ( + dt.datetime.fromtimestamp(timestamp, dt.timezone.utc).strftime('%Y%m%d') if timestamp else + ( + unified_strdate(get_first(microformats, 'uploadDate')) + or unified_strdate(search_meta('uploadDate')) + )) + + # In the case we cannot get the timestamp: # The upload date for scheduled, live and past live streams / premieres in microformats # may be different from the stream date. Although not in UTC, we will prefer it in this case. # See: https://github.com/yt-dlp/yt-dlp/pull/2223#issuecomment-1008485139 - upload_date = ( - unified_strdate(get_first(microformats, 'uploadDate')) - or unified_strdate(search_meta('uploadDate'))) - if not upload_date or ( - live_status in ('not_live', None) - and 'no-youtube-prefer-utc-upload-date' not in self.get_param('compat_opts', []) - ): + if not upload_date or (not timestamp and live_status in ('not_live', None)): + # this should be in UTC, as configured in the cookie/client context upload_date = strftime_or_none( self._parse_time_text(self._get_text(vpir, 'dateText'))) or upload_date + info['upload_date'] = upload_date + info['timestamp'] = timestamp + + if upload_date and live_status not in ('is_live', 'post_live', 'is_upcoming'): + # Newly uploaded videos' HLS formats are potentially problematic and need to be checked + upload_datetime = datetime_from_str(upload_date).replace(tzinfo=dt.timezone.utc) + if upload_datetime >= datetime_from_str('today-2days'): + for fmt in info['formats']: + if fmt.get('protocol') == 'm3u8_native': + fmt['__needs_testing'] = True - for s_k, d_k in [('artist', 'creator'), ('track', 'alt_title')]: + for s_k, d_k in [('artists', 'creators'), ('track', 'alt_title')]: v = info.get(s_k) if v: info[d_k] = v @@ -4598,7 +4723,7 @@ def wrapper(self, url): def _extract_basic_item_renderer(item): # Modified from _extract_grid_item_renderer known_basic_renderers = ( - 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer' + 'playlistRenderer', 'videoRenderer', 'channelRenderer', 'showRenderer', 'reelItemRenderer', ) for key, renderer in item.items(): if not isinstance(renderer, dict): @@ -4659,7 +4784,7 @@ def _grid_entries(self, grid_renderer): playlist_id = renderer.get('playlistId') if playlist_id: yield self.url_result( - 'https://www.youtube.com/playlist?list=%s' % playlist_id, + f'https://www.youtube.com/playlist?list={playlist_id}', ie=YoutubeTabIE.ie_key(), video_id=playlist_id, video_title=title) continue @@ -4717,7 +4842,7 @@ def _shelf_entries_from_content(self, shelf_renderer): yield from self._grid_entries(renderer) renderer = content.get('horizontalListRenderer') if renderer: - # TODO + # TODO: handle case pass def _shelf_entries(self, shelf_renderer, skip_channels=False): @@ -4794,7 +4919,7 @@ def _post_thread_entries(self, post_thread_renderer): post_renderer, lambda x: x['backstageAttachment']['playlistRenderer']['playlistId'], str) if playlist_id: yield self.url_result( - 'https://www.youtube.com/playlist?list=%s' % playlist_id, + f'https://www.youtube.com/playlist?list={playlist_id}', ie=YoutubeTabIE.ie_key(), video_id=playlist_id) # inline video links runs = try_get(post_renderer, lambda x: x['contentText']['runs'], list) or [] @@ -4878,7 +5003,8 @@ def _extract_entries(self, parent_renderer, continuation_list): 'videoRenderer': lambda x: [self._video_entry(x)], 'playlistRenderer': lambda x: self._grid_entries({'items': [{'playlistRenderer': x}]}), 'channelRenderer': lambda x: self._grid_entries({'items': [{'channelRenderer': x}]}), - 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)] + 'hashtagTileRenderer': lambda x: [self._hashtag_tile_entry(x)], + 'richGridRenderer': lambda x: self._extract_entries(x, continuation_list), } for key, renderer in isr_content.items(): if key not in known_renderers: @@ -4906,10 +5032,15 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): or try_get(tab_content, lambda x: x['richGridRenderer'], dict) or {}) yield from extract_entries(parent_renderer) continuation = continuation_list[0] - + seen_continuations = set() for page_num in itertools.count(1): if not continuation: break + continuation_token = continuation.get('continuation') + if continuation_token is not None and continuation_token in seen_continuations: + self.write_debug('Detected YouTube feed looping - assuming end of feed.') + break + seen_continuations.add(continuation_token) headers = self.generate_api_headers( ytcfg=ytcfg, account_syncid=account_syncid, visitor_data=visitor_data) response = self._extract_response( @@ -4941,12 +5072,12 @@ def _entries(self, tab, item_id, ytcfg, account_syncid, visitor_data): continuation_items = traverse_obj(response, ( ('onResponseReceivedActions', 'onResponseReceivedEndpoints'), ..., - 'appendContinuationItemsAction', 'continuationItems' + 'appendContinuationItemsAction', 'continuationItems', ), 'continuationContents', get_all=False) continuation_item = traverse_obj(continuation_items, 0, None, expected_type=dict, default={}) video_items_renderer = None - for key in continuation_item.keys(): + for key in continuation_item: if key not in known_renderers: continue func, parent_key = known_renderers[key] @@ -5001,6 +5132,10 @@ def _extract_metadata_from_tabs(self, item_id, data): else: metadata_renderer = traverse_obj(data, ('metadata', 'playlistMetadataRenderer'), expected_type=dict) + # pageHeaderViewModel slow rollout began April 2024 + page_header_view_model = traverse_obj(data, ( + 'header', 'pageHeaderRenderer', 'content', 'pageHeaderViewModel', {dict})) + # We can get the uncropped banner/avatar by replacing the crop params with '=s0' # See: https://github.com/yt-dlp/yt-dlp/issues/2237#issuecomment-1013694714 def _get_uncropped(url): @@ -5013,11 +5148,13 @@ def _get_uncropped(url): avatar_thumbnails.append({ 'url': uncropped_avatar, 'id': 'avatar_uncropped', - 'preference': 1 + 'preference': 1, }) - channel_banners = self._extract_thumbnails( - data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) + channel_banners = ( + self._extract_thumbnails(data, ('header', ..., ('banner', 'mobileBanner', 'tvBanner'))) + or self._extract_thumbnails( + page_header_view_model, ('banner', 'imageBannerViewModel', 'image'), final_key='sources')) for banner in channel_banners: banner['preference'] = -10 @@ -5027,7 +5164,7 @@ def _get_uncropped(url): channel_banners.append({ 'url': uncropped_banner, 'id': 'banner_uncropped', - 'preference': -5 + 'preference': -5, }) # Deprecated - remove primary_sidebar_renderer when layout discontinued @@ -5044,9 +5181,14 @@ def _get_uncropped(url): or self._get_text(data, ('header', 'hashtagHeaderRenderer', 'hashtag')) or info['id']), 'availability': self._extract_availability(data), - 'channel_follower_count': self._get_count(data, ('header', ..., 'subscriberCountText')), + 'channel_follower_count': ( + self._get_count(data, ('header', ..., 'subscriberCountText')) + or traverse_obj(page_header_view_model, ( + 'metadata', 'contentMetadataViewModel', 'metadataRows', ..., 'metadataParts', + lambda _, v: 'subscribers' in v['text']['content'], 'text', 'content', {parse_count}, any))), 'description': try_get(metadata_renderer, lambda x: x.get('description', '')), - 'tags': try_get(metadata_renderer or {}, lambda x: x.get('keywords', '').split()), + 'tags': (traverse_obj(data, ('microformat', 'microformatDataRenderer', 'tags', ..., {str})) + or traverse_obj(metadata_renderer, ('keywords', {lambda x: x and shlex.split(x)}, ...))), 'thumbnails': (primary_thumbnails or playlist_thumbnails) + avatar_thumbnails + channel_banners, }) @@ -5096,7 +5238,7 @@ def _get_uncropped(url): info.update({ 'channel': self._search_regex(r'^by (.+) and \d+ others?$', owner_text, 'uploader', default=owner_text), 'channel_id': self.ucid_or_none(browse_ep.get('browseId')), - 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))) + 'uploader_id': self.handle_from_url(urljoin('https://www.youtube.com', browse_ep.get('canonicalBaseUrl'))), }) info.update({ @@ -5128,12 +5270,12 @@ def _extract_inline_playlist(self, playlist, playlist_id, data, ytcfg): 'playlistId': playlist_id, 'videoId': watch_endpoint.get('videoId') or last_id, 'index': watch_endpoint.get('index') or len(videos), - 'params': watch_endpoint.get('params') or 'OAE%3D' + 'params': watch_endpoint.get('params') or 'OAE%3D', } response = self._extract_response( - item_id='%s page %d' % (playlist_id, page_num), + item_id=f'{playlist_id} page {page_num}', query=query, ep='next', headers=headers, ytcfg=ytcfg, - check_get_keys='contents' + check_get_keys='contents', ) playlist = try_get( response, lambda x: x['contents']['twoColumnWatchNextResults']['playlist']['playlist'], dict) @@ -5224,7 +5366,7 @@ def _reload_with_unavailable_videos(self, item_id, data, ytcfg): visitor_data=self._extract_visitor_data(data, ytcfg)) query = { 'params': 'wgYCCAA=', - 'browseId': f'VL{item_id}' + 'browseId': f'VL{item_id}', } return self._extract_response( item_id=item_id, headers=headers, query=query, @@ -5243,7 +5385,7 @@ def _extract_webpage(self, url, item_id, fatal=True): data = self.extract_yt_initial_data(item_id, webpage or '', fatal=fatal) or {} except ExtractorError as e: if isinstance(e.cause, network_exceptions): - if not isinstance(e.cause, urllib.error.HTTPError) or e.cause.code not in (403, 429): + if not isinstance(e.cause, HTTPError) or e.cause.status not in (403, 429): retry.error = e continue self._error_or_warning(e, fatal=fatal) @@ -5259,6 +5401,7 @@ def _extract_webpage(self, url, item_id, fatal=True): # See: https://github.com/yt-dlp/yt-dlp/issues/116 if not traverse_obj(data, 'contents', 'currentVideoEndpoint', 'onResponseReceivedActions'): retry.error = ExtractorError('Incomplete yt initial data received') + data = None continue return webpage, data @@ -5355,7 +5498,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): (?!consent\.)(?:\w+\.)? (?: youtube(?:kids)?\.com| - %(invidious)s + {invidious} )/ (?: (?Pchannel|c|user|browse)/| @@ -5363,13 +5506,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): feed/|hashtag/| (?:playlist|watch)\?.*?\blist= )| - (?!(?:%(reserved_names)s)\b) # Direct URLs + (?!(?:{reserved_names})\b) # Direct URLs ) (?P[^/?\#&]+) - )''' % { - 'reserved_names': YoutubeBaseInfoExtractor._RESERVED_NAMES, - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } + )'''.format( + reserved_names=YoutubeBaseInfoExtractor._RESERVED_NAMES, + invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + ) IE_NAME = 'youtube:tab' _TESTS = [{ @@ -5378,16 +5521,16 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'playlist_mincount': 94, 'info_dict': { 'id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'title': 'Igor Kleiner - Playlists', - 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', - 'uploader': 'Igor Kleiner', + 'title': 'Igor Kleiner Ph.D. - Playlists', + 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', + 'uploader': 'Igor Kleiner Ph.D.', 'uploader_id': '@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience', - 'channel': 'Igor Kleiner', + 'channel': 'Igor Kleiner Ph.D.', 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'], + 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', - 'channel_follower_count': int + 'channel_follower_count': int, }, }, { 'note': 'playlists, multipage, different order', @@ -5395,16 +5538,16 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'playlist_mincount': 94, 'info_dict': { 'id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'title': 'Igor Kleiner - Playlists', - 'description': 'md5:be97ee0f14ee314f1f002cf187166ee2', - 'uploader': 'Igor Kleiner', + 'title': 'Igor Kleiner Ph.D. - Playlists', + 'description': 'md5:15d7dd9e333cb987907fcb0d604b233a', + 'uploader': 'Igor Kleiner Ph.D.', 'uploader_id': '@IgorDataScience', 'uploader_url': 'https://www.youtube.com/@IgorDataScience', - 'tags': ['"критическое', 'мышление"', '"наука', 'просто"', 'математика', '"анализ', 'данных"'], + 'tags': ['критическое мышление', 'наука просто', 'математика', 'анализ данных'], 'channel_id': 'UCqj7Cz7revf5maW9g5pgNcg', - 'channel': 'Igor Kleiner', + 'channel': 'Igor Kleiner Ph.D.', 'channel_url': 'https://www.youtube.com/channel/UCqj7Cz7revf5maW9g5pgNcg', - 'channel_follower_count': int + 'channel_follower_count': int, }, }, { 'note': 'playlists, series', @@ -5413,7 +5556,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'UCYO_jab_esuFRV4b17AJtAw', 'title': '3Blue1Brown - Playlists', - 'description': 'md5:e1384e8a133307dd10edee76e875d62f', + 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'channel': '3Blue1Brown', 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', @@ -5437,10 +5580,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_id': '@ThirstForScience', 'channel_id': 'UCAEtajcuhQ6an9WEzY9LEMQ', 'channel_url': 'https://www.youtube.com/channel/UCAEtajcuhQ6an9WEzY9LEMQ', - 'tags': 'count:13', + 'tags': 'count:12', 'channel': 'ThirstForScience', - 'channel_follower_count': int - } + 'channel_follower_count': int, + }, }, { 'url': 'https://www.youtube.com/c/ChristophLaimer/playlists', 'only_matching': True, @@ -5472,10 +5615,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'tags': [], 'channel': 'Sergey M.', 'description': '', - 'modified_date': '20160902', + 'modified_date': '20230921', 'channel_id': 'UCmlqkdCBesrv2Lak1mF_MxA', 'channel_url': 'https://www.youtube.com/channel/UCmlqkdCBesrv2Lak1mF_MxA', - 'availability': 'public', + 'availability': 'unlisted', 'uploader_url': 'https://www.youtube.com/@sergeym.6173', 'uploader_id': '@sergeym.6173', 'uploader': 'Sergey M.', @@ -5495,7 +5638,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@lexwill718', 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 2, }, { @@ -5512,7 +5655,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'uploader_url': 'https://www.youtube.com/@lexwill718', 'channel': 'lex will', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 975, }, { @@ -5529,7 +5672,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': 'lex will', 'tags': ['bible', 'history', 'prophesy'], 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 199, }, { @@ -5546,7 +5689,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_url': 'https://www.youtube.com/channel/UCKfVa3S1e4PHvxWcwyMMg8w', 'channel_id': 'UCKfVa3S1e4PHvxWcwyMMg8w', 'tags': ['bible', 'history', 'prophesy'], - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 17, }, { @@ -5590,7 +5733,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'UCYO_jab_esuFRV4b17AJtAw', 'title': '3Blue1Brown - Search - linear algebra', - 'description': 'md5:e1384e8a133307dd10edee76e875d62f', + 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9', 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', 'tags': ['Mathematics'], 'channel': '3Blue1Brown', @@ -5859,7 +6002,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'url': 'https://www.youtube.com/hashtag/cctv9', 'info_dict': { 'id': 'cctv9', - 'title': '#cctv9', + 'title': 'cctv9 - All', 'tags': [], }, 'playlist_mincount': 300, # not consistent but should be over 300 @@ -5869,11 +6012,11 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): }, { 'note': 'Requires Premium: should request additional YTM-info webpage (and have format 141) for videos in playlist', 'url': 'https://music.youtube.com/playlist?list=PLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', - 'only_matching': True + 'only_matching': True, }, { 'note': '/browse/ should redirect to /channel/', 'url': 'https://music.youtube.com/browse/UC1a8OFewdjuLq6KlF8M_8Ng', - 'only_matching': True + 'only_matching': True, }, { 'note': 'VLPL, should redirect to playlist?list=PL...', 'url': 'https://music.youtube.com/browse/VLPLRBp0Fe2GpgmgoscNFLxNyBVSFVdYmFkq', @@ -5970,7 +6113,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_id': '@PhilippHagemeister', 'uploader_url': 'https://www.youtube.com/@PhilippHagemeister', 'uploader': 'Philipp Hagemeister', - } + }, }], 'playlist_count': 1, 'params': {'extract_flat': True}, @@ -5985,7 +6128,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'playlist_mincount': 50, 'params': { 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + 'extractor_args': {'youtubetab': {'skip': ['webpage']}}, }, }, { 'note': 'API Fallback: /videos tab, sorted by oldest first', @@ -5998,12 +6141,12 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_id': 'UCu6mSoMNzHQiBIOCkHUa2Aw', 'tags': [], 'channel_url': 'https://www.youtube.com/channel/UCu6mSoMNzHQiBIOCkHUa2Aw', - 'channel_follower_count': int + 'channel_follower_count': int, }, 'playlist_mincount': 650, 'params': { 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + 'extractor_args': {'youtubetab': {'skip': ['webpage']}}, }, 'skip': 'Query for sorting no longer works', }, { @@ -6025,13 +6168,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'playlist_mincount': 101, 'params': { 'skip_download': True, - 'extractor_args': {'youtubetab': {'skip': ['webpage']}} + 'extractor_args': {'youtubetab': {'skip': ['webpage']}}, }, 'expected_warnings': ['YouTube Music is not directly supported', r'[Uu]navailable videos (are|will be) hidden'], }, { 'note': 'non-standard redirect to regional channel', 'url': 'https://www.youtube.com/channel/UCwVVpHQ2Cs9iGJfpdFngePQ', - 'only_matching': True + 'only_matching': True, }, { 'note': 'collaborative playlist (uploader name in the form "by and x other(s)")', 'url': 'https://www.youtube.com/playlist?list=PLx-_-Kk4c89oOHEDQAojOXzEzemXxoqx6', @@ -6050,7 +6193,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_id': '@pukkandan', 'uploader': 'pukkandan', }, - 'playlist_mincount': 2 + 'playlist_mincount': 2, }, { 'note': 'translated tab name', 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/playlists', @@ -6137,12 +6280,13 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel_follower_count': int, 'channel_id': 'UCK9V2B22uJYu3N7eR_BT9QA', 'channel_url': 'https://www.youtube.com/channel/UCK9V2B22uJYu3N7eR_BT9QA', - 'description': 'md5:e56b74b5bb7e9c701522162e9abfb822', + 'description': 'md5:49809d8bf9da539bc48ed5d1f83c33f2', 'channel': 'Polka Ch. 尾丸ポルカ', 'tags': 'count:35', 'uploader_url': 'https://www.youtube.com/@OmaruPolka', 'uploader': 'Polka Ch. 尾丸ポルカ', 'uploader_id': '@OmaruPolka', + 'channel_is_verified': True, }, 'playlist_count': 3, }, { @@ -6152,15 +6296,16 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'UC0intLFzLaudFG-xAvUEO-A', 'title': 'Not Just Bikes - Shorts', - 'tags': 'count:12', + 'tags': 'count:10', 'channel_url': 'https://www.youtube.com/channel/UC0intLFzLaudFG-xAvUEO-A', - 'description': 'md5:26bc55af26855a608a5cf89dfa595c8d', + 'description': 'md5:5e82545b3a041345927a92d0585df247', 'channel_follower_count': int, 'channel_id': 'UC0intLFzLaudFG-xAvUEO-A', 'channel': 'Not Just Bikes', 'uploader_url': 'https://www.youtube.com/@NotJustBikes', 'uploader': 'Not Just Bikes', 'uploader_id': '@NotJustBikes', + 'channel_is_verified': True, }, 'playlist_mincount': 10, }, { @@ -6189,7 +6334,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): # No uploads and no UCID given. Should fail with no uploads error # See test_youtube_lists 'url': 'https://www.youtube.com/news', - 'only_matching': True + 'only_matching': True, }, { # No videos tab but has a shorts tab 'url': 'https://www.youtube.com/c/TKFShorts', @@ -6251,7 +6396,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'title': 'SHORT short', 'view_count': int, 'thumbnails': list, - } + }, }], 'params': {'extract_flat': True}, }, { @@ -6259,8 +6404,8 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'url': 'https://www.youtube.com/channel/UCQvWX73GQygcwXOTSf_VDVg/live', 'info_dict': { 'id': 'UCQvWX73GQygcwXOTSf_VDVg', - 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO, should be Minecraft - Live or Minecraft - Topic - Live - 'tags': [] + 'title': 'UCQvWX73GQygcwXOTSf_VDVg - Live', # TODO: should be Minecraft - Live or Minecraft - Topic - Live + 'tags': [], }, 'playlist': [{ 'info_dict': { @@ -6278,10 +6423,10 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': str, 'uploader_id': str, 'channel_is_verified': bool, # this will keep changing - } + }, }], 'params': {'extract_flat': True, 'playlist_items': '1'}, - 'playlist_mincount': 1 + 'playlist_mincount': 1, }, { # Channel renderer metadata. Contains number of videos on the channel 'url': 'https://www.youtube.com/channel/UCiu-3thuViMebBjw_5nWYrA/channels', @@ -6314,21 +6459,20 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_url': 'https://www.youtube.com/@PewDiePie', 'uploader_id': '@PewDiePie', 'channel_is_verified': True, - } + }, }], 'params': {'extract_flat': True}, }, { 'url': 'https://www.youtube.com/@3blue1brown/about', 'info_dict': { - 'id': 'UCYO_jab_esuFRV4b17AJtAw', + 'id': '@3blue1brown', 'tags': ['Mathematics'], - 'title': '3Blue1Brown - About', + 'title': '3Blue1Brown', 'channel_follower_count': int, 'channel_id': 'UCYO_jab_esuFRV4b17AJtAw', 'channel': '3Blue1Brown', - 'view_count': int, 'channel_url': 'https://www.youtube.com/channel/UCYO_jab_esuFRV4b17AJtAw', - 'description': 'md5:e1384e8a133307dd10edee76e875d62f', + 'description': 'md5:4d1da95432004b7ba840ebc895b6b4c9', 'uploader_url': 'https://www.youtube.com/@3blue1brown', 'uploader_id': '@3blue1brown', 'uploader': '3Blue1Brown', @@ -6351,7 +6495,7 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'channel': '99 Percent Invisible', 'uploader_id': '@99percentinvisiblepodcast', }, - 'playlist_count': 1, + 'playlist_count': 0, }, { # Releases tab, with rich entry playlistRenderers (same as Podcasts tab) 'url': 'https://www.youtube.com/@AHimitsu/releases', @@ -6363,13 +6507,56 @@ class YoutubeTabIE(YoutubeTabBaseInfoExtractor): 'uploader_id': '@AHimitsu', 'uploader': 'A Himitsu', 'channel_id': 'UCgFwu-j5-xNJml2FtTrrB3A', - 'tags': 'count:16', + 'tags': 'count:12', 'description': 'I make music', 'channel_url': 'https://www.youtube.com/channel/UCgFwu-j5-xNJml2FtTrrB3A', 'channel_follower_count': int, 'channel_is_verified': True, }, 'playlist_mincount': 10, + }, { + # Playlist with only shorts, shown as reel renderers + # FIXME: future: YouTube currently doesn't give continuation for this, + # may do in future. + 'url': 'https://www.youtube.com/playlist?list=UUxqPAgubo4coVn9Lx1FuKcg', + 'info_dict': { + 'id': 'UUxqPAgubo4coVn9Lx1FuKcg', + 'channel_url': 'https://www.youtube.com/channel/UCxqPAgubo4coVn9Lx1FuKcg', + 'view_count': int, + 'uploader_id': '@BangyShorts', + 'description': '', + 'uploader_url': 'https://www.youtube.com/@BangyShorts', + 'channel_id': 'UCxqPAgubo4coVn9Lx1FuKcg', + 'channel': 'Bangy Shorts', + 'uploader': 'Bangy Shorts', + 'tags': [], + 'availability': 'public', + 'modified_date': r're:\d{8}', + 'title': 'Uploads from Bangy Shorts', + }, + 'playlist_mincount': 100, + 'expected_warnings': [r'[Uu]navailable videos (are|will be) hidden'], + }, { + 'note': 'Tags containing spaces', + 'url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ', + 'playlist_count': 3, + 'info_dict': { + 'id': 'UC7_YxT-KID8kRbqZo7MyscQ', + 'channel': 'Markiplier', + 'channel_id': 'UC7_YxT-KID8kRbqZo7MyscQ', + 'title': 'Markiplier', + 'channel_follower_count': int, + 'description': 'md5:0c010910558658824402809750dc5d97', + 'uploader_id': '@markiplier', + 'uploader_url': 'https://www.youtube.com/@markiplier', + 'uploader': 'Markiplier', + 'channel_url': 'https://www.youtube.com/channel/UC7_YxT-KID8kRbqZo7MyscQ', + 'channel_is_verified': True, + 'tags': ['markiplier', 'comedy', 'gaming', 'funny videos', 'funny moments', + 'sketch comedy', 'laughing', 'lets play', 'challenge videos', 'hilarious', + 'challenges', 'sketches', 'scary games', 'funny games', 'rage games', + 'mark fischbach'], + }, }] @classmethod @@ -6408,6 +6595,9 @@ def _extract_tab_id_and_name(self, tab, base_url='https://www.youtube.com'): def _has_tab(self, tabs, tab_id): return any(self._extract_tab_id_and_name(tab)[0] == tab_id for tab in tabs) + def _empty_playlist(self, item_id, data): + return self.playlist_result([], item_id, **self._extract_metadata_from_tabs(item_id, data)) + @YoutubeTabBaseInfoExtractor.passthrough_smuggled_data def _real_extract(self, url, smuggled_data): item_id = self._match_id(url) @@ -6441,7 +6631,7 @@ def _real_extract(self, url, smuggled_data): # Handle both video/playlist URLs qs = parse_qs(url) - video_id, playlist_id = [traverse_obj(qs, (key, 0)) for key in ('v', 'list')] + video_id, playlist_id = (traverse_obj(qs, (key, 0)) for key in ('v', 'list')) if not video_id and mobj['not_channel'].startswith('watch'): if not playlist_id: # If there is neither video or playlist ids, youtube redirects to home page, which is undesirable @@ -6473,6 +6663,10 @@ def _real_extract(self, url, smuggled_data): selected_tab_id, selected_tab_name = self._extract_tab_id_and_name(selected_tab, url) # NB: Name may be translated self.write_debug(f'Selected tab: {selected_tab_id!r} ({selected_tab_name}), Requested tab: {original_tab_id!r}') + # /about is no longer a tab + if original_tab_id == 'about': + return self._empty_playlist(item_id, data) + if not original_tab_id and selected_tab_name: self.to_screen('Downloading all uploads of the channel. ' 'To download only the videos in a specific tab, pass the tab\'s URL') @@ -6485,7 +6679,7 @@ def _real_extract(self, url, smuggled_data): if not extra_tabs and selected_tab_id != 'videos': # Channel does not have streams, shorts or videos tabs if item_id[:2] != 'UC': - raise ExtractorError('This channel has no uploads', expected=True) + return self._empty_playlist(item_id, data) # Topic channels don't have /videos. Use the equivalent playlist instead pl_id = f'UU{item_id[2:]}' @@ -6493,7 +6687,7 @@ def _real_extract(self, url, smuggled_data): try: data, ytcfg = self._extract_data(pl_url, pl_id, ytcfg=ytcfg, fatal=True, webpage_fatal=True) except ExtractorError: - raise ExtractorError('This channel has no uploads', expected=True) + return self._empty_playlist(item_id, data) else: item_id, url = pl_id, pl_url self.to_screen( @@ -6569,15 +6763,15 @@ class YoutubePlaylistIE(InfoExtractor): (?: (?: youtube(?:kids)?\.com| - %(invidious)s + {invidious} ) /.*?\?.*?\blist= )? - (?P%(playlist_id)s) - )''' % { - 'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE, - 'invidious': '|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), - } + (?P{playlist_id}) + )'''.format( + playlist_id=YoutubeBaseInfoExtractor._PLAYLIST_ID_RE, + invidious='|'.join(YoutubeBaseInfoExtractor._INVIDIOUS_SITES), + ) IE_NAME = 'youtube:playlist' _TESTS = [{ 'note': 'issue #673', @@ -6625,7 +6819,7 @@ class YoutubePlaylistIE(InfoExtractor): 'uploader_url': 'https://www.youtube.com/@milan5503', 'availability': 'public', }, - 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden'], + 'expected_warnings': [r'[Uu]navailable videos? (is|are|will be) hidden', 'Retrying', 'Giving up'], }, { 'url': 'http://www.youtube.com/embed/_xDOZElKyNU?list=PLsyOSbh5bs16vubvKePAQ1x3PhKavfBIl', 'playlist_mincount': 455, @@ -6677,7 +6871,7 @@ def _real_extract(self, url): class YoutubeYtBeIE(InfoExtractor): IE_DESC = 'youtu.be' - _VALID_URL = r'https?://youtu\.be/(?P[0-9A-Za-z_-]{11})/*?.*?\blist=(?P%(playlist_id)s)' % {'playlist_id': YoutubeBaseInfoExtractor._PLAYLIST_ID_RE} + _VALID_URL = rf'https?://youtu\.be/(?P[0-9A-Za-z_-]{{11}})/*?.*?\blist=(?P{YoutubeBaseInfoExtractor._PLAYLIST_ID_RE})' _TESTS = [{ 'url': 'https://youtu.be/yeWKywCrFtk?list=PL2qgrgXsNUG5ig9cat4ohreBjYLAPC0J5', 'info_dict': { @@ -6703,7 +6897,7 @@ class YoutubeYtBeIE(InfoExtractor): 'availability': 'public', 'duration': 59, 'comment_count': int, - 'channel_follower_count': int + 'channel_follower_count': int, }, 'params': { 'noplaylist': True, @@ -6870,14 +7064,22 @@ class YoutubeSearchIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): IE_DESC = 'YouTube search' IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' - _SEARCH_PARAMS = 'EgIQAQ%3D%3D' # Videos only + _SEARCH_PARAMS = 'EgIQAfABAQ==' # Videos only _TESTS = [{ 'url': 'ytsearch5:youtube-dl test video', 'playlist_count': 5, 'info_dict': { 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', - } + }, + }, { + 'note': 'Suicide/self-harm search warning', + 'url': 'ytsearch1:i hate myself and i wanna die', + 'playlist_count': 1, + 'info_dict': { + 'id': 'i hate myself and i wanna die', + 'title': 'i hate myself and i wanna die', + }, }] @@ -6885,14 +7087,14 @@ class YoutubeSearchDateIE(YoutubeTabBaseInfoExtractor, SearchInfoExtractor): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _SEARCH_KEY = 'ytsearchdate' IE_DESC = 'YouTube search, newest videos first' - _SEARCH_PARAMS = 'CAISAhAB' # Videos only, sorted by date + _SEARCH_PARAMS = 'CAISAhAB8AEB' # Videos only, sorted by date _TESTS = [{ 'url': 'ytsearchdate5:youtube-dl test video', 'playlist_count': 5, 'info_dict': { 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', - } + }, }] @@ -6906,14 +7108,14 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'youtube-dl test video', 'title': 'youtube-dl test video', - } + }, }, { 'url': 'https://www.youtube.com/results?search_query=python&sp=EgIQAg%253D%253D', 'playlist_mincount': 5, 'info_dict': { 'id': 'python', 'title': 'python', - } + }, }, { 'url': 'https://www.youtube.com/results?search_query=%23cats', 'playlist_mincount': 1, @@ -6952,7 +7154,7 @@ class YoutubeSearchURLIE(YoutubeTabBaseInfoExtractor): 'uploader': 'Kurzgesagt – In a Nutshell', 'channel_is_verified': True, 'channel_follower_count': int, - } + }, }], 'params': {'extract_flat': True, 'playlist_items': '1'}, 'playlist_mincount': 1, @@ -6977,7 +7179,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): 'info_dict': { 'id': 'royalty free music', 'title': 'royalty free music', - } + }, }, { 'url': 'https://music.youtube.com/search?q=royalty+free+music&sp=EgWKAQIIAWoKEAoQAxAEEAkQBQ%3D%3D', 'playlist_mincount': 30, @@ -6985,7 +7187,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): 'id': 'royalty free music - songs', 'title': 'royalty free music - songs', }, - 'params': {'extract_flat': 'in_playlist'} + 'params': {'extract_flat': 'in_playlist'}, }, { 'url': 'https://music.youtube.com/search?q=royalty+free+music#community+playlists', 'playlist_mincount': 30, @@ -6993,7 +7195,7 @@ class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor): 'id': 'royalty free music - community playlists', 'title': 'royalty free music - community playlists', }, - 'params': {'extract_flat': 'in_playlist'} + 'params': {'extract_flat': 'in_playlist'}, }] _SECTIONS = { @@ -7012,7 +7214,7 @@ def _real_extract(self, url): if params: section = next((k for k, v in self._SECTIONS.items() if v == params), params) else: - section = urllib.parse.unquote_plus((url.split('#') + [''])[1]).lower() + section = urllib.parse.unquote_plus(([*url.split('#'), ''])[1]).lower() params = self._SECTIONS.get(section) if not params: section = None @@ -7032,8 +7234,8 @@ def _real_initialize(self): YoutubeBaseInfoExtractor._check_login_required(self) @classproperty - def IE_NAME(self): - return f'youtube:{self._FEED_NAME}' + def IE_NAME(cls): + return f'youtube:{cls._FEED_NAME}' def _real_extract(self, url): return self.url_result( @@ -7094,22 +7296,6 @@ class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): }] -class YoutubeStoriesIE(InfoExtractor): - IE_DESC = 'YouTube channel stories; "ytstories:" prefix' - IE_NAME = 'youtube:stories' - _VALID_URL = r'ytstories:UC(?P[A-Za-z0-9_-]{21}[AQgw])$' - _TESTS = [{ - 'url': 'ytstories:UCwFCb4jeqaKWnciAYM-ZVHg', - 'only_matching': True, - }] - - def _real_extract(self, url): - playlist_id = f'RLTD{self._match_id(url)}' - return self.url_result( - smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}), - ie=YoutubeTabIE, video_id=playlist_id) - - class YoutubeShortsAudioPivotIE(InfoExtractor): IE_DESC = 'YouTube Shorts audio pivot (Shorts using audio of a given video)' IE_NAME = 'youtube:shorts:pivot:audio' @@ -7217,7 +7403,7 @@ class YoutubeClipIE(YoutubeTabBaseInfoExtractor): 'chapters': 'count:20', 'comment_count': int, 'heatmap': 'count:100', - } + }, }] def _real_extract(self, url):