import urllib.parse
from .common import InfoExtractor, SearchInfoExtractor
+from .openload import PhantomJSwrapper
from ..compat import functools
from ..jsinterp import JSInterpreter
from ..utils import (
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID',
- 'clientVersion': '17.28.34',
+ 'clientVersion': '17.31.35',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 3,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_EMBEDDED_PLAYER',
- 'clientVersion': '17.28.34',
+ 'clientVersion': '17.31.35',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.youtube/17.31.35 (Linux; U; Android 11) gzip'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 55,
'client': {
'clientName': 'ANDROID_MUSIC',
'clientVersion': '5.16.51',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.apps.youtube.music/5.16.51 (Linux; U; Android 11) gzip'
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 21,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'ANDROID_CREATOR',
- 'clientVersion': '22.28.100',
+ 'clientVersion': '22.30.100',
+ 'androidSdkVersion': 30,
+ 'userAgent': 'com.google.android.apps.youtube.creator/22.30.100 (Linux; U; Android 11) gzip'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 14,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS',
- 'clientVersion': '17.30.1',
+ 'clientVersion': '17.33.2',
'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
}
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 5,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MESSAGES_EXTENSION',
- 'clientVersion': '17.30.1',
+ 'clientVersion': '17.33.2',
'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtube/17.33.2 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 66,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_MUSIC',
- 'clientVersion': '5.18',
+ 'clientVersion': '5.21',
+ 'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.youtubemusic/5.21 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 26,
'INNERTUBE_CONTEXT': {
'client': {
'clientName': 'IOS_CREATOR',
- 'clientVersion': '22.29.101',
+ 'clientVersion': '22.33.101',
+ 'deviceModel': 'iPhone14,3',
+ 'userAgent': 'com.google.ios.ytcreator/22.33.101 (iPhone14,3; U; CPU iOS 15_6 like Mac OS X)'
},
},
'INNERTUBE_CONTEXT_CLIENT_NAME': 15,
'Origin': origin,
'X-Youtube-Identity-Token': identity_token or self._extract_identity_token(ytcfg),
'X-Goog-PageId': account_syncid or self._extract_account_syncid(ytcfg),
- 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg)
+ 'X-Goog-Visitor-Id': visitor_data or self._extract_visitor_data(ytcfg),
+ 'User-Agent': self._ytcfg_get_safe(ytcfg, lambda x: x['INNERTUBE_CONTEXT']['client']['userAgent'], default_client=default_client)
}
if session_index is None:
session_index = self._extract_session_index(ytcfg)
# Youtube sometimes sends incomplete data
# See: https://github.com/ytdl-org/youtube-dl/issues/28194
if not traverse_obj(response, *variadic(check_get_keys)):
- retry.error = ExtractorError('Incomplete data received')
+ retry.error = ExtractorError('Incomplete data received', expected=True)
continue
return response
else None),
'live_status': ('is_upcoming' if scheduled_timestamp is not None
else 'was_live' if 'streamed' in time_text.lower()
- else 'is_live' if overlay_style is not None and overlay_style == 'LIVE' or 'live now' in badges
+ else 'is_live' if overlay_style == 'LIVE' or 'live now' in badges
else None),
'release_timestamp': scheduled_timestamp,
'availability': self._availability(needs_premium='premium' in badges, needs_subscription='members only' in badges)
'tags': [],
'uploader_url': 'http://www.youtube.com/user/nao20010128nao',
}
+ }, {
+ 'note': '6 channel audio',
+ 'url': 'https://www.youtube.com/watch?v=zgdo7-RRjgo',
+ 'only_matching': True,
}
]
assert os.path.basename(func_id) == func_id
self.write_debug(f'Extracting signature function {func_id}')
- cache_spec = self.cache.load('youtube-sigfuncs', func_id)
- if cache_spec is not None:
- return lambda s: ''.join(s[i] for i in cache_spec)
+ cache_spec, code = self.cache.load('youtube-sigfuncs', func_id), None
- code = self._load_player(video_id, player_url)
+ if not cache_spec:
+ code = self._load_player(video_id, player_url)
if code:
res = self._parse_sig_js(code)
-
test_string = ''.join(map(chr, range(len(example_sig))))
- cache_res = res(test_string)
- cache_spec = [ord(c) for c in cache_res]
-
+ cache_spec = [ord(c) for c in res(test_string)]
self.cache.store('youtube-sigfuncs', func_id, cache_spec)
- return res
+
+ return lambda s: ''.join(s[i] for i in cache_spec)
def _print_sig_code(self, func, example_sig):
if not self.get_param('youtube_print_sig_code'):
initial_function = jsi.extract_function(funcname)
return lambda s: initial_function([s])
# Build a memoizing wrapper around `func`, backed by `self._player_cache`.
# The cache key is the tuple of extra positional arguments (`cache_id`) only; the
# arguments later passed to the wrapper are NOT part of the key, so the outcome of
# the first call is reused for every later call made with the same `cache_id`.
# Failures are cached too: an ExtractorError is stored as-is, any other Exception is
# stored wrapped in an ExtractorError whose message is the formatted traceback.
+ def _cached(self, func, *cache_id):
+ def inner(*args, **kwargs):
# Only invoke `func` when nothing (neither a value nor an exception) is stored yet
+ if cache_id not in self._player_cache:
+ try:
+ self._player_cache[cache_id] = func(*args, **kwargs)
+ except ExtractorError as e:
+ self._player_cache[cache_id] = e
+ except Exception as e:
+ self._player_cache[cache_id] = ExtractorError(traceback.format_exc(), cause=e)
+
# A stored exception is re-raised on every call; anything else is returned
+ ret = self._player_cache[cache_id]
+ if isinstance(ret, Exception):
+ raise ret
+ return ret
+ return inner
+
# Apply the player's signature function to the encrypted value `s` and return the result.
# Diff note: the '-' lines are the previous body (manual `_player_cache` lookup inside a
# catch-all try/except that re-raised as ExtractorError with `video_id`); the '+' lines
# replace it with the `self._cached(...)` wrapper.
def _decrypt_signature(self, s, video_id, player_url):
"""Turn the encrypted s field into a working signature"""
- try:
- player_id = (player_url, self._signature_cache_id(s))
- if player_id not in self._player_cache:
- func = self._extract_signature_function(video_id, player_url, s)
- self._player_cache[player_id] = func
- func = self._player_cache[player_id]
- self._print_sig_code(func, s)
- return func(s)
- except Exception as e:
- raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
# New cache key: ('sig', player_url, self._signature_cache_id(s))
+ extract_sig = self._cached(
+ self._extract_signature_function, 'sig', player_url, self._signature_cache_id(s))
+ func = extract_sig(video_id, player_url, s)
+ self._print_sig_code(func, s)
+ return func(s)
# Decrypt the `n` query value `s` using the n-function of the player at `player_url`.
# Returns the decrypted string. In the '+' version a JSInterpreter.Exception from the
# native interpreter triggers a PhantomJS fallback.
# NOTE(review): the condition guarding the `raise` below is not visible in this excerpt
# (hunk boundary) - presumably it checks for a missing player_url; confirm in the full file.
def _decrypt_nsig(self, s, video_id, player_url):
"""Turn the encrypted n field into a working signature"""
raise ExtractorError('Cannot decrypt nsig without player_url')
player_url = urljoin('https://www.youtube.com', player_url)
- sig_id = ('nsig_value', s)
- if sig_id in self._player_cache:
- return self._player_cache[sig_id]
# func_code is later unpacked as (args, func_body); func_code[1] is what gets printed
+ jsi, player_id, func_code = self._extract_n_function_code(video_id, player_url)
+ if self.get_param('youtube_print_sig_code'):
+ self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
try:
- player_id = ('nsig', player_url)
- if player_id not in self._player_cache:
- self._player_cache[player_id] = self._extract_n_function(video_id, player_url)
- func = self._player_cache[player_id]
- self._player_cache[sig_id] = func(s)
- self.write_debug(f'Decrypted nsig {s} => {self._player_cache[sig_id]}')
- return self._player_cache[sig_id]
- except Exception as e:
- raise ExtractorError(traceback.format_exc(), cause=e, video_id=video_id)
# The built n-function is cached per player under ('nsig func', player_url), then called with `s`
+ extract_nsig = self._cached(self._extract_n_function_from_code, 'nsig func', player_url)
+ ret = extract_nsig(jsi, func_code)(s)
+ except JSInterpreter.Exception as e:
# Fallback path: if the PhantomJS wrapper cannot be constructed (ExtractorError),
# the original interpreter error `e` is re-raised instead
+ try:
+ jsi = PhantomJSwrapper(self, timeout=5000)
+ except ExtractorError:
+ raise e
+ self.report_warning(
+ f'Native nsig extraction failed: Trying with PhantomJS\n'
+ f' n = {s} ; player = {player_url}', video_id)
+ self.write_debug(e)
+
# Wrap the extracted body in an immediately-invoked function expression, pass `s` as a
# quoted literal (Python repr) and read the result back from console.log output
+ args, func_body = func_code
+ ret = jsi.execute(
+ f'console.log(function({", ".join(args)}) {{ {func_body} }}({s!r}));',
+ video_id=video_id, note='Executing signature code').strip()
+
+ self.write_debug(f'Decrypted nsig {s} => {ret}')
+ return ret
# Locate the name of the "n" transformation function inside the player JavaScript `jscode`.
# The regex captures the identifier called on the `.get("n")` result and an optional
# numeric index. Without an index the identifier itself is returned; with an index the
# identifier is treated as a `var <name> = [...]` array and its idx-th element is returned.
# Diff note: the only change here is renaming the local `nfunc` to `funcname`.
def _extract_n_function_name(self, jscode):
- nfunc, idx = self._search_regex(
+ funcname, idx = self._search_regex(
r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]+)(?:\[(?P<idx>\d+)\])?\([a-zA-Z0-9]\)',
jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
if not idx:
- return nfunc
+ return funcname
+
# Indexed form: parse the array literal (converted via js_to_json) and pick element int(idx)
return json.loads(js_to_json(self._search_regex(
- rf'var {re.escape(nfunc)}\s*=\s*(\[.+?\]);', jscode,
- f'Initial JS player n function list ({nfunc}.{idx})')))[int(idx)]
+ rf'var {re.escape(funcname)}\s*=\s*(\[.+?\]);', jscode,
+ f'Initial JS player n function list ({funcname}.{idx})')))[int(idx)]
# Diff note: `_extract_n_function` (which returned a ready-to-call lambda) is replaced by
# `_extract_n_function_code`, which returns the tuple (jsi, player_id, func_code) and leaves
# building the callable to the caller.
# The function code is looked up in the 'youtube-nsig' cache under `player_id`; on a miss the
# player JS is loaded, the n-function code is extracted from it and stored in that cache.
- def _extract_n_function(self, video_id, player_url):
+ def _extract_n_function_code(self, video_id, player_url):
player_id = self._extract_player_info(player_url)
- func_code = self.cache.load('youtube-nsig', player_id)
# NOTE(review): min_ver presumably rejects cache entries written by older versions - confirm against the cache API
+ func_code = self.cache.load('youtube-nsig', player_id, min_ver='2022.08.19.2')
# On a cache hit the interpreter is constructed from the cached func_code itself,
# so the player JS is only downloaded on a miss
+ jscode = func_code or self._load_player(video_id, player_url)
+ jsi = JSInterpreter(jscode)
if func_code:
- jsi = JSInterpreter(func_code)
- else:
- jscode = self._load_player(video_id, player_url)
- funcname = self._extract_n_function_name(jscode)
- jsi = JSInterpreter(jscode)
- func_code = jsi.extract_function_code(funcname)
- self.cache.store('youtube-nsig', player_id, func_code)
+ return jsi, player_id, func_code
- if self.get_param('youtube_print_sig_code'):
- self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
# Cache miss: extract the function code from the freshly loaded player JS and persist it
+ func_code = jsi.extract_function_code(self._extract_n_function_name(jscode))
+ self.cache.store('youtube-nsig', player_id, func_code)
+ return jsi, player_id, func_code
+
# Build the n-function callable from `func_code` using interpreter `jsi` and return a wrapper
# that takes the encrypted value `s`. The wrapper only ever raises JSInterpreter.Exception:
# such exceptions pass through unchanged, any other Exception is converted (message is the
# formatted traceback), and a result starting with 'enhanced_except_' is treated as a failure.
+ def _extract_n_function_from_code(self, jsi, func_code):
# The function is built once here and reused by every call of the returned wrapper
+ func = jsi.extract_function_from_code(*func_code)
# Removed line below belonged to the old `_extract_n_function`; it rebuilt the function on each call
- return lambda s: jsi.extract_function_from_code(*func_code)([s])
+ def extract_nsig(s):
+ try:
# The extracted function is called with a single-element argument list
+ ret = func([s])
+ except JSInterpreter.Exception:
+ raise
+ except Exception as e:
+ raise JSInterpreter.Exception(traceback.format_exc(), cause=e)
+
# NOTE(review): assumes `ret` is a str; a non-string result would fail on .startswith - confirm
+ if ret.startswith('enhanced_except_'):
+ raise JSInterpreter.Exception('Signature function returned an exception')
+ return ret
+
+ return extract_nsig
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
"""
# Report whether `player_response` has playabilityStatus -> status equal to 'UNPLAYABLE'.
# NOTE(review): takes no `self`; presumably decorated as a staticmethod on a line outside this excerpt.
def _is_unplayable(player_response):
return traverse_obj(player_response, ('playabilityStatus', 'status')) == 'UNPLAYABLE'
- def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr):
+ _STORY_PLAYER_PARAMS = '8AEB'
+
+ def _extract_player_response(self, client, video_id, master_ytcfg, player_ytcfg, player_url, initial_pr, smuggled_data):
session_index = self._extract_session_index(player_ytcfg, master_ytcfg)
syncid = self._extract_account_syncid(player_ytcfg, master_ytcfg, initial_pr)
yt_query = {
'videoId': video_id,
- 'params': '8AEB' # enable stories
}
+ if smuggled_data.get('is_story') or _split_innertube_client(client)[0] == 'android':
+ yt_query['params'] = self._STORY_PLAYER_PARAMS
+
yt_query.update(self._generate_player_context(sts))
return self._extract_response(
item_id=video_id, ep='player', query=yt_query,
return orderedSet(requested_clients)
- def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg):
+ def _extract_player_responses(self, clients, video_id, webpage, master_ytcfg, smuggled_data):
initial_pr = None
if webpage:
initial_pr = self._search_json(
try:
pr = initial_pr if client == 'web' and initial_pr else self._extract_player_response(
- client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr)
+ client, video_id, player_ytcfg or master_ytcfg, player_ytcfg, player_url if require_js_player else None, initial_pr, smuggled_data)
except ExtractorError as e:
if last_error:
self.report_warning(last_error)
continue
if pr:
- prs.append(pr)
+ # YouTube may return a different video player response than expected.
+ # See: https://github.com/TeamNewPipe/NewPipe/issues/8713
+ pr_video_id = traverse_obj(pr, ('videoDetails', 'videoId'))
+ if pr_video_id and pr_video_id != video_id:
+ self.report_warning(
+ f'Skipping player response from {client} client (got player response for video "{pr_video_id}" instead of "{video_id}")' + bug_reports_message())
+ else:
+ prs.append(pr)
# creator clients can bypass AGE_VERIFICATION_REQUIRED if logged in
if variant == 'embedded' and self._is_unplayable(pr) and self.is_authenticated:
def _extract_formats_and_subtitles(self, streaming_data, video_id, player_url, is_live, duration):
itags, stream_ids = {}, []
- itag_qualities, res_qualities = {}, {}
+ itag_qualities, res_qualities = {}, {0: None}
q = qualities([
# Normally tiny is the smallest video-only formats. But
# audio-only formats with unknown quality may get tagged as tiny
self._decrypt_signature(encrypted_sig, video_id, player_url)
)
except ExtractorError as e:
- self.report_warning('Signature extraction failed: Some formats may be missing', only_once=True)
+ self.report_warning('Signature extraction failed: Some formats may be missing',
+ video_id=video_id, only_once=True)
self.write_debug(e, only_once=True)
continue
throttled = False
if query.get('n'):
try:
+ decrypt_nsig = self._cached(self._decrypt_nsig, 'nsig', query['n'][0])
fmt_url = update_url_query(fmt_url, {
- 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
+ 'n': decrypt_nsig(query['n'][0], video_id, player_url)
+ })
except ExtractorError as e:
+ phantomjs_hint = ''
+ if isinstance(e, JSInterpreter.Exception):
+ phantomjs_hint = (f' Install {self._downloader._format_err("PhantomJS", self._downloader.Styles.EMPHASIS)} '
+ f'to workaround the issue. {PhantomJSwrapper.INSTALL_HINT}\n')
self.report_warning(
- 'nsig extraction failed: You may experience throttling for some formats\n'
- f'n = {query["n"][0]} ; player = {player_url}', only_once=True)
+ f'nsig extraction failed: You may experience throttling for some formats\n{phantomjs_hint}'
+ f' n = {query["n"][0]} ; player = {player_url}', video_id=video_id, only_once=True)
self.write_debug(e, only_once=True)
throttled = True
else -10 if 'descriptive' in (audio_track.get('displayName') or '').lower() and -10
else -1)
# Some formats may have much smaller duration than others (possibly damaged during encoding)
- # Eg: 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
+ # E.g. 2-nOtRESiUc Ref: https://github.com/yt-dlp/yt-dlp/issues/2823
# Make sure to avoid false positives with small duration differences.
- # Eg: __2ABJjxzNo, ySuUZEjARPY
+ # E.g. __2ABJjxzNo, ySuUZEjARPY
is_damaged = try_get(fmt, lambda x: float(x['approxDurationMs']) / duration < 500)
if is_damaged:
self.report_warning(
'%s%s' % (audio_track.get('displayName') or '',
' (default)' if language_preference > 0 else ''),
fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
+ try_get(fmt, lambda x: x['projectionType'].replace('RECTANGULAR', '').lower()),
+ try_get(fmt, lambda x: x['spatialAudioType'].replace('SPATIAL_AUDIO_TYPE_', '').lower()),
throttled and 'THROTTLED', is_damaged and 'DAMAGED', delim=', '),
# Format 22 is likely to be damaged. See https://github.com/yt-dlp/yt-dlp/issues/3372
'source_preference': -10 if throttled else -5 if itag == '22' else -1,
'fps': int_or_none(fmt.get('fps')) or None,
+ 'audio_channels': fmt.get('audioChannels'),
'height': height,
'quality': q(quality),
'has_drm': bool(fmt.get('drmFamilies')),
f['format_id'] = itag
itags[itag] = proto
- f['quality'] = next((
- q(qdict[val])
- for val, qdict in ((f.get('format_id', '').split('-')[0], itag_qualities), (f.get('height'), res_qualities))
- if val in qdict), -1)
+ f['quality'] = q(itag_qualities.get(try_get(f, lambda f: f['format_id'].split('-')[0]), -1))
+ if f['quality'] == -1 and f.get('height'):
+ f['quality'] = q(res_qualities[min(res_qualities, key=lambda x: abs(x - f['height']))])
return True
subtitles = {}
# Fetch the watch page (unless skipped) and the player responses for `video_id`.
# Returns the tuple (webpage, master_ytcfg, player_responses, player_url).
# Diff note: the query string is no longer appended to `webpage_url` by hand; it is passed as
# `query=`, and the story parameter `pp` is now sent only when smuggled_data has 'is_story'.
def _download_player_responses(self, url, smuggled_data, video_id, webpage_url):
webpage = None
# The page download is skipped when 'webpage' is listed in the 'player_skip' configuration argument
if 'webpage' not in self._configuration_arg('player_skip'):
+ query = {'bpctr': '9999999999', 'has_verified': '1'}
+ if smuggled_data.get('is_story'):
+ query['pp'] = self._STORY_PLAYER_PARAMS
# NOTE(review): fatal=False presumably makes a failed download non-fatal, leaving `webpage` falsy - confirm
webpage = self._download_webpage(
- webpage_url + '&bpctr=9999999999&has_verified=1&pp=8AEB', video_id, fatal=False)
+ webpage_url, video_id, fatal=False, query=query)
# Fall back to the default ytcfg when none can be extracted from the webpage
master_ytcfg = self.extract_ytcfg(video_id, webpage) or self._get_default_ytcfg()
player_responses, player_url = self._extract_player_responses(
self._get_requested_clients(url, smuggled_data),
- video_id, webpage, master_ytcfg)
+ video_id, webpage, master_ytcfg, smuggled_data)
return webpage, master_ytcfg, player_responses, player_url
formats.extend(self._extract_storyboard(player_responses, duration))
# source_preference is lower for throttled/potentially damaged formats
- self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang', 'proto'))
+ self._sort_formats(formats, (
+ 'quality', 'res', 'fps', 'hdr:12', 'source', 'vcodec:vp9.2', 'channels', 'acodec', 'lang', 'proto'))
info = {
'id': video_id,
class YoutubeMusicSearchURLIE(YoutubeTabBaseInfoExtractor):
- IE_DESC = 'YouTube music search URLs with selectable sections (Eg: #songs)'
+ IE_DESC = 'YouTube music search URLs with selectable sections, e.g. #songs'
IE_NAME = 'youtube:music:search_url'
_VALID_URL = r'https?://music\.youtube\.com/search\?([^#]+&)?(?:search_query|q)=(?:[^&]+)(?:[&#]|$)'
_TESTS = [{
# Delegate to YoutubeTabIE with a playlist URL whose id is 'RLTD' + the id matched from `url`.
# Diff note: the '+' line additionally smuggles {'is_story': True} into the playlist URL.
def _real_extract(self, url):
playlist_id = f'RLTD{self._match_id(url)}'
return self.url_result(
- f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1',
+ smuggle_url(f'https://www.youtube.com/playlist?list={playlist_id}&playnext=1', {'is_story': True}),
ie=YoutubeTabIE, video_id=playlist_id)