class YoutubeIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com'
+ IE_DESC = 'YouTube'
_INVIDIOUS_SITES = (
# invidious-redirect websites
r'(?:www\.)?redirect\.invidious\.io',
raise ExtractorError('Cannot identify player %r' % player_url)
return id_m.group('id')
- def _load_player(self, video_id, player_url, fatal=True) -> bool:
+ def _load_player(self, video_id, player_url, fatal=True):
player_id = self._extract_player_info(player_url)
if player_id not in self._code_cache:
code = self._download_webpage(
errnote='Download of %s failed' % player_url)
if code:
self._code_cache[player_id] = code
- return player_id in self._code_cache
+ return self._code_cache.get(player_id)
def _extract_signature_function(self, video_id, player_url, example_sig):
player_id = self._extract_player_info(player_url)
if cache_spec is not None:
return lambda s: ''.join(s[i] for i in cache_spec)
- if self._load_player(video_id, player_url):
- code = self._code_cache[player_id]
+ code = self._load_player(video_id, player_url)
+ if code:
res = self._parse_sig_js(code)
test_string = ''.join(map(compat_chr, range(len(example_sig))))
return res
def _print_sig_code(self, func, example_sig):
+ if not self.get_param('youtube_print_sig_code'):
+ return
+
def gen_sig_code(idxs):
def _genslice(start, end, step):
starts = '' if start == 0 else str(start)
)
self._player_cache[player_id] = func
func = self._player_cache[player_id]
- if self.get_param('youtube_print_sig_code'):
- self._print_sig_code(func, s)
+ self._print_sig_code(func, s)
return func(s)
except Exception as e:
- tb = traceback.format_exc()
- raise ExtractorError(
- 'Signature extraction failed: ' + tb, cause=e)
+ raise ExtractorError('Signature extraction failed: ' + traceback.format_exc(), cause=e)
+
+ def _decrypt_nsig(self, s, video_id, player_url):
+     """Return the decrypted form of the encrypted ``n`` query value ``s``.
+
+     ``player_url`` is first made absolute: a protocol-relative URL gets an
+     ``https:`` scheme, and anything that is not already http(s) is joined
+     onto ``https://www.youtube.com``.
+
+     Both the decrypted value (keyed by ``('nsig_value', s)``) and the
+     decryption function (keyed by ``('nsig', player_url)``) are memoised
+     in ``self._player_cache``.
+
+     Raises ExtractorError when ``player_url`` is None, or when building or
+     running the decryption function fails; in the latter case the message
+     is the formatted traceback and the original exception is the cause.
+     """
+     if player_url is None:
+         raise ExtractorError('Cannot decrypt nsig without player_url')
+     if player_url.startswith('//'):
+         player_url = 'https:' + player_url
+     elif not re.match(r'https?://', player_url):
+         player_url = compat_urlparse.urljoin('https://www.youtube.com', player_url)
+
+     value_key = ('nsig_value', s)
+     if value_key in self._player_cache:
+         # This exact n value was already decrypted earlier
+         return self._player_cache[value_key]
+
+     try:
+         func_key = ('nsig', player_url)
+         if func_key not in self._player_cache:
+             self._player_cache[func_key] = self._extract_n_function(video_id, player_url)
+         decrypt = self._player_cache[func_key]
+         decrypted = decrypt(s)
+         # Store before logging so the value is cached even if logging fails
+         self._player_cache[value_key] = decrypted
+         self.write_debug(f'Decrypted nsig {s} => {decrypted}')
+         return decrypted
+     except Exception as e:
+         raise ExtractorError(traceback.format_exc(), cause=e)
+
+
+ def _extract_n_function_name(self, jscode):
+     """Return the function name captured as ``nfunc`` from ``jscode``.
+
+     The pattern looks for ``.get("n"))&&(b=<name>(<arg>)`` where ``<name>``
+     is exactly three characters from ``[a-zA-Z0-9$]`` and ``<arg>`` is a
+     single alphanumeric character.
+     """
+     nfunc_patterns = (
+         r'\.get\("n"\)\)&&\(b=(?P<nfunc>[a-zA-Z0-9$]{3})\([a-zA-Z0-9]\)',
+     )
+     return self._search_regex(
+         nfunc_patterns, jscode,
+         'Initial JS player n function name', group='nfunc')
+
+
+ def _extract_n_function(self, video_id, player_url):
+     """Return a callable that applies the player's n function to one string.
+
+     The function code is looked up in the downloader cache under section
+     ``'youtube-nsig'`` keyed by the player id. On a miss, the player JS is
+     loaded, the n function name is located in it, its code is extracted
+     and the result is stored back into the cache.
+
+     When the ``youtube_print_sig_code`` param is set, the extracted code
+     (``func_code[1]``) is printed to the screen.
+     """
+     player_id = self._extract_player_info(player_url)
+     func_code = self._downloader.cache.load('youtube-nsig', player_id)
+
+     if not func_code:
+         # Cache miss: pull the function out of the player JS and remember it
+         jscode = self._load_player(video_id, player_url)
+         funcname = self._extract_n_function_name(jscode)
+         jsi = JSInterpreter(jscode)
+         func_code = jsi.extract_function_code(funcname)
+         self._downloader.cache.store('youtube-nsig', player_id, func_code)
+     else:
+         jsi = JSInterpreter(func_code)
+
+     if self.get_param('youtube_print_sig_code'):
+         self.to_screen(f'Extracted nsig function from {player_id}:\n{func_code[1]}\n')
+
+     def decrypt_n(s):
+         # The JS function is rebuilt from func_code on every call and is
+         # invoked with a one-element argument list
+         return jsi.extract_function_from_code(*func_code)([s])
+
+     return decrypt_n
def _extract_signature_timestamp(self, video_id, player_url, ytcfg=None, fatal=False):
"""
raise ExtractorError(error_msg)
self.report_warning(error_msg)
return
- if self._load_player(video_id, player_url, fatal=fatal):
- player_id = self._extract_player_info(player_url)
- code = self._code_cache[player_id]
+ code = self._load_player(video_id, player_url, fatal=fatal)
+ if code:
sts = int_or_none(self._search_regex(
r'(?:signatureTimestamp|sts)\s*:\s*(?P<sts>[0-9]{5})', code,
'JS player signature timestamp', group='sts', fatal=fatal))
return prs, player_url
def _extract_formats(self, streaming_data, video_id, player_url, is_live):
- itags, stream_ids = [], []
+ itags, stream_ids = {}, []
itag_qualities, res_qualities = {}, {}
q = qualities([
# Normally tiny is the smallest video-only formats. But
sp = try_get(sc, lambda x: x['sp'][0]) or 'signature'
fmt_url += '&' + sp + '=' + signature
+ query = parse_qs(fmt_url)
+ throttled = False
+ if query.get('ratebypass') != ['yes'] and query.get('n'):
+ try:
+ fmt_url = update_url_query(fmt_url, {
+ 'n': self._decrypt_nsig(query['n'][0], video_id, player_url)})
+ except ExtractorError as e:
+ self.report_warning(f'nsig extraction failed: You may experience throttling for some formats\n{e}', only_once=True)
+ throttled = True
+
if itag:
- itags.append(itag)
+ itags[itag] = 'https'
stream_ids.append(stream_id)
tbr = float_or_none(
'format_note': ', '.join(filter(None, (
'%s%s' % (audio_track.get('displayName') or '',
' (default)' if audio_track.get('audioIsDefault') else ''),
- fmt.get('qualityLabel') or quality.replace('audio_quality_', '')))),
+ fmt.get('qualityLabel') or quality.replace('audio_quality_', ''),
+ throttled and 'THROTTLED'))),
+ # Throttled formats must rank below unthrottled ones when sorting by source,
+ # so they get the lower preference value
+ 'source_preference': -10 if throttled else -1,
'fps': int_or_none(fmt.get('fps')),
'height': height,
'quality': q(quality),
and 'dash' not in skip_manifests and self.get_param('youtube_include_dash_manifest', True))
get_hls = 'hls' not in skip_manifests and self.get_param('youtube_include_hls_manifest', True)
- def guess_quality(f):
- for val, qdict in ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities)):
- if val in qdict:
- return q(qdict[val])
- return -1
+ def process_manifest_format(f, proto, itag):
+     """Assign format_id and quality to manifest format ``f``; return False to drop it.
+
+     ``itags`` maps every format id seen so far to the protocol it came
+     from. If ``itag`` is already known, the format is dropped when it was
+     seen for the same ``proto`` or when the ``<itag>-<proto>`` id is also
+     taken; otherwise it is renamed to ``<itag>-<proto>``.
+     """
+     if itag in itags:
+         suffixed = f'{itag}-{proto}'
+         if itags[itag] == proto or suffixed in itags:
+             return False
+         itag = suffixed
+     if itag:
+         f['format_id'] = itag
+         itags[itag] = proto
+
+     # Look the quality up by format id first, then by height; -1 if neither is known
+     lookups = ((f.get('format_id'), itag_qualities), (f.get('height'), res_qualities))
+     quality = -1
+     for val, qdict in lookups:
+         if val in qdict:
+             quality = q(qdict[val])
+             break
+     f['quality'] = quality
+     return True
for sd in streaming_data:
hls_manifest_url = get_hls and sd.get('hlsManifestUrl')
if hls_manifest_url:
for f in self._extract_m3u8_formats(hls_manifest_url, video_id, 'mp4', fatal=False):
- itag = self._search_regex(
- r'/itag/(\d+)', f['url'], 'itag', default=None)
- if itag in itags:
- itag += '-hls'
- if itag in itags:
- continue
- if itag:
- f['format_id'] = itag
- itags.append(itag)
- f['quality'] = guess_quality(f)
- yield f
+ if process_manifest_format(f, 'hls', self._search_regex(
+ r'/itag/(\d+)', f['url'], 'itag', default=None)):
+ yield f
dash_manifest_url = get_dash and sd.get('dashManifestUrl')
if dash_manifest_url:
for f in self._extract_mpd_formats(dash_manifest_url, video_id, fatal=False):
- itag = f['format_id']
- if itag in itags:
- itag += '-dash'
- if itag in itags:
- continue
- if itag:
- f['format_id'] = itag
- itags.append(itag)
- f['quality'] = guess_quality(f)
- filesize = int_or_none(self._search_regex(
- r'/clen/(\d+)', f.get('fragment_base_url')
- or f['url'], 'file size', default=None))
- if filesize:
- f['filesize'] = filesize
- yield f
+ if process_manifest_format(f, 'dash', f['format_id']):
+ f['filesize'] = int_or_none(self._search_regex(
+ r'/clen/(\d+)', f.get('fragment_base_url') or f['url'], 'file size', default=None))
+ yield f
def _real_extract(self, url):
url, smuggled_data = unsmuggle_url(url, {})
if reason:
self.raise_no_formats(reason, expected=True)
- for f in formats:
- if '&c=WEB&' in f['url'] and '&ratebypass=yes&' not in f['url']: # throttled
- f['source_preference'] = -10
- # TODO: this method is not reliable
- f['format_note'] = format_field(f, 'format_note', '%s ') + '(maybe throttled)'
-
# Source is given priority since formats that throttle are given lower source_preference
# When throttling issue is fully fixed, remove this
self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang'))
thumbnails.append({
'url': thumbnail_url,
})
+ original_thumbnails = thumbnails.copy()
+
# The best resolution thumbnails sometimes do not appear in the webpage
# See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
# List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
'default', '1', '2', '3'
]
n_thumbnail_names = len(thumbnail_names)
-
thumbnails.extend({
'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
video_id=video_id, name=name, ext=ext,
i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
self._remove_duplicate_formats(thumbnails)
+ self._downloader._sort_thumbnails(original_thumbnails)
category = get_first(microformats, 'category') or search_meta('genre')
channel_id = str_or_none(
'title': self._live_title(video_title) if is_live else video_title,
'formats': formats,
'thumbnails': thumbnails,
+ # The best thumbnail that we are sure exists. Prevents unnecessary
+ # URL checking if the user doesn't care about getting the best possible thumbnail
+ 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
'description': video_description,
'upload_date': unified_strdate(
get_first(microformats, 'uploadDate')
class YoutubeTabIE(YoutubeBaseInfoExtractor):
- IE_DESC = 'YouTube.com tab'
+ IE_DESC = 'YouTube Tabs'
_VALID_URL = r'''(?x)
https?://
(?:\w+\.)?
class YoutubePlaylistIE(InfoExtractor):
- IE_DESC = 'YouTube.com playlists'
+ IE_DESC = 'YouTube playlists'
_VALID_URL = r'''(?x)(?:
(?:https?://)?
(?:\w+\.)?
class YoutubeYtUserIE(InfoExtractor):
- IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
+ IE_DESC = 'YouTube user videos; "ytuser:" prefix'
_VALID_URL = r'ytuser:(?P<id>.+)'
_TESTS = [{
'url': 'ytuser:phihag',
class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
IE_NAME = 'youtube:favorites'
- IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
+ IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
_VALID_URL = r':ytfav(?:ou?rite)?s?'
_LOGIN_REQUIRED = True
_TESTS = [{
class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
- IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
- # there doesn't appear to be a real limit, for example if you search for
- # 'python' you get more than 8.000.000 results
- _MAX_RESULTS = float('inf')
+ IE_DESC = 'YouTube searches'
IE_NAME = 'youtube:search'
_SEARCH_KEY = 'ytsearch'
_SEARCH_PARAMS = None
class YoutubeSearchDateIE(YoutubeSearchIE):
IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
_SEARCH_KEY = 'ytsearchdate'
- IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
+ IE_DESC = 'YouTube searches, newest videos first'
_SEARCH_PARAMS = 'CAI%3D'
class YoutubeSearchURLIE(YoutubeSearchIE):
- IE_DESC = 'YouTube.com search URLs'
+ IE_DESC = 'YouTube search URLs with sorting and filter support'
IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
+ _SEARCH_KEY = None
_VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
# _MAX_RESULTS = 100
_TESTS = [{
class YoutubeWatchLaterIE(InfoExtractor):
IE_NAME = 'youtube:watchlater'
- IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
+ IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
_VALID_URL = r':ytwatchlater'
_TESTS = [{
'url': ':ytwatchlater',
class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
+ IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
_VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
_FEED_NAME = 'recommended'
_LOGIN_REQUIRED = False
class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
+ IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
_VALID_URL = r':ytsub(?:scription)?s?'
_FEED_NAME = 'subscriptions'
_TESTS = [{
class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
- IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
+ IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
_VALID_URL = r':ythis(?:tory)?'
_FEED_NAME = 'history'
_TESTS = [{