X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/24b0a72b302a8ba67eb7301911d8fedfa90f0ecc..fccf502118466bbfde7c5c6dd0279f0dfdb1311c:/yt_dlp/extractor/youtube.py diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py index b71cd4292..658b45fe1 100644 --- a/yt_dlp/extractor/youtube.py +++ b/yt_dlp/extractor/youtube.py @@ -695,7 +695,7 @@ def _extract_video(self, renderer): class YoutubeIE(YoutubeBaseInfoExtractor): - IE_DESC = 'YouTube.com' + IE_DESC = 'YouTube' _INVIDIOUS_SITES = ( # invidious-redirect websites r'(?:www\.)?redirect\.invidious\.io', @@ -2653,7 +2653,7 @@ def feed_entry(name): # Source is given priority since formats that throttle are given lower source_preference # When throttling issue is fully fixed, remove this - self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang')) + self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang')) keywords = get_first(video_details, 'keywords', expected_type=list) or [] if not keywords and webpage: @@ -2696,29 +2696,28 @@ def feed_entry(name): thumbnails.append({ 'url': thumbnail_url, }) + original_thumbnails = thumbnails.copy() + # The best resolution thumbnails sometimes does not appear in the webpage # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340 # List of possible thumbnails - Ref: - hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3'] - # TODO: Test them also? - For some videos, even these don't exist - guaranteed_thumbnail_names = [ + thumbnail_names = [ + 'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3', 'hqdefault', 'hq1', 'hq2', 'hq3', '0', 'mqdefault', 'mq1', 'mq2', 'mq3', 'default', '1', '2', '3' ] - thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names n_thumbnail_names = len(thumbnail_names) - thumbnails.extend({ 'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format( video_id=video_id, name=name, ext=ext, webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''), - '_test_url': name in hq_thumbnail_names, } for name in thumbnail_names for ext in ('webp', 'jpg')) for thumb in thumbnails: i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names) thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i) self._remove_duplicate_formats(thumbnails) + self._downloader._sort_thumbnails(original_thumbnails) category = get_first(microformats, 'category') or search_meta('genre') channel_id = str_or_none( @@ -2748,6 +2747,9 @@ def feed_entry(name): 'title': self._live_title(video_title) if is_live else video_title, 'formats': formats, 'thumbnails': thumbnails, + # The best thumbnail that we are sure exists. Prevents unnecessary + # URL checking if user don't care about getting the best possible thumbnail + 'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')), 'description': video_description, 'upload_date': unified_strdate( get_first(microformats, 'uploadDate') @@ -3013,7 +3015,7 @@ def process_language(container, base_url, lang_code, sub_name, query): class YoutubeTabIE(YoutubeBaseInfoExtractor): - IE_DESC = 'YouTube.com tab' + IE_DESC = 'YouTube Tabs' _VALID_URL = r'''(?x) https?:// (?:\w+\.)? @@ -4241,7 +4243,7 @@ def get_mobj(url): class YoutubePlaylistIE(InfoExtractor): - IE_DESC = 'YouTube.com playlists' + IE_DESC = 'YouTube playlists' _VALID_URL = r'''(?x)(?: (?:https?://)? (?:\w+\.)? @@ -4307,9 +4309,7 @@ class YoutubePlaylistIE(InfoExtractor): def suitable(cls, url): if YoutubeTabIE.suitable(url): return False - # Hack for lazy extractors until more generic solution is implemented - # (see #28780) - from .youtube import parse_qs + from ..utils import parse_qs qs = parse_qs(url) if qs.get('v', [None])[0]: return False @@ -4367,7 +4367,7 @@ def _real_extract(self, url): class YoutubeYtUserIE(InfoExtractor): - IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword' + IE_DESC = 'YouTube user videos; "ytuser:" prefix' _VALID_URL = r'ytuser:(?P.+)' _TESTS = [{ 'url': 'ytuser:phihag', @@ -4383,7 +4383,7 @@ def _real_extract(self, url): class YoutubeFavouritesIE(YoutubeBaseInfoExtractor): IE_NAME = 'youtube:favorites' - IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)' + IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)' _VALID_URL = r':ytfav(?:ou?rite)?s?' _LOGIN_REQUIRED = True _TESTS = [{ @@ -4401,10 +4401,7 @@ def _real_extract(self, url): class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE): - IE_DESC = 'YouTube.com searches, "ytsearch" keyword' - # there doesn't appear to be a real limit, for example if you search for - # 'python' you get more than 8.000.000 results - _MAX_RESULTS = float('inf') + IE_DESC = 'YouTube searches' IE_NAME = 'youtube:search' _SEARCH_KEY = 'ytsearch' _SEARCH_PARAMS = None @@ -4464,13 +4461,14 @@ def _search_results(self, query): class YoutubeSearchDateIE(YoutubeSearchIE): IE_NAME = YoutubeSearchIE.IE_NAME + ':date' _SEARCH_KEY = 'ytsearchdate' - IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword' + IE_DESC = 'YouTube searches, newest videos first' _SEARCH_PARAMS = 'CAI%3D' class YoutubeSearchURLIE(YoutubeSearchIE): - IE_DESC = 'YouTube.com search URLs' + IE_DESC = 'YouTube search URLs with sorting and filter support' IE_NAME = YoutubeSearchIE.IE_NAME + '_url' + _SEARCH_KEY = None _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)' # _MAX_RESULTS = 100 _TESTS = [{ @@ -4516,7 +4514,7 @@ def _real_extract(self, url): class YoutubeWatchLaterIE(InfoExtractor): IE_NAME = 'youtube:watchlater' - IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)' + IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)' _VALID_URL = r':ytwatchlater' _TESTS = [{ 'url': ':ytwatchlater', @@ -4529,7 +4527,7 @@ def _real_extract(self, url): class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): - IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)' + IE_DESC = 'YouTube recommended videos; ":ytrec" keyword' _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?' _FEED_NAME = 'recommended' _LOGIN_REQUIRED = False @@ -4546,7 +4544,7 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor): class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): - IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)' + IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)' _VALID_URL = r':ytsub(?:scription)?s?' _FEED_NAME = 'subscriptions' _TESTS = [{ @@ -4559,7 +4557,7 @@ class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor): class YoutubeHistoryIE(YoutubeFeedsInfoExtractor): - IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)' + IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)' _VALID_URL = r':ythis(?:tory)?' _FEED_NAME = 'history' _TESTS = [{