[youtube] Populate `thumbnail` with the best "known" thumbnail

[yt-dlp.git] / yt_dlp / extractor / youtube.py
diff --git a/yt_dlp/extractor/youtube.py b/yt_dlp/extractor/youtube.py

index b71cd4292f047f4eae7a4cfaee84e487908a80d9..658b45fe14fe2697b96bb0c86b165f9c92605a7b 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -695,7 +695,7 @@ def _extract_video(self, renderer):
  
  
  class YoutubeIE(YoutubeBaseInfoExtractor):
-    IE_DESC = 'YouTube.com'
+    IE_DESC = 'YouTube'
      _INVIDIOUS_SITES = (
          # invidious-redirect websites
          r'(?:www\.)?redirect\.invidious\.io',
@@ -2653,7 +2653,7 @@ def feed_entry(name):
  
          # Source is given priority since formats that throttle are given lower source_preference
          # When throttling issue is fully fixed, remove this
-        self._sort_formats(formats, ('quality', 'res', 'fps', 'source', 'codec:vp9.2', 'lang'))
+        self._sort_formats(formats, ('quality', 'res', 'fps', 'hdr:12', 'source', 'codec:vp9.2', 'lang'))
  
          keywords = get_first(video_details, 'keywords', expected_type=list) or []
          if not keywords and webpage:
@@ -2696,29 +2696,28 @@ def feed_entry(name):
              thumbnails.append({
                  'url': thumbnail_url,
              })
+        original_thumbnails = thumbnails.copy()
+
          # The best resolution thumbnails sometimes does not appear in the webpage
          # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
          # List of possible thumbnails - Ref: <https://stackoverflow.com/a/20542029>
-        hq_thumbnail_names = ['maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3']
-        # TODO: Test them also? - For some videos, even these don't exist
-        guaranteed_thumbnail_names = [
+        thumbnail_names = [
+            'maxresdefault', 'hq720', 'sddefault', 'sd1', 'sd2', 'sd3',
              'hqdefault', 'hq1', 'hq2', 'hq3', '0',
              'mqdefault', 'mq1', 'mq2', 'mq3',
              'default', '1', '2', '3'
          ]
-        thumbnail_names = hq_thumbnail_names + guaranteed_thumbnail_names
          n_thumbnail_names = len(thumbnail_names)
-
          thumbnails.extend({
              'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
                  video_id=video_id, name=name, ext=ext,
                  webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
-            '_test_url': name in hq_thumbnail_names,
          } for name in thumbnail_names for ext in ('webp', 'jpg'))
          for thumb in thumbnails:
              i = next((i for i, t in enumerate(thumbnail_names) if f'/{video_id}/{t}' in thumb['url']), n_thumbnail_names)
              thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
          self._remove_duplicate_formats(thumbnails)
+        self._downloader._sort_thumbnails(original_thumbnails)
  
          category = get_first(microformats, 'category') or search_meta('genre')
          channel_id = str_or_none(
@@ -2748,6 +2747,9 @@ def feed_entry(name):
              'title': self._live_title(video_title) if is_live else video_title,
              'formats': formats,
              'thumbnails': thumbnails,
+            # The best thumbnail that we are sure exists. Prevents unnecessary
+            # URL checking if the user doesn't care about getting the best possible thumbnail
+            'thumbnail': traverse_obj(original_thumbnails, (-1, 'url')),
              'description': video_description,
              'upload_date': unified_strdate(
                  get_first(microformats, 'uploadDate')
@@ -3013,7 +3015,7 @@ def process_language(container, base_url, lang_code, sub_name, query):
  
  
  class YoutubeTabIE(YoutubeBaseInfoExtractor):
-    IE_DESC = 'YouTube.com tab'
+    IE_DESC = 'YouTube Tabs'
      _VALID_URL = r'''(?x)
                      https?://
                          (?:\w+\.)?
@@ -4241,7 +4243,7 @@ def get_mobj(url):
  
  
  class YoutubePlaylistIE(InfoExtractor):
-    IE_DESC = 'YouTube.com playlists'
+    IE_DESC = 'YouTube playlists'
      _VALID_URL = r'''(?x)(?:
                          (?:https?://)?
                          (?:\w+\.)?
@@ -4307,9 +4309,7 @@ class YoutubePlaylistIE(InfoExtractor):
      def suitable(cls, url):
          if YoutubeTabIE.suitable(url):
              return False
-        # Hack for lazy extractors until more generic solution is implemented
-        # (see #28780)
-        from .youtube import parse_qs
+        from ..utils import parse_qs
          qs = parse_qs(url)
          if qs.get('v', [None])[0]:
              return False
@@ -4367,7 +4367,7 @@ def _real_extract(self, url):
  
  
  class YoutubeYtUserIE(InfoExtractor):
-    IE_DESC = 'YouTube.com user videos, URL or "ytuser" keyword'
+    IE_DESC = 'YouTube user videos; "ytuser:" prefix'
      _VALID_URL = r'ytuser:(?P<id>.+)'
      _TESTS = [{
          'url': 'ytuser:phihag',
@@ -4383,7 +4383,7 @@ def _real_extract(self, url):
  
  class YoutubeFavouritesIE(YoutubeBaseInfoExtractor):
      IE_NAME = 'youtube:favorites'
-    IE_DESC = 'YouTube.com liked videos, ":ytfav" for short (requires authentication)'
+    IE_DESC = 'YouTube liked videos; ":ytfav" keyword (requires cookies)'
      _VALID_URL = r':ytfav(?:ou?rite)?s?'
      _LOGIN_REQUIRED = True
      _TESTS = [{
@@ -4401,10 +4401,7 @@ def _real_extract(self, url):
  
  
  class YoutubeSearchIE(SearchInfoExtractor, YoutubeTabIE):
-    IE_DESC = 'YouTube.com searches, "ytsearch" keyword'
-    # there doesn't appear to be a real limit, for example if you search for
-    # 'python' you get more than 8.000.000 results
-    _MAX_RESULTS = float('inf')
+    IE_DESC = 'YouTube searches'
      IE_NAME = 'youtube:search'
      _SEARCH_KEY = 'ytsearch'
      _SEARCH_PARAMS = None
@@ -4464,13 +4461,14 @@ def _search_results(self, query):
  class YoutubeSearchDateIE(YoutubeSearchIE):
      IE_NAME = YoutubeSearchIE.IE_NAME + ':date'
      _SEARCH_KEY = 'ytsearchdate'
-    IE_DESC = 'YouTube.com searches, newest videos first, "ytsearchdate" keyword'
+    IE_DESC = 'YouTube searches, newest videos first'
      _SEARCH_PARAMS = 'CAI%3D'
  
  
  class YoutubeSearchURLIE(YoutubeSearchIE):
-    IE_DESC = 'YouTube.com search URLs'
+    IE_DESC = 'YouTube search URLs with sorting and filter support'
      IE_NAME = YoutubeSearchIE.IE_NAME + '_url'
+    _SEARCH_KEY = None
      _VALID_URL = r'https?://(?:www\.)?youtube\.com/results\?(.*?&)?(?:search_query|q)=(?:[^&]+)(?:[&]|$)'
      # _MAX_RESULTS = 100
      _TESTS = [{
@@ -4516,7 +4514,7 @@ def _real_extract(self, url):
  
  class YoutubeWatchLaterIE(InfoExtractor):
      IE_NAME = 'youtube:watchlater'
-    IE_DESC = 'Youtube watch later list, ":ytwatchlater" for short (requires authentication)'
+    IE_DESC = 'Youtube watch later list; ":ytwatchlater" keyword (requires cookies)'
      _VALID_URL = r':ytwatchlater'
      _TESTS = [{
          'url': ':ytwatchlater',
@@ -4529,7 +4527,7 @@ def _real_extract(self, url):
  
  
  class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'YouTube.com recommended videos, ":ytrec" for short (requires authentication)'
+    IE_DESC = 'YouTube recommended videos; ":ytrec" keyword'
      _VALID_URL = r'https?://(?:www\.)?youtube\.com/?(?:[?#]|$)|:ytrec(?:ommended)?'
      _FEED_NAME = 'recommended'
      _LOGIN_REQUIRED = False
@@ -4546,7 +4544,7 @@ class YoutubeRecommendedIE(YoutubeFeedsInfoExtractor):
  
  
  class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'YouTube.com subscriptions feed, ":ytsubs" for short (requires authentication)'
+    IE_DESC = 'YouTube subscriptions feed; ":ytsubs" keyword (requires cookies)'
      _VALID_URL = r':ytsub(?:scription)?s?'
      _FEED_NAME = 'subscriptions'
      _TESTS = [{
@@ -4559,7 +4557,7 @@ class YoutubeSubscriptionsIE(YoutubeFeedsInfoExtractor):
  
  
  class YoutubeHistoryIE(YoutubeFeedsInfoExtractor):
-    IE_DESC = 'Youtube watch history, ":ythis" for short (requires authentication)'
+    IE_DESC = 'Youtube watch history; ":ythis" keyword (requires cookies)'
      _VALID_URL = r':ythis(?:tory)?'
      _FEED_NAME = 'history'
      _TESTS = [{