jfr.im git - yt-dlp.git/commitdiff
[youtube] Extract more thumbnails
author: pukkandan <redacted>
Thu, 15 Jul 2021 17:19:59 +0000 (22:49 +0530)
committer: pukkandan <redacted>
Mon, 19 Jul 2021 21:12:11 +0000 (02:42 +0530)
* The thumbnail URLs are hard-coded and their actual existence is tested lazily
* Added option `--no-check-formats` to not test them

Closes #340, Related: #402, #337, https://github.com/ytdl-org/youtube-dl/issues/29049

README.md
yt_dlp/YoutubeDL.py
yt_dlp/extractor/common.py
yt_dlp/extractor/youtube.py
yt_dlp/options.py

index 8fd327f3e85b930f9ff03edfa73c0e7fefc48f4a..6ff6d93d6cd5dc69b31ee50d150e77b4811cfe6f 100644 (file)
--- a/README.md
+++ b/README.md
@@ -638,7 +638,9 @@ ## Video Format Options:
     --no-prefer-free-formats         Don't give any special preference to free
                                      containers (default)
     --check-formats                  Check that the formats selected are
-                                     actually downloadable (Experimental)
+                                     actually downloadable
+    --no-check-formats               Do not check that the formats selected are
+                                     actually downloadable
     -F, --list-formats               List all available formats of requested
                                      videos
     --merge-output-format FORMAT     If a merge is required (e.g.
index 5b603690c3f5561effca261a7bb20a57c0bb3bd4..d4d1af4fd7dda3decd53eee894f0e28636c117e1 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -209,6 +209,9 @@ class YoutubeDL(object):
                        into a single file
     allow_multiple_audio_streams:   Allow multiple audio streams to be merged
                        into a single file
+    check_formats:     Whether to test if the formats are downloadable.
+                       Can be True (check all), False (check none)
+                       or None (check only if requested by extractor)
     paths:             Dictionary of output paths. The allowed keys are 'home',
                        'temp' and the keys of OUTTMPL_TYPES (in utils.py)
     outtmpl:           Dictionary of templates for output names. Allowed keys
@@ -1944,15 +1947,24 @@ def _sanitize_thumbnails(self, info_dict):
                 t.get('id') if t.get('id') is not None else '',
                 t.get('url')))
 
-            def test_thumbnail(t):
-                self.to_screen('[info] Testing thumbnail %s' % t['id'])
-                try:
-                    self.urlopen(HEADRequest(t['url']))
-                except network_exceptions as err:
-                    self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
-                        t['id'], t['url'], error_to_compat_str(err)))
-                    return False
-                return True
+            def thumbnail_tester():
+                if self.params.get('check_formats'):
+                    def to_screen(msg):
+                        return self.to_screen(f'[info] {msg}')
+                else:
+                    to_screen = self.write_debug
+
+                def test_thumbnail(t):
+                    to_screen('Testing thumbnail %s' % t['id'])
+                    try:
+                        self.urlopen(HEADRequest(t['url']))
+                    except network_exceptions as err:
+                        to_screen('Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
+                            t['id'], t['url'], error_to_compat_str(err)))
+                        return False
+                    return True
+
+                return test_thumbnail
 
             for i, t in enumerate(thumbnails):
                 if t.get('id') is None:
@@ -1960,8 +1972,11 @@ def test_thumbnail(t):
                 if t.get('width') and t.get('height'):
                     t['resolution'] = '%dx%d' % (t['width'], t['height'])
                 t['url'] = sanitize_url(t['url'])
-            if self.params.get('check_formats'):
-                info_dict['thumbnails'] = LazyList(filter(test_thumbnail, thumbnails[::-1])).reverse()
+
+            if self.params.get('check_formats') is not False:
+                info_dict['thumbnails'] = LazyList(filter(thumbnail_tester(), thumbnails[::-1])).reverse()
+            else:
+                info_dict['thumbnails'] = thumbnails
 
     def process_video_result(self, info_dict, download=True):
         assert info_dict.get('_type', 'video') == 'video'
index 0ee7ee3b125e5e6eb529fa2babc557bd2af46230..a6fc5d11a3e134e2f300dd92994350f694a68858 100644 (file)
--- a/yt_dlp/extractor/common.py
+++ b/yt_dlp/extractor/common.py
@@ -229,6 +229,7 @@ class InfoExtractor(object):
                         * "resolution" (optional, string "{width}x{height}",
                                         deprecated)
                         * "filesize" (optional, int)
+                        * "_test_url" (optional, bool) - If true, test the URL
     thumbnail:      Full URL to a video thumbnail image.
     description:    Full video description.
     uploader:       Full name of the video uploader.
index dee2dbebcf39cbd59c9115c5fcf7c106fd7e13bc..ae1c1bca5e8a4a9d6b554501f0efbb0799423413 100644 (file)
--- a/yt_dlp/extractor/youtube.py
+++ b/yt_dlp/extractor/youtube.py
@@ -2645,7 +2645,21 @@ def feed_entry(name):
                                 f['stretched_ratio'] = ratio
                         break
 
+        category = microformat.get('category') or search_meta('genre')
+        channel_id = video_details.get('channelId') \
+            or microformat.get('externalChannelId') \
+            or search_meta('channelId')
+        duration = int_or_none(
+            video_details.get('lengthSeconds')
+            or microformat.get('lengthSeconds')) \
+            or parse_duration(search_meta('duration'))
+        is_live = video_details.get('isLive')
+        is_upcoming = video_details.get('isUpcoming')
+        owner_profile_url = microformat.get('ownerProfileUrl')
+
         thumbnails = []
+        thumbnail_types = ['maxresdefault', 'sddefault', 'hqdefault', '0', 'mqdefault', 'default', '1', '2', '3']
+
         for container in (video_details, microformat):
             for thumbnail in (try_get(
                     container,
@@ -2662,34 +2676,25 @@ def feed_entry(name):
                     'url': thumbnail_url,
                     'height': int_or_none(thumbnail.get('height')),
                     'width': int_or_none(thumbnail.get('width')),
-                    'preference': 1 if 'maxresdefault' in thumbnail_url else -1
                 })
         thumbnail_url = search_meta(['og:image', 'twitter:image'])
         if thumbnail_url:
             thumbnails.append({
                 'url': thumbnail_url,
-                'preference': 1 if 'maxresdefault' in thumbnail_url else -1
             })
-        # All videos have a maxresdefault thumbnail, but sometimes it does not appear in the webpage
-        # See: https://github.com/ytdl-org/youtube-dl/issues/29049
-        thumbnails.append({
-            'url': 'https://i.ytimg.com/vi/%s/maxresdefault.jpg' % video_id,
-            'preference': 1,
-        })
+        # The best resolution thumbnails sometimes do not appear in the webpage
+        # See: https://github.com/ytdl-org/youtube-dl/issues/29049, https://github.com/yt-dlp/yt-dlp/issues/340
+        thumbnails.extend({
+            'url': 'https://i.ytimg.com/vi{webp}/{video_id}/{name}{live}.{ext}'.format(
+                video_id=video_id, name=name, ext=ext,
+                webp='_webp' if ext == 'webp' else '', live='_live' if is_live else ''),
+            '_test_url': True,
+        } for name in thumbnail_types for ext in ('webp', 'jpg'))
+        for thumb in thumbnails:
+            i = next((i for i, t in enumerate(thumbnail_types) if f'/{video_id}/{t}' in thumb['url']), 20)
+            thumb['preference'] = (0 if '.webp' in thumb['url'] else -1) - (2 * i)
         self._remove_duplicate_formats(thumbnails)
 
-        category = microformat.get('category') or search_meta('genre')
-        channel_id = video_details.get('channelId') \
-            or microformat.get('externalChannelId') \
-            or search_meta('channelId')
-        duration = int_or_none(
-            video_details.get('lengthSeconds')
-            or microformat.get('lengthSeconds')) \
-            or parse_duration(search_meta('duration'))
-        is_live = video_details.get('isLive')
-        is_upcoming = video_details.get('isUpcoming')
-        owner_profile_url = microformat.get('ownerProfileUrl')
-
         info = {
             'id': video_id,
             'title': self._live_title(video_title) if is_live else video_title,
index 2a42712b6f9fbbc3c1b8fa90c91a3f3b2db3e350..f9201bf01c0b323a4bfd363cb135a078a4b37848 100644 (file)
--- a/yt_dlp/options.py
+++ b/yt_dlp/options.py
@@ -524,8 +524,12 @@ def _dict_from_options_callback(
         help="Don't give any special preference to free containers (default)")
     video_format.add_option(
         '--check-formats',
-        action='store_true', dest='check_formats', default=False,
-        help="Check that the formats selected are actually downloadable (Experimental)")
+        action='store_true', dest='check_formats', default=None,
+        help='Check that the formats selected are actually downloadable')
+    video_format.add_option(
+        '--no-check-formats',
+        action='store_false', dest='check_formats',
+        help='Do not check that the formats selected are actually downloadable')
     video_format.add_option(
         '-F', '--list-formats',
         action='store_true', dest='listformats',