jfr.im git - yt-dlp.git/blobdiff - yt_dlp/YoutubeDL.py
Expand `--check-formats` to thumbnails
[yt-dlp.git] / yt_dlp / YoutubeDL.py
index 0edbb4119fb393132a7f2b6191d79f9fbfd17261..b1bc05a80e7e4e18ed9d35cd30da69d4e1ddae59 100644 (file)
@@ -20,6 +20,7 @@
 import shutil
 import subprocess
 import sys
+import tempfile
 import time
 import tokenize
 import traceback
@@ -67,6 +68,7 @@
     STR_FORMAT_RE,
     formatSeconds,
     GeoRestrictedError,
+    HEADRequest,
     int_or_none,
     iri_to_uri,
     ISO3166Utils,
@@ -86,7 +88,6 @@
     preferredencoding,
     prepend_extension,
     process_communicate_or_kill,
-    random_uuidv4,
     register_socks_protocols,
     RejectedVideoReached,
     render_table,
@@ -472,8 +473,7 @@ def __init__(self, params=None, auto_init=True):
 
         if sys.version_info < (3, 6):
             self.report_warning(
-                'Support for Python version %d.%d have been deprecated and will break in future versions of yt-dlp! '
-                'Update to Python 3.6 or above' % sys.version_info[:2])
+                'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
 
         def check_deprecated(param, option, suggestion):
             if self.params.get(param) is not None:
@@ -539,6 +539,11 @@ def check_deprecated(param, option, suggestion):
 
         self.outtmpl_dict = self.parse_outtmpl()
 
+        # Creating format selector here allows us to catch syntax errors before the extraction
+        self.format_selector = (
+            None if self.params.get('format') is None
+            else self.build_format_selector(self.params['format']))
+
         self._setup_opener()
 
         """Preload the archive, if any is specified"""
@@ -813,6 +818,21 @@ def parse_outtmpl(self):
                     'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
         return outtmpl_dict
 
+    def get_output_path(self, dir_type='', filename=None):
+        paths = self.params.get('paths', {})
+        assert isinstance(paths, dict)
+        path = os.path.join(
+            expand_path(paths.get('home', '').strip()),
+            expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
+            filename or '')
+
+        # Temporary fix for #4787
+        # 'Treat' all problem characters by passing filename through preferredencoding
+        # to workaround encoding issues with subprocess on python2 @ Windows
+        if sys.version_info < (3, 0) and sys.platform == 'win32':
+            path = encodeFilename(path, True).decode(preferredencoding())
+        return sanitize_path(path, force=self.params.get('windowsfilenames'))
+
     @staticmethod
     def validate_outtmpl(tmpl):
         ''' @return None or Exception object '''
@@ -990,12 +1010,11 @@ def _prepare_filename(self, info_dict, tmpl_type='default'):
 
     def prepare_filename(self, info_dict, dir_type='', warn=False):
         """Generate the output filename."""
-        paths = self.params.get('paths', {})
-        assert isinstance(paths, dict)
+
         filename = self._prepare_filename(info_dict, dir_type or 'default')
 
         if warn and not self.__prepare_filename_warned:
-            if not paths:
+            if not self.params.get('paths'):
                 pass
             elif filename == '-':
                 self.report_warning('--paths is ignored when an outputting to stdout')
@@ -1005,18 +1024,7 @@ def prepare_filename(self, info_dict, dir_type='', warn=False):
         if filename == '-' or not filename:
             return filename
 
-        homepath = expand_path(paths.get('home', '').strip())
-        assert isinstance(homepath, compat_str)
-        subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
-        assert isinstance(subdir, compat_str)
-        path = os.path.join(homepath, subdir, filename)
-
-        # Temporary fix for #4787
-        # 'Treat' all problem characters by passing filename through preferredencoding
-        # to workaround encoding issues with subprocess on python2 @ Windows
-        if sys.version_info < (3, 0) and sys.platform == 'win32':
-            path = encodeFilename(path, True).decode(preferredencoding())
-        return sanitize_path(path, force=self.params.get('windowsfilenames'))
+        return self.get_output_path(dir_type, filename)
 
     def _match_entry(self, info_dict, incomplete=False, silent=False):
         """ Returns None if the file should be downloaded """
@@ -1488,12 +1496,11 @@ def _build_format_filter(self, filter_spec):
             '!=': operator.ne,
         }
         operator_rex = re.compile(r'''(?x)\s*
-            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
-            \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
-            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
-            $
+            (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
+            (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+            (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
             ''' % '|'.join(map(re.escape, OPERATORS.keys())))
-        m = operator_rex.search(filter_spec)
+        m = operator_rex.fullmatch(filter_spec)
         if m:
             try:
                 comparison_value = int(m.group('value'))
@@ -1514,13 +1521,12 @@ def _build_format_filter(self, filter_spec):
                 '$=': lambda attr, value: attr.endswith(value),
                 '*=': lambda attr, value: value in attr,
             }
-            str_operator_rex = re.compile(r'''(?x)
-                \s*(?P<key>[a-zA-Z0-9._-]+)
-                \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
-                \s*(?P<value>[a-zA-Z0-9._-]+)
-                \s*$
+            str_operator_rex = re.compile(r'''(?x)\s*
+                (?P<key>[a-zA-Z0-9._-]+)\s*
+                (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+                (?P<value>[a-zA-Z0-9._-]+)\s*
                 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
-            m = str_operator_rex.search(filter_spec)
+            m = str_operator_rex.fullmatch(filter_spec)
             if m:
                 comparison_value = m.group('value')
                 str_op = STR_OPERATORS[m.group('op')]
@@ -1530,7 +1536,7 @@ def _build_format_filter(self, filter_spec):
                     op = str_op
 
         if not m:
-            raise ValueError('Invalid filter specification %r' % filter_spec)
+            raise SyntaxError('Invalid filter specification %r' % filter_spec)
 
         def _filter(f):
             actual_value = f.get(m.group('key'))
@@ -1740,18 +1746,20 @@ def _merge(formats_pair):
         def _check_formats(formats):
             for f in formats:
                 self.to_screen('[info] Testing format %s' % f['format_id'])
-                paths = self.params.get('paths', {})
-                temp_file = os.path.join(
-                    expand_path(paths.get('home', '').strip()),
-                    expand_path(paths.get('temp', '').strip()),
-                    'ytdl.%s.f%s.check-format' % (random_uuidv4(), f['format_id']))
+                temp_file = tempfile.NamedTemporaryFile(
+                    suffix='.tmp', delete=False,
+                    dir=self.get_output_path('temp') or None)
+                temp_file.close()
                 try:
-                    dl, _ = self.dl(temp_file, f, test=True)
+                    dl, _ = self.dl(temp_file.name, f, test=True)
                 except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
                     dl = False
                 finally:
-                    if os.path.exists(temp_file):
-                        os.remove(temp_file)
+                    if os.path.exists(temp_file.name):
+                        try:
+                            os.remove(temp_file.name)
+                        except OSError:
+                            self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
                 if dl:
                     yield f
                 else:
@@ -1914,8 +1922,7 @@ def _calc_cookies(self, info_dict):
         self.cookiejar.add_cookie_header(pr)
         return pr.get_header('Cookie')
 
-    @staticmethod
-    def _sanitize_thumbnails(info_dict):
+    def _sanitize_thumbnails(self, info_dict):
         thumbnails = info_dict.get('thumbnails')
         if thumbnails is None:
             thumbnail = info_dict.get('thumbnail')
@@ -1928,12 +1935,25 @@ def _sanitize_thumbnails(info_dict):
                 t.get('height') if t.get('height') is not None else -1,
                 t.get('id') if t.get('id') is not None else '',
                 t.get('url')))
+
+            def test_thumbnail(t):
+                self.to_screen('[info] Testing thumbnail %s' % t['id'])
+                try:
+                    self.urlopen(HEADRequest(t['url']))
+                except network_exceptions as err:
+                    self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
+                        t['id'], t['url'], error_to_compat_str(err)))
+                    return False
+                return True
+
             for i, t in enumerate(thumbnails):
-                t['url'] = sanitize_url(t['url'])
-                if t.get('width') and t.get('height'):
-                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
                 if t.get('id') is None:
                     t['id'] = '%d' % i
+                if t.get('width') and t.get('height'):
+                    t['resolution'] = '%dx%d' % (t['width'], t['height'])
+                t['url'] = sanitize_url(t['url'])
+            if self.params.get('check_formats'):
+                info_dict['thumbnails'] = reversed(LazyList(filter(test_thumbnail, thumbnails[::-1])))
 
     def process_video_result(self, info_dict, download=True):
         assert info_dict.get('_type', 'video') == 'video'
@@ -2119,12 +2139,11 @@ def is_wellformed(f):
             self.list_formats(info_dict)
             return
 
-        req_format = self.params.get('format')
-        if req_format is None:
+        format_selector = self.format_selector
+        if format_selector is None:
             req_format = self._default_format_spec(info_dict, download=download)
             self.write_debug('Default format spec: %s' % req_format)
-
-        format_selector = self.build_format_selector(req_format)
+            format_selector = self.build_format_selector(req_format)
 
         # While in format selection we may need to have an access to the original
         # format set in order to calculate some metrics or do some processing.
@@ -2798,7 +2817,7 @@ def filter_requested_info(info_dict, actually_filter=True):
             info_dict['epoch'] = int(time.time())
             reject = lambda k, v: k in remove_keys
         filter_fn = lambda obj: (
-            list(map(filter_fn, obj)) if isinstance(obj, (list, tuple, set))
+            list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
             else obj if not isinstance(obj, dict)
             else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
         return filter_fn(info_dict)
@@ -3036,7 +3055,7 @@ def list_formats(self, info_dict):
                 hideEmpty=new_format)))
 
     def list_thumbnails(self, info_dict):
-        thumbnails = info_dict.get('thumbnails')
+        thumbnails = list(info_dict.get('thumbnails'))
         if not thumbnails:
             self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
             return
@@ -3246,6 +3265,7 @@ def _write_thumbnails(self, info_dict, filename):  # return the extensions
 
             if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
                 ret.append(suffix + thumb_ext)
+                t['filepath'] = thumb_filename
                 self.to_screen('[%s] %s: Thumbnail %sis already present' %
                                (info_dict['extractor'], info_dict['id'], thumb_display_id))
             else: