]> jfr.im git - yt-dlp.git/commitdiff
Fix and refactor `prepare_outtmpl`
authorpukkandan <redacted>
Thu, 3 Jun 2021 18:00:38 +0000 (23:30 +0530)
committerpukkandan <redacted>
Sat, 5 Jun 2021 19:29:04 +0000 (00:59 +0530)
The following tests would have failed previously:
%(id)d %(id)r
%(ext)s-%(ext|def)d
%(width|)d
%(id)r %(height)r
%(formats.0)r
%s

test/test_YoutubeDL.py
test/test_postprocessors.py
yt_dlp/YoutubeDL.py
yt_dlp/postprocessor/execafterdownload.py
yt_dlp/postprocessor/metadatafromfield.py
yt_dlp/utils.py

index 2b3ed8f7b5e07d9ef81378127bad27fcbb1923d7..48015f98bcdd9525e3dc9ced1abe51d75412b571 100644 (file)
@@ -17,7 +17,7 @@
 from yt_dlp.extractor import YoutubeIE
 from yt_dlp.extractor.common import InfoExtractor
 from yt_dlp.postprocessor.common import PostProcessor
-from yt_dlp.utils import ExtractorError, match_filter_func
+from yt_dlp.utils import ExtractorError, float_or_none, match_filter_func
 
 TEST_URL = 'http://localhost/sample.mp4'
 
@@ -648,56 +648,95 @@ def test_add_extra_info(self):
         self.assertEqual(test_dict['extractor'], 'Foo')
         self.assertEqual(test_dict['playlist'], 'funny videos')
 
-    def test_prepare_filename(self):
-        info = {
-            'id': '1234',
-            'ext': 'mp4',
-            'width': None,
-            'height': 1080,
-            'title1': '$PATH',
-            'title2': '%PATH%',
-            'timestamp': 1618488000,
-            'formats': [{'id': 'id1'}, {'id': 'id2'}]
-        }
+    outtmpl_info = {
+        'id': '1234',
+        'ext': 'mp4',
+        'width': None,
+        'height': 1080,
+        'title1': '$PATH',
+        'title2': '%PATH%',
+        'timestamp': 1618488000,
+        'duration': 100000,
+        'playlist_index': 1,
+        '_last_playlist_index': 100,
+        'n_entries': 10,
+        'formats': [{'id': 'id1'}, {'id': 'id2'}, {'id': 'id3'}]
+    }
+
+    def test_prepare_outtmpl(self):
+        def out(tmpl, **params):
+            params['outtmpl'] = tmpl
+            ydl = YoutubeDL(params)
+            ydl._num_downloads = 1
+            outtmpl, tmpl_dict = ydl.prepare_outtmpl(tmpl, self.outtmpl_info)
+            return outtmpl % tmpl_dict
+
+        self.assertEqual(out('%(id)s.%(ext)s'), '1234.mp4')
+        self.assertEqual(out('%(duration_string)s'), '27:46:40')
+        self.assertTrue(float_or_none(out('%(epoch)d')))
+        self.assertEqual(out('%(resolution)s'), '1080p')
+        self.assertEqual(out('%(playlist_index)s'), '001')
+        self.assertEqual(out('%(autonumber)s'), '00001')
+        self.assertEqual(out('%(autonumber+2)03d', autonumber_start=3), '005')
+        self.assertEqual(out('%(autonumber)s', autonumber_size=3), '001')
+
+        self.assertEqual(out('%%'), '%')
+        self.assertEqual(out('%%%%'), '%%')
+        self.assertEqual(out('%(invalid@tmpl|def)s', outtmpl_na_placeholder='none'), 'none')
+        self.assertEqual(out('%()s'), 'NA')
+        self.assertEqual(out('%s'), '%s')
+
+        NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(x|def)s-%(id)s.%(ext)s'
+        self.assertEqual(out(NA_TEST_OUTTMPL), 'NA-NA-def-1234.mp4')
+        self.assertEqual(out(NA_TEST_OUTTMPL, outtmpl_na_placeholder='none'), 'none-none-def-1234.mp4')
+        self.assertEqual(out(NA_TEST_OUTTMPL, outtmpl_na_placeholder=''), '--def-1234.mp4')
+
+        FMT_TEST_OUTTMPL = '%%(height)%s.%%(ext)s'
+        self.assertEqual(out(FMT_TEST_OUTTMPL % 's'), '1080.mp4')
+        self.assertEqual(out(FMT_TEST_OUTTMPL % 'd'), '1080.mp4')
+        self.assertEqual(out(FMT_TEST_OUTTMPL % '6d'), '  1080.mp4')
+        self.assertEqual(out(FMT_TEST_OUTTMPL % '-6d'), '1080  .mp4')
+        self.assertEqual(out(FMT_TEST_OUTTMPL % '06d'), '001080.mp4')
+        self.assertEqual(out(FMT_TEST_OUTTMPL % ' 06d'), ' 01080.mp4')
+        self.assertEqual(out(FMT_TEST_OUTTMPL % '   06d'), ' 01080.mp4')
+        self.assertEqual(out(FMT_TEST_OUTTMPL % '0 6d'), ' 01080.mp4')
+        self.assertEqual(out(FMT_TEST_OUTTMPL % '0   6d'), ' 01080.mp4')
+        self.assertEqual(out(FMT_TEST_OUTTMPL % '   0   6d'), ' 01080.mp4')
+
+        self.assertEqual(out('%(id)d'), '1234')
+        self.assertEqual(out('%(id)d %(id)r'), "1234 '1234'")
+        self.assertEqual(out('%(ext)s-%(ext|def)d'), 'mp4-def')
+        self.assertEqual(out('%(width|0)04d'), '0000')
+        self.assertEqual(out('%(width|)d', outtmpl_na_placeholder='none'), '')
+
+        FORMATS = self.outtmpl_info['formats']
+        self.assertEqual(out('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20')
+        self.assertEqual(out('%(id+1-height+3)05d'), '00158')
+        self.assertEqual(out('%(width+100)05d'), 'NA')
+        self.assertEqual(out('%(formats.0)s'), str(FORMATS[0]))
+        self.assertEqual(out('%(formats.-1.id)s'), str(FORMATS[-1]['id']))
+        self.assertEqual(out('%(formats.3)s'), 'NA')
+        self.assertEqual(out('%(formats.:2:-1)r'), repr(FORMATS[:2:-1]))
+        self.assertEqual(out('%(formats.0.id.-1+id)f'), '1235.000000')
 
-        def fname(templ, na_placeholder='NA'):
+    def test_prepare_filename(self):
+        def fname(templ):
             params = {'outtmpl': templ}
-            if na_placeholder != 'NA':
-                params['outtmpl_na_placeholder'] = na_placeholder
             ydl = YoutubeDL(params)
-            return ydl.prepare_filename(info)
-        self.assertEqual(fname('%(id)s.%(ext)s'), '1234.mp4')
-        self.assertEqual(fname('%(id)s-%(width)s.%(ext)s'), '1234-NA.mp4')
-        NA_TEST_OUTTMPL = '%(uploader_date)s-%(width)d-%(id)s.%(ext)s'
-        # Replace missing fields with 'NA' by default
-        self.assertEqual(fname(NA_TEST_OUTTMPL), 'NA-NA-1234.mp4')
-        # Or by provided placeholder
-        self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder='none'), 'none-none-1234.mp4')
-        self.assertEqual(fname(NA_TEST_OUTTMPL, na_placeholder=''), '--1234.mp4')
-        self.assertEqual(fname('%(height)s.%(ext)s'), '1080.mp4')
-        self.assertEqual(fname('%(height)d.%(ext)s'), '1080.mp4')
-        self.assertEqual(fname('%(height)6d.%(ext)s'), '  1080.mp4')
-        self.assertEqual(fname('%(height)-6d.%(ext)s'), '1080  .mp4')
-        self.assertEqual(fname('%(height)06d.%(ext)s'), '001080.mp4')
-        self.assertEqual(fname('%(height) 06d.%(ext)s'), ' 01080.mp4')
-        self.assertEqual(fname('%(height)   06d.%(ext)s'), ' 01080.mp4')
-        self.assertEqual(fname('%(height)0 6d.%(ext)s'), ' 01080.mp4')
-        self.assertEqual(fname('%(height)0   6d.%(ext)s'), ' 01080.mp4')
-        self.assertEqual(fname('%(height)   0   6d.%(ext)s'), ' 01080.mp4')
+            return ydl.prepare_filename(self.outtmpl_info)
+
         self.assertEqual(fname('%%'), '%')
         self.assertEqual(fname('%%%%'), '%%')
-        self.assertEqual(fname('%%(height)06d.%(ext)s'), '%(height)06d.mp4')
+        self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
         self.assertEqual(fname('%(width)06d.%(ext)s'), 'NA.mp4')
         self.assertEqual(fname('%(width)06d.%%(ext)s'), 'NA.%(ext)s')
         self.assertEqual(fname('%%(width)06d.%(ext)s'), '%(width)06d.mp4')
+
         self.assertEqual(fname('Hello %(title1)s'), 'Hello $PATH')
         self.assertEqual(fname('Hello %(title2)s'), 'Hello %PATH%')
-        self.assertEqual(fname('%(timestamp+-1000>%H-%M-%S)s'), '11-43-20')
-        self.assertEqual(fname('%(id+1)05d'), '01235')
-        self.assertEqual(fname('%(width+100)05d'), 'NA')
-        self.assertEqual(fname('%(formats.0)s').replace("u", ""), "{'id' - 'id1'}")
-        self.assertEqual(fname('%(formats.-1.id)s'), 'id2')
-        self.assertEqual(fname('%(formats.2)s'), 'NA')
+
+        self.assertEqual(fname('%(id)r %(height)r'), "'1234' 1080")
+        self.assertEqual(fname('%(formats.0)r'), "{'id' - 'id1'}")
 
     def test_format_note(self):
         ydl = YoutubeDL()
index 1f8f375cc3fc3d28c6467ddcfff59a38ca5858e3..bdc2d93cb6ffe55fee1d1b17dcaacf00ccc3023a 100644 (file)
@@ -8,7 +8,10 @@
 import unittest
 sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
+from yt_dlp import YoutubeDL
+from yt_dlp.compat import compat_shlex_quote
 from yt_dlp.postprocessor import (
+    ExecAfterDownloadPP,
     FFmpegThumbnailsConvertorPP,
     MetadataFromFieldPP,
     MetadataFromTitlePP,
@@ -55,3 +58,14 @@ def test_escaping(self):
 
         for _, out in tests:
             os.remove(file.format(out))
+
+
+class TestExecAfterDownload(unittest.TestCase):
+    def test_parse_cmd(self):
+        pp = ExecAfterDownloadPP(YoutubeDL(), '')
+        info = {'filepath': 'file name'}
+        quoted_filepath = compat_shlex_quote(info['filepath'])
+
+        self.assertEqual(pp.parse_cmd('echo', info), 'echo %s' % quoted_filepath)
+        self.assertEqual(pp.parse_cmd('echo.{}', info), 'echo.%s' % quoted_filepath)
+        self.assertEqual(pp.parse_cmd('echo "%(filepath)s"', info), 'echo "%s"' % info['filepath'])
index a09c05b414c341174d5752a54125fe09ba07c075..5b9cc235e9d86ec2a1672548e9b2c1f2c1153547 100644 (file)
@@ -64,7 +64,7 @@
     float_or_none,
     format_bytes,
     format_field,
-    FORMAT_RE,
+    STR_FORMAT_RE,
     formatSeconds,
     GeoRestrictedError,
     int_or_none,
@@ -815,52 +815,26 @@ def parse_outtmpl(self):
 
     def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
         """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
-        template_dict = dict(info_dict)
+        info_dict = dict(info_dict)
         na = self.params.get('outtmpl_na_placeholder', 'NA')
 
-        # duration_string
-        template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
+        info_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
             formatSeconds(info_dict['duration'], '-' if sanitize else ':')
             if info_dict.get('duration', None) is not None
             else None)
-
-        # epoch
-        template_dict['epoch'] = int(time.time())
-
-        # autonumber
-        autonumber_size = self.params.get('autonumber_size')
-        if autonumber_size is None:
-            autonumber_size = 5
-        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
-
-        # resolution if not defined
-        if template_dict.get('resolution') is None:
-            if template_dict.get('width') and template_dict.get('height'):
-                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
-            elif template_dict.get('height'):
-                template_dict['resolution'] = '%sp' % template_dict['height']
-            elif template_dict.get('width'):
-                template_dict['resolution'] = '%dx?' % template_dict['width']
+        info_dict['epoch'] = int(time.time())
+        info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+        if info_dict.get('resolution') is None:
+            info_dict['resolution'] = self.format_resolution(info_dict, default=None)
 
         # For fields playlist_index and autonumber convert all occurrences
         # of %(field)s to %(field)0Nd for backward compatibility
         field_size_compat_map = {
-            'playlist_index': len(str(template_dict.get('_last_playlist_index') or '')),
-            'autonumber': autonumber_size,
+            'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
+            'autonumber': self.params.get('autonumber_size') or 5,
         }
-        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
-        mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
-        if mobj:
-            outtmpl = re.sub(
-                FIELD_SIZE_COMPAT_RE,
-                r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
-                outtmpl)
-
-        numeric_fields = list(self._NUMERIC_FIELDS)
-        if sanitize is None:
-            sanitize = lambda k, v: v
-
-        EXTERNAL_FORMAT_RE = FORMAT_RE.format('(?P<key>[^)]*)')
+
+        EXTERNAL_FORMAT_RE = STR_FORMAT_RE.format('[^)]*')
         # Field is of the form key1.key2...
         # where keys (except first) can be string, int or slice
         FIELD_RE = r'\w+(?:\.(?:\w+|[-\d]*(?::[-\d]*){0,2}))*'
@@ -876,71 +850,76 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
             '+': float.__add__,
             '-': float.__sub__,
         }
-        for outer_mobj in re.finditer(EXTERNAL_FORMAT_RE, outtmpl):
-            final_key = outer_mobj.group('key')
-            str_type = outer_mobj.group('type')
-            value = None
-            mobj = re.match(INTERNAL_FORMAT_RE, final_key)
-            if mobj is not None:
+        tmpl_dict = {}
+
+        def get_value(mdict):
+            # Object traversal
+            fields = mdict['fields'].split('.')
+            value = traverse_dict(info_dict, fields)
+            # Negative
+            if mdict['negate']:
+                value = float_or_none(value)
+                if value is not None:
+                    value *= -1
+            # Do maths
+            if mdict['maths']:
+                value = float_or_none(value)
+                operator = None
+                for item in MATH_OPERATORS_RE.split(mdict['maths'])[1:]:
+                    if item == '' or value is None:
+                        return None
+                    if operator:
+                        item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
+                        offset = float_or_none(item)
+                        if offset is None:
+                            offset = float_or_none(traverse_dict(info_dict, item.split('.')))
+                        try:
+                            value = operator(value, multiplier * offset)
+                        except (TypeError, ZeroDivisionError):
+                            return None
+                        operator = None
+                    else:
+                        operator = MATH_FUNCTIONS[item]
+            # Datetime formatting
+            if mdict['strf_format']:
+                value = strftime_or_none(value, mdict['strf_format'])
+
+            return value
+
+        def create_key(outer_mobj):
+            if not outer_mobj.group('has_key'):
+                return '%{}'.format(outer_mobj.group(0))
+
+            key = outer_mobj.group('key')
+            fmt = outer_mobj.group('format')
+            mobj = re.match(INTERNAL_FORMAT_RE, key)
+            if mobj is None:
+                value, default = None, na
+            else:
                 mobj = mobj.groupdict()
-                # Object traversal
-                fields = mobj['fields'].split('.')
-                value = traverse_dict(template_dict, fields)
-                # Negative
-                if mobj['negate']:
-                    value = float_or_none(value)
-                    if value is not None:
-                        value *= -1
-                # Do maths
-                if mobj['maths']:
-                    value = float_or_none(value)
-                    operator = None
-                    for item in MATH_OPERATORS_RE.split(mobj['maths'])[1:]:
-                        if item == '':
-                            value = None
-                        if value is None:
-                            break
-                        if operator:
-                            item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
-                            offset = float_or_none(item)
-                            if offset is None:
-                                offset = float_or_none(traverse_dict(template_dict, item.split('.')))
-                            try:
-                                value = operator(value, multiplier * offset)
-                            except (TypeError, ZeroDivisionError):
-                                value = None
-                            operator = None
-                        else:
-                            operator = MATH_FUNCTIONS[item]
-                # Datetime formatting
-                if mobj['strf_format']:
-                    value = strftime_or_none(value, mobj['strf_format'])
-                # Set default
-                if value is None and mobj['default'] is not None:
-                    value = mobj['default']
-            # Sanitize
-            if str_type in 'crs' and value is not None:  # string
-                value = sanitize('%{}'.format(str_type) % fields[-1], value)
-            else:  # numeric
-                numeric_fields.append(final_key)
+                default = mobj['default'] if mobj['default'] is not None else na
+                value = get_value(mobj)
+
+            if fmt == 's' and value is not None and key in field_size_compat_map.keys():
+                fmt = '0{:d}d'.format(field_size_compat_map[key])
+
+            value = default if value is None else value
+            key += '\0%s' % fmt
+
+            if fmt[-1] not in 'crs':  # numeric
                 value = float_or_none(value)
-            if value is not None:
-                template_dict[final_key] = value
-
-        # Missing numeric fields used together with integer presentation types
-        # in format specification will break the argument substitution since
-        # string NA placeholder is returned for missing fields. We will patch
-        # output template for missing fields to meet string presentation type.
-        for numeric_field in numeric_fields:
-            if template_dict.get(numeric_field) is None:
-                outtmpl = re.sub(
-                    FORMAT_RE.format(re.escape(numeric_field)),
-                    r'%({0})s'.format(numeric_field), outtmpl)
-
-        template_dict = collections.defaultdict(lambda: na, (
-            (k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
-            for k, v in template_dict.items() if v is not None))
-        return outtmpl, template_dict
+                if value is None:
+                    value, fmt = default, 's'
+            if sanitize:
+                if fmt[-1] == 'r':
+                    # If value is an object, sanitize might convert it to a string
+                    # So we convert it to repr first
+                    value, fmt = repr(value), '%ss' % fmt[:-1]
+                    value = sanitize(key, value)
+            tmpl_dict[key] = value
+            return '%({key}){fmt}'.format(key=key, fmt=fmt)
+
+        return re.sub(EXTERNAL_FORMAT_RE, create_key, outtmpl), tmpl_dict
 
     def _prepare_filename(self, info_dict, tmpl_type='default'):
         try:
@@ -966,7 +945,7 @@ def _prepare_filename(self, info_dict, tmpl_type='default'):
 
             force_ext = OUTTMPL_TYPES.get(tmpl_type)
             if force_ext is not None:
-                filename = replace_extension(filename, force_ext, template_dict.get('ext'))
+                filename = replace_extension(filename, force_ext, info_dict.get('ext'))
 
             # https://github.com/blackjack4494/youtube-dlc/issues/85
             trim_file_name = self.params.get('trim_file_name', False)
index 9d68583e77af82d2fe78d205655726d22ea4e476..948b3ffb35d8284cf4b0bef58994758150683320 100644 (file)
@@ -1,13 +1,11 @@
 from __future__ import unicode_literals
 
-import re
 import subprocess
 
 from .common import PostProcessor
 from ..compat import compat_shlex_quote
 from ..utils import (
     encodeArgument,
-    FORMAT_RE,
     PostProcessingError,
 )
 
@@ -23,14 +21,14 @@ def pp_key(cls):
         return 'Exec'
 
     def parse_cmd(self, cmd, info):
-        # If no %(key)s is found, replace {} for backard compatibility
-        if not re.search(FORMAT_RE.format(r'[^)]*'), cmd):
-            if '{}' not in cmd:
-                cmd += ' {}'
-            return cmd.replace('{}', compat_shlex_quote(info['filepath']))
-
-        tmpl, info_copy = self._downloader.prepare_outtmpl(cmd, info)
-        return tmpl % info_copy
+        tmpl, tmpl_dict = self._downloader.prepare_outtmpl(cmd, info)
+        if tmpl_dict:  # if there are no replacements, tmpl_dict = {}
+            return tmpl % tmpl_dict
+
+        # If no replacements are found, replace {} for backard compatibility
+        if '{}' not in cmd:
+            cmd += ' {}'
+        return cmd.replace('{}', compat_shlex_quote(info['filepath']))
 
     def run(self, info):
         cmd = self.parse_cmd(self.exec_cmd, info)
index 1def868e812323476c895f75f974a9d815f84f36..8c795586cd2df8463aa332895daf4cb40cd30a53 100644 (file)
@@ -54,9 +54,9 @@ def format_to_regex(fmt):
 
     def run(self, info):
         for dictn in self._data:
-            tmpl, info_copy = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
-            data_to_parse = tmpl % info_copy
-            self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], tmpl))
+            tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
+            data_to_parse = tmpl % tmpl_dict
+            self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl']))
             match = re.search(dictn['regex'], data_to_parse)
             if match is None:
                 self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
index dea7d85cd4e600d0122e4258878d35c1ca54df5b..72fd8a0e7dda4973a81667b4805c0691c923c710 100644 (file)
@@ -4393,15 +4393,17 @@ def q(qid):
 # As of [1] format syntax is:
 #  %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
 # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
-FORMAT_RE = r'''(?x)
+STR_FORMAT_RE = r'''(?x)
     (?<!%)
     %
-    \({0}\)  # mapping key
-    (?:[#0\-+ ]+)?  # conversion flags (optional)
-    (?:\d+)?  # minimum field width (optional)
-    (?:\.\d+)?  # precision (optional)
-    [hlL]?  # length modifier (optional)
-    (?P<type>[diouxXeEfFgGcrs%])  # conversion type
+    (?P<has_key>\((?P<key>{0})\))?  # mapping key
+    (?P<format>
+        (?:[#0\-+ ]+)?  # conversion flags (optional)
+        (?:\d+)?  # minimum field width (optional)
+        (?:\.\d+)?  # precision (optional)
+        [hlL]?  # length modifier (optional)
+        [diouxXeEfFgGcrs]  # conversion type
+    )
 '''