[outtmpl] Add alternate forms `F`, `D`

author pukkandan <redacted>

Thu, 23 Dec 2021 01:14:42 +0000 (06:44 +0530)

committer pukkandan <redacted>

Thu, 23 Dec 2021 01:19:16 +0000 (06:49 +0530)
author pukkandan <redacted>
Thu, 23 Dec 2021 01:14:42 +0000 (06:44 +0530)
committer pukkandan <redacted>
Thu, 23 Dec 2021 01:19:16 +0000 (06:49 +0530)
diff --git a/README.md b/README.md

index 98c737118b7e12c8238ac993c1ae1ea516a16332..c57cabf6b4b4a952ed4b71b034d5a3e5670a3325 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1085,7 +1085,7 @@ # OUTPUT TEMPLATE
  
  1. **Default**: A literal default value can be specified for when the field is empty using a `|` separator. This overrides `--output-na-template`. Eg: `%(uploader|Unknown)s`
  
-1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma separated **l**ist (flag `#` for `\n` newline-separated) and a string **q**uoted for the terminal (flag `#` to split a list into different arguments), respectively
+1. **More Conversions**: In addition to the normal format types `diouxXeEfFgGcrs`, `B`, `j`, `l`, `q`, `D`, `F` can be used for converting to **B**ytes, **j**son (flag `#` for pretty-printing), a comma separated **l**ist (flag `#` for `\n` newline-separated), a string **q**uoted for the terminal (flag `#` to split a list into different arguments), to add **D**ecimal suffixes (Eg: 10M), and to sanitize as **F**ilename (flag `#` for restricted), respectively
  
  1. **Unicode normalization**: The format type `U` can be used for NFC [unicode normalization](https://docs.python.org/3/library/unicodedata.html#unicodedata.normalize). The alternate form flag (`#`) changes the normalization to NFD and the conversion flag `+` can be used for NFKC/NFKD compatibility equivalence normalization. Eg: `%(title)+.100U` is NFKC
  
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py

index 6c2530046815d152af3fc5196485d2143b8a9094..39d7e1ec55f3d656ab8d61a7625babe9f387eec7 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -717,6 +717,7 @@ def test(tmpl, expected, *, info=None, **params):
          test('%(id)s', '.abcd', info={'id': '.abcd'})
          test('%(id)s', 'ab__cd', info={'id': 'ab__cd'})
          test('%(id)s', ('ab:cd', 'ab -cd'), info={'id': 'ab:cd'})
+        test('%(id.0)s', '-', info={'id': '--'})
  
          # Invalid templates
          self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
@@ -777,6 +778,10 @@ def expect_same_infodict(out):
          test('%(title5)#U', 'a\u0301e\u0301i\u0301 𝐀')
          test('%(title5)+U', 'áéí A')
          test('%(title5)+#U', 'a\u0301e\u0301i\u0301 A')
+        test('%(height)D', '1K')
+        test('%(height)5.2D', ' 1.08K')
+        test('%(title4).10F', ('foo \'bar\' ', 'foo \'bar\'#'))
+        test('%(title4)#F', 'foo_bar_test')
          if compat_os_name == 'nt':
              test('%(title4)q', ('"foo \\"bar\\" test"', "'foo _'bar_' test'"))
              test('%(formats.:.id)#q', ('"id 1" "id 2" "id 3"', "'id 1' 'id 2' 'id 3'"))
@@ -808,6 +813,11 @@ def expect_same_infodict(out):
          test('%(width-100,height+width|def)s', 'def')
          test('%(timestamp-x>%H\\,%M\\,%S,timestamp>%H\\,%M\\,%S)s', '12,00,00')
  
+        # Replacement
+        test('%(id&foo)s.bar', 'foo.bar')
+        test('%(title&foo)s.bar', 'NA.bar')
+        test('%(title&foo|baz)s.bar', 'baz.bar')
+
          # Laziness
          def gen():
              yield from range(5)
@@ -836,11 +846,6 @@ def gen():
          test('%(title3)s', ('foo/bar\\test', 'foo_bar_test'))
          test('folder/%(title3)s', ('folder/foo/bar\\test', 'folder%sfoo_bar_test' % os.path.sep))
  
-        # Replacement
-        test('%(id&foo)s.bar', 'foo.bar')
-        test('%(title&foo)s.bar', 'NA.bar')
-        test('%(title&foo|baz)s.bar', 'baz.bar')
-
      def test_format_note(self):
          ydl = YoutubeDL()
          self.assertEqual(ydl._format_note({}), '')
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index be0a9c43d0a52ba8a220c66e34ed8fa9a2338767..277b24a47020c089e02643c56577bf7ea7135ff1 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -67,6 +67,7 @@
      float_or_none,
      format_bytes,
      format_field,
+    format_decimal_suffix,
      formatSeconds,
      GeoRestrictedError,
      get_domain,
@@ -1005,7 +1006,7 @@ def escape_outtmpl(outtmpl):
      def validate_outtmpl(cls, outtmpl):
          ''' @return None or Exception object '''
          outtmpl = re.sub(
-            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBU]'),
+            STR_FORMAT_RE_TMPL.format('[^)]*', '[ljqBUDF]'),
              lambda mobj: f'{mobj.group(0)[:-1]}s',
              cls._outtmpl_expandpath(outtmpl))
          try:
@@ -1021,8 +1022,12 @@ def _copy_infodict(info_dict):
              info_dict.pop(key, None)
          return info_dict
  
-    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
-        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict """
+    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
+        """ Make the outtmpl and info_dict suitable for substitution: ydl.escape_outtmpl(outtmpl) % info_dict
+        @param sanitize    Whether to sanitize the output as a filename.
+                           For backward compatibility, a function can also be passed
+        """
+
          info_dict.setdefault('epoch', int(time.time()))  # keep epoch consistent once set
  
          info_dict = self._copy_infodict(info_dict)
@@ -1043,7 +1048,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
          }
  
          TMPL_DICT = {}
-        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBU]'))
+        EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE_TMPL.format('[^)]*', f'[{STR_FORMAT_TYPES}ljqBUDF]'))
          MATH_FUNCTIONS = {
              '+': float.__add__,
              '-': float.__sub__,
@@ -1051,7 +1056,7 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
          # Field is of the form key1.key2...
          # where keys (except first) can be string, int or slice
          FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
-        MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+        MATH_FIELD_RE = r'''(?:{field}|{num})'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
          MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
          INTERNAL_FORMAT_RE = re.compile(r'''(?x)
              (?P<negate>-)?
@@ -1107,6 +1112,13 @@ def get_value(mdict):
  
          na = self.params.get('outtmpl_na_placeholder', 'NA')
  
+        def filename_sanitizer(key, value, restricted=self.params.get('restrictfilenames')):
+            return sanitize_filename(str(value), restricted=restricted,
+                                     is_id=re.search(r'(^|[_.])id(\.|$)', key))
+
+        sanitizer = sanitize if callable(sanitize) else filename_sanitizer
+        sanitize = bool(sanitize)
+
          def _dumpjson_default(obj):
              if isinstance(obj, (set, LazyList)):
                  return list(obj)
@@ -1117,7 +1129,7 @@ def create_key(outer_mobj):
                  return outer_mobj.group(0)
              key = outer_mobj.group('key')
              mobj = re.match(INTERNAL_FORMAT_RE, key)
-            initial_field = mobj.group('fields').split('.')[-1] if mobj else ''
+            initial_field = mobj.group('fields') if mobj else ''
              value, replacement, default = None, None, na
              while mobj:
                  mobj = mobj.groupdict()
@@ -1153,6 +1165,10 @@ def create_key(outer_mobj):
                      # "+" = compatibility equivalence, "#" = NFD
                      'NF%s%s' % ('K' if '+' in flags else '', 'D' if '#' in flags else 'C'),
                      value), str_fmt
+            elif fmt[-1] == 'D':  # decimal suffix
+                value, fmt = format_decimal_suffix(value, f'%{fmt[:-1]}f%s' if fmt[:-1] else '%d%s'), 's'
+            elif fmt[-1] == 'F':  # filename sanitization
+                value, fmt = filename_sanitizer(initial_field, value, restricted='#' in flags), str_fmt
              elif fmt[-1] == 'c':
                  if value:
                      value = str(value)[0]
@@ -1169,7 +1185,7 @@ def create_key(outer_mobj):
                      # So we convert it to repr first
                      value, fmt = repr(value), str_fmt
                  if fmt[-1] in 'csr':
-                    value = sanitize(initial_field, value)
+                    value = sanitizer(initial_field, value)
  
              key = '%s\0%s' % (key.replace('%', '%\0'), outer_mobj.group('format'))
              TMPL_DICT[key] = value
@@ -1183,12 +1199,8 @@ def evaluate_outtmpl(self, outtmpl, info_dict, *args, **kwargs):
  
      def _prepare_filename(self, info_dict, tmpl_type='default'):
          try:
-            sanitize = lambda k, v: sanitize_filename(
-                compat_str(v),
-                restricted=self.params.get('restrictfilenames'),
-                is_id=(k == 'id' or k.endswith('_id')))
              outtmpl = self._outtmpl_expandpath(self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default']))
-            filename = self.evaluate_outtmpl(outtmpl, info_dict, sanitize)
+            filename = self.evaluate_outtmpl(outtmpl, info_dict, True)
  
              force_ext = OUTTMPL_TYPES.get(tmpl_type)
              if filename and force_ext is not None:
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py

index 2919324c660a558aa8778d4c31064bd41ea7a53f..b1929f4dbba996bd3fda4b0bc5f2e1a6b797eef5 100644 (file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -2110,18 +2110,19 @@ def unsmuggle_url(smug_url, default=None):
      return url, data
  
  
+def format_decimal_suffix(num, fmt='%d%s', *, factor=1000):
+    """ Formats numbers with decimal suffixes like K, M, etc """
+    num, factor = float_or_none(num), float(factor)
+    if num is None:
+        return None
+    exponent = 0 if num == 0 else int(math.log(num, factor))
+    suffix = ['', *'KMGTPEZY'][exponent]
+    converted = num / (factor ** exponent)
+    return fmt % (converted, suffix)
+
+
  def format_bytes(bytes):
-    if bytes is None:
-        return 'N/A'
-    if type(bytes) is str:
-        bytes = float(bytes)
-    if bytes == 0.0:
-        exponent = 0
-    else:
-        exponent = int(math.log(bytes, 1024.0))
-    suffix = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB'][exponent]
-    converted = float(bytes) / float(1024 ** exponent)
-    return '%.2f%s' % (converted, suffix)
+    return format_decimal_suffix(bytes, '%.2f%siB', factor=1024) or 'N/A'
  
  
  def lookup_unit_table(unit_table, s):
author	pukkandan <redacted>
	Thu, 23 Dec 2021 01:14:42 +0000 (06:44 +0530)
committer	pukkandan <redacted>
	Thu, 23 Dec 2021 01:19:16 +0000 (06:49 +0530)
README.md		patch \| blob \| blame \| history
test/test_YoutubeDL.py		patch \| blob \| blame \| history
yt_dlp/YoutubeDL.py		patch \| blob \| blame \| history
yt_dlp/utils.py		patch \| blob \| blame \| history