[outtmpl] Curly braces to filter keys

author pukkandan <redacted>

Sat, 3 Sep 2022 12:26:23 +0000 (17:56 +0530)

committer pukkandan <redacted>

Sat, 3 Sep 2022 12:58:24 +0000 (18:28 +0530)
author pukkandan <redacted>
Sat, 3 Sep 2022 12:26:23 +0000 (17:56 +0530)
committer pukkandan <redacted>
Sat, 3 Sep 2022 12:58:24 +0000 (18:28 +0530)
diff --git a/README.md b/README.md

index 28fad2815cd475cf78a90039c4e30ef3a98fe9f6..4a5456f97ef308f947b74fb9d51f6aa20b701aba 100644 (file)
--- a/README.md
+++ b/README.md
@@ -1210,7 +1210,7 @@ # OUTPUT TEMPLATE
  
  The field names themselves (the part inside the parenthesis) can also have some special formatting:
  
-1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a `.` (dot) separator. You can also do python slicing using `:`. E.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`, `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. `%()s` refers to the entire infodict. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
+1. **Object traversal**: The dictionaries and lists available in metadata can be traversed by using a dot `.` separator; e.g. `%(tags.0)s`, `%(subtitles.en.-1.ext)s`. You can do Python slicing with colon `:`; e.g. `%(id.3:7:-1)s`, `%(formats.:.format_id)s`. Curly braces `{}` can be used to build dictionaries with only specific keys; e.g. `%(formats.:.{format_id,height})#j`. An empty field name `%()s` refers to the entire infodict; e.g. `%(.{id,title})s`. Note that all the fields that become available using this method are not listed below. Use `-j` to see such fields
  
  1. **Addition**: Addition and subtraction of numeric fields can be done using `+` and `-` respectively. E.g. `%(playlist_index+10)03d`, `%(n_entries+1-playlist_index)d`
  
diff --git a/test/test_YoutubeDL.py b/test/test_YoutubeDL.py

index 426e52305d72f6751c786b6bbf7a703317e8c34f..60e457108433faf63d71186dda821c420520f2f4 100644 (file)
--- a/test/test_YoutubeDL.py
+++ b/test/test_YoutubeDL.py
@@ -662,7 +662,11 @@ def test_add_extra_info(self):
          'playlist_autonumber': 2,
          '__last_playlist_index': 100,
          'n_entries': 10,
-        'formats': [{'id': 'id 1'}, {'id': 'id 2'}, {'id': 'id 3'}]
+        'formats': [
+            {'id': 'id 1', 'height': 1080, 'width': 1920},
+            {'id': 'id 2', 'height': 720},
+            {'id': 'id 3'}
+        ]
      }
  
      def test_prepare_outtmpl_and_filename(self):
@@ -729,6 +733,7 @@ def test(tmpl, expected, *, info=None, **params):
          self.assertTrue(isinstance(YoutubeDL.validate_outtmpl('%(title)'), ValueError))
          test('%(invalid@tmpl|def)s', 'none', outtmpl_na_placeholder='none')
          test('%(..)s', 'NA')
+        test('%(formats.{id)s', 'NA')
  
          # Entire info_dict
          def expect_same_infodict(out):
@@ -813,6 +818,12 @@ def expect_same_infodict(out):
          test('%(formats.:2:-1)r', repr(FORMATS[:2:-1]))
          test('%(formats.0.id.-1+id)f', '1235.000000')
          test('%(formats.0.id.-1+formats.1.id.-1)d', '3')
+        out = json.dumps([{'id': f['id'], 'height.:2': str(f['height'])[:2]}
+                          if 'height' in f else {'id': f['id']}
+                          for f in FORMATS])
+        test('%(formats.:.{id,height.:2})j', (out, sanitize(out)))
+        test('%(formats.:.{id,height}.id)l', ', '.join(f['id'] for f in FORMATS))
+        test('%(.{id,title})j', ('{"id": "1234"}', '{＂id＂： ＂1234＂}'))
  
          # Alternates
          test('%(title,id)s', '1234')
diff --git a/yt_dlp/YoutubeDL.py b/yt_dlp/YoutubeDL.py

index a6bbbb12808486d33ddda6080b48a12642f66ec9..58c5c47501d75baa5061b33442a5b042dde26fbb 100644 (file)
--- a/yt_dlp/YoutubeDL.py
+++ b/yt_dlp/YoutubeDL.py
@@ -1127,8 +1127,12 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
              '-': float.__sub__,
          }
          # Field is of the form key1.key2...
-        # where keys (except first) can be string, int or slice
-        FIELD_RE = r'\w*(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
+        # where keys (except first) can be string, int, slice or "{field, ...}"
+        FIELD_INNER_RE = r'(?:\w+|%(num)s|%(num)s?(?::%(num)s?){1,2})' % {'num': r'(?:-?\d+)'}
+        FIELD_RE = r'\w*(?:\.(?:%(inner)s|{%(field)s(?:,%(field)s)*}))*' % {
+            'inner': FIELD_INNER_RE,
+            'field': rf'\w*(?:\.{FIELD_INNER_RE})*'
+        }
          MATH_FIELD_RE = rf'(?:{FIELD_RE}|-?{NUMBER_RE})'
          MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
          INTERNAL_FORMAT_RE = re.compile(rf'''(?x)
@@ -1142,11 +1146,20 @@ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=False):
                  (?:\|(?P<default>.*?))?
              )$''')
  
-        def _traverse_infodict(k):
-            k = k.split('.')
-            if k[0] == '':
-                k.pop(0)
-            return traverse_obj(info_dict, k, is_user_input=True, traverse_string=True)
+        def _traverse_infodict(fields):
+            fields = [f for x in re.split(r'\.({.+?})\.?', fields)
+                      for f in ([x] if x.startswith('{') else x.split('.'))]
+            for i in (0, -1):
+                if fields and not fields[i]:
+                    fields.pop(i)
+
+            for i, f in enumerate(fields):
+                if not f.startswith('{'):
+                    continue
+                assert f.endswith('}'), f'No closing brace for {f} in {fields}'
+                fields[i] = {k: k.split('.') for k in f[1:-1].split(',')}
+
+            return traverse_obj(info_dict, fields, is_user_input=True, traverse_string=True)
  
          def get_value(mdict):
              # Object traversal
@@ -2800,12 +2813,13 @@ def _forceprint(self, key, info_dict):
          info_copy['automatic_captions_table'] = self.render_subtitles_table(info_dict.get('id'), info_dict.get('automatic_captions'))
  
          def format_tmpl(tmpl):
-            mobj = re.match(r'\w+(=?)$', tmpl)
-            if mobj and mobj.group(1):
-                return f'{tmpl[:-1]} = %({tmpl[:-1]})r'
-            elif mobj:
-                return f'%({tmpl})s'
-            return tmpl
+            mobj = re.fullmatch(r'([\w.:,-]|(?P<dict>{[\w.:,-]+}))+=', tmpl)
+            if not mobj:
+                return tmpl
+            elif not mobj.group('dict'):
+                return '\n'.join(f'{f} = %({f})r' for f in tmpl[:-1].split(','))
+            tmpl = f'.{tmpl[:-1]}' if tmpl.startswith('{') else tmpl[:-1]
+            return f'{tmpl} = %({tmpl})#j'
  
          for tmpl in self.params['forceprint'].get(key, []):
              self.to_stdout(self.evaluate_outtmpl(format_tmpl(tmpl), info_copy))
diff --git a/yt_dlp/utils.py b/yt_dlp/utils.py

index 00f2fbf423d82d410eaa7091eaca8d836c8320e0..90042aa8b99a0f334bafc3d97de9f4959feef686 100644 (file)
--- a/yt_dlp/utils.py
+++ b/yt_dlp/utils.py
@@ -5280,7 +5280,7 @@ def traverse_obj(
      @param path_list        A list of paths which are checked one by one.
                              Each path is a list of keys where each key is a:
                                - None:     Do nothing
-                              - string:   A dictionary key
+                              - string:   A dictionary key / regex group
                                - int:      An index into a list
                                - tuple:    A list of keys all of which will be traversed
                                - Ellipsis: Fetch all values in the object
@@ -5290,12 +5290,16 @@ def traverse_obj(
      @param expected_type    Only accept final value of this type (Can also be any callable)
      @param get_all          Return all the values obtained from a path or only the first one
      @param casesense        Whether to consider dictionary keys as case sensitive
+
+    The following are only meant to be used by YoutubeDL.prepare_outtmpl and are not part of the API
+
+    @param path_list        In addition to the above,
+                              - dict:     Given {k:v, ...}; return {k: traverse_obj(obj, v), ...}
      @param is_user_input    Whether the keys are generated from user input. If True,
                              strings are converted to int/slice if necessary
      @param traverse_string  Whether to traverse inside strings. If True, any
                              non-compatible object will also be converted into a string
-    # TODO: Write tests
-    '''
+    '''  # TODO: Write tests
      if not casesense:
          _lower = lambda k: (k.lower() if isinstance(k, str) else k)
          path_list = (map(_lower, variadic(path)) for path in path_list)
@@ -5309,6 +5313,7 @@ def _traverse_obj(obj, path, _current_depth=0):
              if isinstance(key, (list, tuple)):
                  obj = [_traverse_obj(obj, sub_key, _current_depth) for sub_key in key]
                  key = ...
+
              if key is ...:
                  obj = (obj.values() if isinstance(obj, dict)
                         else obj if isinstance(obj, (list, tuple, LazyList))
@@ -5316,6 +5321,8 @@ def _traverse_obj(obj, path, _current_depth=0):
                  _current_depth += 1
                  depth = max(depth, _current_depth)
                  return [_traverse_obj(inner_obj, path[i + 1:], _current_depth) for inner_obj in obj]
+            elif isinstance(key, dict):
+                obj = filter_dict({k: _traverse_obj(obj, v, _current_depth) for k, v in key.items()})
              elif callable(key):
                  if isinstance(obj, (list, tuple, LazyList)):
                      obj = enumerate(obj)
author	pukkandan <redacted>
	Sat, 3 Sep 2022 12:26:23 +0000 (17:56 +0530)
committer	pukkandan <redacted>
	Sat, 3 Sep 2022 12:58:24 +0000 (18:28 +0530)
README.md		patch \| blob \| blame \| history
test/test_YoutubeDL.py		patch \| blob \| blame \| history
yt_dlp/YoutubeDL.py		patch \| blob \| blame \| history
yt_dlp/utils.py		patch \| blob \| blame \| history