]> jfr.im git - yt-dlp.git/blob - yt_dlp/postprocessor/metadataparser.py
Add option `--replace-in-metadata`
[yt-dlp.git] / yt_dlp / postprocessor / metadataparser.py
1 import re
2
3 from enum import Enum
4
5 from .common import PostProcessor
6
7
class MetadataParserPP(PostProcessor):
    """Post-processor that applies a list of metadata actions to an info dict.

    Each action is a tuple whose first item is a member of ``Actions`` and
    whose remaining items are the arguments of the matching builder method
    (``interpretter`` or ``replacer``). The builders return closures that
    take the info dict and modify it in place.
    """

    class Actions(Enum):
        # Each value is the NAME of the MetadataParserPP method that builds
        # the callable for that action; it is resolved with getattr().
        INTERPRET = 'interpretter'
        REPLACE = 'replacer'

    def __init__(self, downloader, actions):
        """Build one callable per entry of *actions*.

        *actions* is an iterable of tuples in the shapes described in
        ``validate_action``.
        """
        PostProcessor.__init__(self, downloader)
        self._actions = []
        for f in actions:
            action = f[0]
            assert isinstance(action, self.Actions)
            # Look up the builder by name and hand it the remaining items.
            self._actions.append(getattr(self, action.value)(*f[1:]))

    @classmethod
    def validate_action(cls, action, *data):
        ''' Each action can be:
                (Actions.INTERPRET, from, to) OR
                (Actions.REPLACE, field, search, replace)

        Raises ValueError if *action* is not a member of ``Actions``.
        Any error raised while building the action (for example by
        ``re.compile`` on an invalid pattern) propagates to the caller.
        '''
        if not isinstance(action, cls.Actions):
            raise ValueError(f'{action!r} is not a valid action')
        # The builder is called with the class standing in for ``self``; the
        # returned closure is discarded, only the construction is exercised.
        getattr(cls, action.value)(cls, *data)

    @staticmethod
    def field_to_template(tmpl):
        """Wrap a bare field name as ``%(name)s``; return anything else as is.

        A bare field name consists only of ASCII letters and underscores.
        """
        if re.match(r'[a-zA-Z_]+$', tmpl):
            return f'%({tmpl})s'
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'

        A string without any '%(name)s' placeholder is assumed to already be
        a regex and is returned unchanged.
        """
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        lastpos = 0
        regex = ''
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            regex += re.escape(fmt[lastpos:match.start()])
            regex += rf'(?P<{match.group(1)}>.+)'
            lastpos = match.end()
        # Literal text after the last placeholder
        if lastpos < len(fmt):
            regex += re.escape(fmt[lastpos:])
        return regex

    def run(self, info):
        """Apply every configured action to *info* and return ``([], info)``."""
        for f in self._actions:
            f(info)
        return [], info

    def interpretter(self, inp, out):
        """Return a callable that parses *inp* using the format/regex *out*.

        *inp* is a field name or a template (see ``field_to_template``);
        *out* is a '%(name)s'-style format or a regex (see
        ``format_to_regex``). The returned callable evaluates the template
        against the info dict through the downloader, searches the result
        with the compiled regex and stores every named group in the info
        dict. When nothing matches, a warning is reported and the info dict
        is left unchanged.
        """
        def f(info):
            outtmpl, tmpl_dict = self._downloader.prepare_outtmpl(template, info)
            data_to_parse = self._downloader.escape_outtmpl(outtmpl) % tmpl_dict
            self.write_debug(f'Searching for r{out_re.pattern!r} in {template!r}')
            match = out_re.search(data_to_parse)
            if match is None:
                # Must be an f-string, otherwise the placeholders are shown verbatim
                self.report_warning(f'Could not interpret {inp!r} as {out!r}')
                return
            for attribute, value in match.groupdict().items():
                info[attribute] = value
                self.to_screen('Parsed %s from %r: %r' % (attribute, template, value if value is not None else 'NA'))

        # Prepared once here so that an invalid pattern fails at construction
        template = self.field_to_template(inp)
        out_re = re.compile(self.format_to_regex(out))
        return f

    def replacer(self, field, search, replace):
        """Return a callable that regex-replaces *search* with *replace* in *field*.

        The returned callable warns and does nothing when the field is
        missing (``None``) or is not a string; otherwise it substitutes all
        matches and reports whether anything changed.
        """
        def f(info):
            val = info.get(field)
            if val is None:
                self.report_warning(f'Video does not have a {field}')
                return
            elif not isinstance(val, str):
                self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
                return
            self.write_debug(f'Replacing all r{search!r} in {field} with {replace!r}')
            info[field], n = search_re.subn(replace, val)
            if n:
                self.to_screen(f'Changed {field} to: {info[field]}')
            else:
                self.to_screen(f'Did not find r{search!r} in {field}')

        # Compiled once here so that an invalid pattern fails at construction
        search_re = re.compile(search)
        return f
98
99
class MetadataFromFieldPP(MetadataParserPP):
    """Variant of MetadataParserPP configured with 'FROM:TO' strings."""

    @classmethod
    def to_action(cls, f):
        """Convert a 'FROM:TO' string into an ``Actions.INTERPRET`` tuple.

        The string is split at the first colon that is not preceded by a
        backslash; '\\:' sequences in the FROM part are then unescaped to
        ':'. Raises ValueError when the string has no such separator
        followed by a non-empty TO part.
        """
        parsed = re.match(r'(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
        if parsed is None:
            raise ValueError(f'it should be FROM:TO, not {f!r}')
        source = parsed.group('in').replace('\\:', ':')
        target = parsed.group('out')
        return (cls.Actions.INTERPRET, source, target)

    def __init__(self, downloader, formats):
        """Translate every entry of *formats* and initialise the parent."""
        parsed_actions = list(map(self.to_action, formats))
        MetadataParserPP.__init__(self, downloader, parsed_actions)
113
114
class MetadataFromTitlePP(MetadataParserPP):  # for backward compatibility
    """Interpret the 'title' field with a single format string."""

    def __init__(self, downloader, titleformat):
        """Set up one INTERPRET action from 'title' to *titleformat*."""
        title_action = (self.Actions.INTERPRET, 'title', titleformat)
        MetadataParserPP.__init__(self, downloader, [title_action])