]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from enum import Enum | |
4 | ||
5 | from .common import PostProcessor | |
6 | ||
7 | ||
class MetadataParserPP(PostProcessor):
    '''Post-processor that rewrites entries of the info dict.

    It is configured with a list of actions. Each action is turned into a
    callable when the post-processor is created, and run() applies those
    callables to the info dict in the order they were given.
    '''

    class Actions(Enum):
        # Each value is the name of the method that builds the action callable
        INTERPRET = 'interpretter'
        REPLACE = 'replacer'

    def __init__(self, downloader, actions):
        '''
        @param downloader  handed on to PostProcessor.__init__
        @param actions     entries of the form (Actions member, *arguments);
                           see validate_action for the accepted shapes
        '''
        PostProcessor.__init__(self, downloader)
        self._actions = []
        for spec in actions:
            kind = spec[0]
            assert isinstance(kind, self.Actions)
            # The enum value names the builder; the rest of the entry are its arguments
            builder = getattr(self, kind.value)
            self._actions.append(builder(*spec[1:]))

    @classmethod
    def validate_action(cls, action, *data):
        ''' Check an action without creating a post-processor.
        An action together with its data can be:
                (Actions.INTERPRET, from, to) OR
                (Actions.REPLACE, field, search, replace)
        Raises ValueError if action is not a member of Actions. The builder
        is then run only for the errors it may raise (e.g. a regex that does
        not compile); the callable it returns is thrown away.
        '''
        if isinstance(action, cls.Actions):
            # The builders are instance methods, so the class stands in for self
            getattr(cls, action.value)(cls, *data)
            return
        raise ValueError(f'{action!r} is not a valid action')

    @staticmethod
    def field_to_template(tmpl):
        ''' Wrap a bare field name (letters and underscores) as '%(name)s';
        anything else is taken to be a template already and returned as it is
        '''
        is_plain_field = re.match(r'[a-zA-Z_]+$', tmpl) is not None
        return f'%({tmpl})s' if is_plain_field else tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Turns a template-like string into a regex with named groups, e.g.
        '%(title)s - %(artist)s'
        becomes
        '(?P<title>.+)\ \-\ (?P<artist>.+)'
        A string without any %(name)s placeholder is returned unchanged,
        i.e. it is assumed to be a regex already.
        """
        if re.search(r'%\(\w+\)s', fmt) is None:
            return fmt
        pieces = []
        cursor = 0
        for placeholder in re.finditer(r'%\((\w+)\)s', fmt):
            # literal text in front of the placeholder has to match verbatim
            pieces.append(re.escape(fmt[cursor:placeholder.start()]))
            pieces.append(rf'(?P<{placeholder.group(1)}>.+)')
            cursor = placeholder.end()
        # whatever follows the last placeholder (possibly nothing)
        pieces.append(re.escape(fmt[cursor:]))
        return ''.join(pieces)

    def run(self, info):
        ''' Apply every configured action to info, in order.
        Returns ([], info); the actions modify info in place.
        '''
        for apply_action in self._actions:
            apply_action(info)
        return [], info

    def interpretter(self, inp, out):
        ''' Build the callable for an INTERPRET action.
        @param inp  field name or template whose evaluated text is searched
        @param out  regex, or a '%(name)s' style format converted to one
        The returned callable copies every named group of the first match
        into info (None for groups that did not take part in the match) and
        warns if there is no match at all.
        '''
        template = self.field_to_template(inp)
        out_re = re.compile(self.format_to_regex(out))

        def interpret(info):
            data_to_parse = self._downloader.evaluate_outtmpl(template, info)
            self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
            found = out_re.search(data_to_parse)
            if found is None:
                self.report_warning(f'Could not interpret {inp!r} as {out!r}')
                return
            for attribute, value in found.groupdict().items():
                info[attribute] = value
                # Only the message shows 'NA'; info itself keeps the None
                shown = 'NA' if value is None else value
                self.to_screen('Parsed %s from %r: %r' % (attribute, template, shown))

        return interpret

    def replacer(self, field, search, replace):
        ''' Build the callable for a REPLACE action.
        @param field    key of info whose value is rewritten
        @param search   regex to look for
        @param replace  replacement handed to re.subn
        The returned callable warns and leaves info alone when the field is
        missing (None) or is not a string.
        '''
        search_re = re.compile(search)

        def substitute(info):
            val = info.get(field)
            if val is None:
                self.report_warning(f'Video does not have a {field}')
                return
            if not isinstance(val, str):
                self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
                return
            self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}')
            # The field is written back even when nothing was substituted
            info[field], count = search_re.subn(replace, val)
            if not count:
                self.to_screen(f'Did not find {search!r} in {field}')
                return
            self.to_screen(f'Changed {field} to: {info[field]}')

        return substitute
97 | ||
98 | ||
class MetadataFromFieldPP(MetadataParserPP):
    ''' MetadataParserPP configured from 'FROM:TO' strings, each of which
    becomes one INTERPRET action
    '''

    @classmethod
    def to_action(cls, f):
        ''' Convert a 'FROM:TO' string into (Actions.INTERPRET, FROM, TO).
        The split happens at the first colon that is not preceded by a
        backslash. FROM may be empty and may span lines; TO must not be
        empty. Raises ValueError if f does not have this shape.
        '''
        parsed = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
        if parsed is None:
            raise ValueError(f'it should be FROM:TO, not {f!r}')
        source, target = parsed.group('in', 'out')
        # A backslash-escaped colon in FROM stands for a literal colon; TO is kept as written
        return cls.Actions.INTERPRET, source.replace('\\:', ':'), target

    def __init__(self, downloader, formats):
        '''
        @param downloader  handed on to MetadataParserPP.__init__
        @param formats     iterable of 'FROM:TO' strings (see to_action)
        '''
        actions = list(map(self.to_action, formats))
        super().__init__(downloader, actions)
113 | ||
114 | ||
# Deprecated
class MetadataFromTitlePP(MetadataParserPP):
    ''' Deprecated variant that interprets only the 'title' field.
    Creating an instance emits a deprecation warning.
    '''

    def __init__(self, downloader, titleformat):
        '''
        @param downloader   handed on to MetadataParserPP.__init__
        @param titleformat  regex or '%(name)s' style format applied to the title
        '''
        title_action = (self.Actions.INTERPRET, 'title', titleformat)
        super().__init__(downloader, [title_action])
        self.deprecation_warning(
            'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated '
            'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataFromFieldPP instead')