]>
Commit | Line | Data |
---|---|---|
e9f4ccd1 | 1 | import re |
e9f4ccd1 | 2 | |
3 | from .common import PostProcessor | |
4f547d6d | 4 | from ..utils import Namespace, filter_dict |
e9f4ccd1 | 5 | |
6 | ||
class MetadataParserPP(PostProcessor):
    """Post-processor that rewrites entries of the info dict.

    It is configured with a list of actions. Each action is a sequence whose
    first item is one of the values stored in ``Actions`` and whose remaining
    items are the arguments for that action:

        (Actions.INTERPRET, from, to)
        (Actions.REPLACE, field, search, replace)
    """

    def __init__(self, downloader, actions):
        """Build one callable per action and store them in ``self._actions``.

        @param downloader  Forwarded unchanged to ``PostProcessor.__init__``
        @param actions     Iterable of ``(action, *args)`` sequences
        Raises ValueError if an action is not one of ``Actions``; errors
        raised while building an action (e.g. ``re.error``) propagate.
        """
        super().__init__(downloader)
        self._actions = []
        for spec in actions:
            action, *args = spec
            # Raise explicitly instead of using `assert`: assertions are
            # removed under `python -O`, which would let an arbitrary object
            # be called below.
            if action not in self.Actions:
                raise ValueError(f'{action!r} is not a valid action')
            # `action` is a plain function taken from the class body, so
            # `self` has to be passed explicitly.
            self._actions.append(action(self, *args))

    @classmethod
    def validate_action(cls, action, *data):
        """Check that an action and its arguments are usable.

        Each action can be:
                (Actions.INTERPRET, from, to) OR
                (Actions.REPLACE, field, search, replace)

        Raises ValueError if ``action`` is not one of ``Actions``. Any error
        raised while building the action from ``data`` (such as ``re.error``
        from compiling a pattern, or the error reported for an invalid
        template) propagates to the caller. Returns None.
        """
        if action not in cls.Actions:
            raise ValueError(f'{action!r} is not a valid action')
        # Build the action and discard the result; the class stands in for
        # `self`, and building is what raises on bad arguments.
        action(cls, *data)

    @staticmethod
    def field_to_template(tmpl):
        """Return an output template for ``tmpl``.

        A string made up only of ASCII letters and underscores is treated as
        a field name and wrapped as ``%(name)s``. Anything else is treated as
        an output template: it is checked with ``YoutubeDL.validate_outtmpl``
        and returned unchanged if that reports no error; otherwise the
        reported error is raised.
        """
        if re.match(r'[a-zA-Z_]+$', tmpl):
            return f'%({tmpl})s'

        # Imported locally rather than at module level -- presumably to avoid
        # a circular import (NOTE(review): confirm).
        from ..YoutubeDL import YoutubeDL
        err = YoutubeDL.validate_outtmpl(tmpl)
        if err:
            raise err
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'

        A string that contains no '%(name)s' placeholder is assumed to be a
        regex already and is returned unchanged.
        """
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        pieces = []
        lastpos = 0
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            pieces.append(re.escape(fmt[lastpos:match.start()]))
            pieces.append(rf'(?P<{match.group(1)}>.+)')
            lastpos = match.end()
        # Literal text after the last placeholder (an empty string if none)
        pieces.append(re.escape(fmt[lastpos:]))
        return ''.join(pieces)

    def run(self, info):
        """Apply every configured action to ``info`` in order.

        The actions modify ``info`` in place. Returns ``([], info)``; the
        empty list is presumably the "files to delete" slot of the
        post-processor contract (NOTE(review): confirm against PostProcessor).
        """
        for f in self._actions:
            f(info)
        return [], info

    def interpretter(self, inp, out):
        """Build the INTERPRET action.

        @param inp  Field name or output template to read from
                    (see ``field_to_template``)
        @param out  Format string or regex with named groups
                    (see ``format_to_regex``)
        Returns a function taking the info dict. The template and the regex
        are prepared here, so invalid arguments raise when the action is
        built rather than when it is run.
        """
        template = self.field_to_template(inp)
        out_re = re.compile(self.format_to_regex(out))

        def f(info):
            data_to_parse = self._downloader.evaluate_outtmpl(template, info)
            self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
            match = out_re.search(data_to_parse)
            if match is None:
                self.to_screen(f'Could not interpret {inp!r} as {out!r}')
                return
            # filter_dict presumably drops groups that did not take part in
            # the match (value None) -- NOTE(review): confirm in ..utils.
            for attribute, value in filter_dict(match.groupdict()).items():
                info[attribute] = value
                self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}')

        return f

    def replacer(self, field, search, replace):
        """Build the REPLACE action.

        @param field    Key of the info dict to modify
        @param search   Regex to look for; compiled here, so an invalid
                        pattern raises when the action is built
        @param replace  Replacement passed to ``Pattern.subn``
        Returns a function taking the info dict. It leaves the dict untouched
        when the field is missing/None or is not a string.
        """
        search_re = re.compile(search)

        def f(info):
            val = info.get(field)
            if val is None:
                self.to_screen(f'Video does not have a {field}')
                return
            if not isinstance(val, str):
                self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
                return
            self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}')
            info[field], n = search_re.subn(replace, val)
            if n:
                self.to_screen(f'Changed {field} to: {info[field]}')
            else:
                self.to_screen(f'Did not find {search!r} in {field}')

        return f

    # Holds the plain builder functions (not bound methods), so it must be
    # defined after them; membership tests against it validate actions above.
    Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer)
99 | ||
e9f4ccd1 | 100 | |
class MetadataFromFieldPP(MetadataParserPP):
    """Metadata parser configured with 'FROM:TO' strings instead of action tuples."""

    @classmethod
    def to_action(cls, f):
        """Turn one 'FROM:TO' string into an INTERPRET action tuple.

        The string is split at the first colon that is not preceded by a
        backslash; TO must contain at least one character. Backslash-escaped
        colons inside FROM are unescaped. Newlines are allowed in both parts.

        Raises ValueError if ``f`` cannot be split this way.
        """
        parsed = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
        if parsed is None:
            raise ValueError(f'it should be FROM:TO, not {f!r}')
        source, target = parsed.group('in', 'out')
        return cls.Actions.INTERPRET, source.replace('\\:', ':'), target

    def __init__(self, downloader, formats):
        """Convert every entry of ``formats`` with ``to_action`` and hand the
        resulting actions to the parent constructor."""
        interpret_actions = [self.to_action(spec) for spec in formats]
        super().__init__(downloader, interpret_actions)
e9f4ccd1 | 115 | |
116 | ||
# Deprecated
class MetadataFromTitlePP(MetadataParserPP):
    """Legacy variant that interprets only the 'title' field.

    Constructing it emits a deprecation warning that points to
    MetadataFromFieldPP.
    """

    def __init__(self, downloader, titleformat):
        """Set up a single INTERPRET action from 'title' to ``titleformat``,
        then warn that this class is deprecated."""
        title_action = (self.Actions.INTERPRET, 'title', titleformat)
        super().__init__(downloader, [title_action])
        notice = (
            'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated '
            'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataFromFieldPP instead')
        self.deprecation_warning(notice)