]>
Commit | Line | Data |
---|---|---|
e9f4ccd1 | 1 | import re |
e9f4ccd1 | 2 | from enum import Enum |
3 | ||
4 | from .common import PostProcessor | |
5 | ||
6 | ||
class MetadataParserPP(PostProcessor):
    """Post-processor that applies a list of metadata actions to the info dict.

    Each action is a tuple whose first item is a member of ``Actions`` and
    whose remaining items are the arguments of the builder method named by
    that member's value (see ``validate_action`` for the accepted shapes).
    """

    class Actions(Enum):
        # Each value is the NAME of the method that builds the action callable;
        # it is looked up with getattr(), so the spelling must match the method.
        INTERPRET = 'interpretter'
        REPLACE = 'replacer'

    def __init__(self, downloader, actions):
        """Build one callable per entry of `actions` and store them in order.

        @param downloader  Passed through to PostProcessor.__init__
        @param actions     Iterable of (Actions member, *builder arguments)
        """
        PostProcessor.__init__(self, downloader)
        self._actions = []
        for f in actions:
            action = f[0]
            # Internal sanity check only; validate_action() performs the same
            # test but reports it as a ValueError
            assert isinstance(action, self.Actions)
            self._actions.append(getattr(self, action.value)(*f[1:]))

    @classmethod
    def validate_action(cls, action, *data):
        """Raise if the given action could not be constructed.

        Each action can be:
                (Actions.INTERPRET, from, to) OR
                (Actions.REPLACE, field, search, replace)

        Raises ValueError when `action` is not an Actions member; any error
        raised while building the action (e.g. an invalid regex) propagates.
        """
        if not isinstance(action, cls.Actions):
            raise ValueError(f'{action!r} is not a valid action')
        # The class stands in for `self`: the builder is only constructed, never
        # run, so compiling its regex/template is what surfaces invalid input
        getattr(cls, action.value)(cls, *data)  # So this can raise error to validate

    @staticmethod
    def field_to_template(tmpl):
        """Return `tmpl` as an output template.

        A bare name made only of ASCII letters and underscores is wrapped as
        '%(name)s'. Anything else is checked with YoutubeDL.validate_outtmpl
        and returned unchanged; the error that check returns (if any) is raised.
        """
        # fullmatch, not match(...$): `$` also matches just before a trailing
        # newline, which would wrap 'title\n' into the broken '%(title\n)s'
        if re.fullmatch(r'[a-zA-Z_]+', tmpl):
            return f'%({tmpl})s'

        # Imported here rather than at module level (presumably to avoid a
        # circular import - TODO confirm)
        from ..YoutubeDL import YoutubeDL
        err = YoutubeDL.validate_outtmpl(tmpl)
        if err:
            raise err
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'

        A string without any '%(name)s' placeholder is returned untouched,
        i.e. it is treated as already being a regex.
        """
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        pieces = []
        lastpos = 0
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            pieces.append(re.escape(fmt[lastpos:match.start()]))
            pieces.append(rf'(?P<{match.group(1)}>.+)')
            lastpos = match.end()
        # Literal text after the last placeholder (an empty tail escapes to '')
        pieces.append(re.escape(fmt[lastpos:]))
        return ''.join(pieces)

    def run(self, info):
        """Apply every stored action to `info` in order; return ([], info)."""
        for f in self._actions:
            f(info)
        return [], info

    def interpretter(self, inp, out):
        """Build an action that parses named groups out of `inp` into the info dict.

        @param inp  Field name or output template that produces the text to search
        @param out  Format string or regex (see format_to_regex) whose named
                    groups become the keys written to the info dict
        @returns    Callable taking the info dict and modifying it in place
        """
        # Resolved once at build time, so invalid input fails here and not on
        # every run of the action
        template = self.field_to_template(inp)
        out_re = re.compile(self.format_to_regex(out))

        def f(info):
            data_to_parse = self._downloader.evaluate_outtmpl(template, info)
            self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
            match = out_re.search(data_to_parse)
            if match is None:
                self.to_screen(f'Could not interpret {inp!r} as {out!r}')
                return
            for attribute, value in match.groupdict().items():
                # Groups that did not take part in the match are stored as None
                # and reported as 'NA'
                info[attribute] = value
                self.to_screen(f'Parsed {attribute} from {template!r}: {value if value is not None else "NA"!r}')

        return f

    def replacer(self, field, search, replace):
        """Build an action that regex-replaces inside a string field of the info dict.

        @param field    Key of the info dict to modify
        @param search   Regex to look for (compiled once, at build time)
        @param replace  Replacement handed to the compiled pattern's subn()
        @returns        Callable taking the info dict and modifying it in place
        """
        search_re = re.compile(search)

        def f(info):
            val = info.get(field)
            if val is None:
                self.to_screen(f'Video does not have a {field}')
                return
            elif not isinstance(val, str):
                self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
                return
            self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}')
            # n is the number of substitutions made
            info[field], n = search_re.subn(replace, val)
            if n:
                self.to_screen(f'Changed {field} to: {info[field]}')
            else:
                self.to_screen(f'Did not find {search!r} in {field}')

        return f
101 | ||
102 | ||
class MetadataFromFieldPP(MetadataParserPP):
    """MetadataParserPP configured from 'FROM:TO' strings, one INTERPRET action each."""

    @classmethod
    def to_action(cls, f):
        """Split a 'FROM:TO' string into an (Actions.INTERPRET, from, to) tuple.

        The split happens at the first colon that is not preceded by a
        backslash; '\\:' sequences in the FROM part are then unescaped to ':'.
        Raises ValueError if the string has no such separator followed by a
        non-empty TO part.
        """
        parsed = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
        if parsed is None:
            raise ValueError(f'it should be FROM:TO, not {f!r}')
        source = parsed['in'].replace('\\:', ':')
        target = parsed['out']
        return cls.Actions.INTERPRET, source, target

    def __init__(self, downloader, formats):
        """Convert every entry of `formats` with to_action and pass the result on."""
        parsed_actions = [self.to_action(spec) for spec in formats]
        super().__init__(downloader, parsed_actions)
e9f4ccd1 | 117 | |
118 | ||
# Deprecated
class MetadataFromTitlePP(MetadataParserPP):
    """Deprecated variant that interprets only the 'title' field with `titleformat`."""

    def __init__(self, downloader, titleformat):
        """Register a single INTERPRET action on 'title', then emit a deprecation warning."""
        title_action = (self.Actions.INTERPRET, 'title', titleformat)
        super().__init__(downloader, [title_action])
        notice = (
            'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated '
            'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataFromFieldPP instead')
        self.deprecation_warning(notice)