]>
Commit | Line | Data |
---|---|---|
1 | import re | |
2 | ||
3 | from .common import PostProcessor | |
4 | from ..utils import Namespace, filter_dict, function_with_repr | |
5 | ||
6 | ||
class MetadataParserPP(PostProcessor):
    """Postprocessor that derives or rewrites fields of the info dict.

    It is configured with a sequence of actions. Each action is a tuple whose
    first item is a member of ``Actions`` and whose remaining items are the
    arguments of that action:
        (Actions.INTERPRET, from, to)
        (Actions.REPLACE, field, search, replace)
    """

    def __init__(self, downloader, actions):
        """Bind every configured action to this instance.

        Raises ValueError if the first item of an action is not a member of
        ``Actions``. Errors raised while building an action (see
        ``interpretter`` and ``replacer``) propagate unchanged.
        """
        super().__init__(downloader)
        self._actions = []
        for action, *args in actions:
            # Raise explicitly instead of using ``assert``: assertions are
            # removed under ``python -O``, which would let an invalid action
            # through and only fail later when it is called.
            if action not in self.Actions:
                raise ValueError(f'{action!r} is not a valid action')
            self._actions.append(action(self, *args))

    @classmethod
    def validate_action(cls, action, *data):
        """Each action can be:
                (Actions.INTERPRET, from, to) OR
                (Actions.REPLACE, field, search, replace)

        Raises ValueError if `action` is not a member of ``Actions``.
        Otherwise the action is built once, with the class standing in for
        the instance, so that any error caused by `data` is raised here.
        """
        if action not in cls.Actions:
            raise ValueError(f'{action!r} is not a valid action')
        action(cls, *data)  # So this can raise error to validate

    @staticmethod
    def field_to_template(tmpl):
        """Return an output template for `tmpl`.

        A value made up only of ASCII letters and underscores is treated as a
        field name and wrapped as ``%(name)s``. Anything else is checked with
        ``YoutubeDL.validate_outtmpl``; if that returns an error it is raised,
        otherwise `tmpl` is returned unchanged.
        """
        if re.match(r'[a-zA-Z_]+$', tmpl):
            return f'%({tmpl})s'

        # Imported at call time rather than at module level
        # (presumably to avoid a circular import - TODO confirm)
        from ..YoutubeDL import YoutubeDL
        err = YoutubeDL.validate_outtmpl(tmpl)
        if err:
            raise err
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'

        A string without any '%(name)s' placeholder is returned unchanged,
        i.e. it is assumed to already be a regex.
        """
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        lastpos = 0
        parts = []
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            parts.append(re.escape(fmt[lastpos:match.start()]))
            parts.append(rf'(?P<{match.group(1)}>.+)')
            lastpos = match.end()
        # Literal text after the last placeholder (empty if there is none)
        parts.append(re.escape(fmt[lastpos:]))
        return ''.join(parts)

    def run(self, info):
        """Apply every configured action to `info` in order.

        Returns a tuple of an empty list and the (mutated) `info`.
        """
        for f in self._actions:
            f(info)
        return [], info

    @function_with_repr
    def interpretter(self, inp, out):
        """Build an action that parses fields out of evaluated template text.

        `inp` is turned into a template with ``field_to_template`` and `out`
        into a compiled regex with ``format_to_regex``; both happen when the
        action is built, so invalid arguments raise immediately.
        """
        template = self.field_to_template(inp)
        out_re = re.compile(self.format_to_regex(out))

        def f(info):
            data_to_parse = self._downloader.evaluate_outtmpl(template, info)
            self.write_debug(f'Searching for {out_re.pattern!r} in {template!r}')
            match = out_re.search(data_to_parse)
            if match is None:
                self.to_screen(f'Could not interpret {inp!r} as {out!r}')
                return
            # Each named group that survives filter_dict is stored in info
            # under the group name
            for attribute, value in filter_dict(match.groupdict()).items():
                info[attribute] = value
                self.to_screen(f'Parsed {attribute} from {template!r}: {value!r}')

        return f

    @function_with_repr
    def replacer(self, field, search, replace):
        """Build an action that substitutes regex matches inside one field.

        `search` is compiled when the action is built, so an invalid pattern
        raises immediately. The action leaves `info` untouched when the field
        is missing/None or is not a string.
        """
        search_re = re.compile(search)

        def f(info):
            val = info.get(field)
            if val is None:
                self.to_screen(f'Video does not have a {field}')
                return
            elif not isinstance(val, str):
                self.report_warning(f'Cannot replace in field {field} since it is a {type(val).__name__}')
                return
            self.write_debug(f'Replacing all {search!r} in {field} with {replace!r}')
            # subn returns the new string and the number of substitutions made
            info[field], n = search_re.subn(replace, val)
            if n:
                self.to_screen(f'Changed {field} to: {info[field]}')
            else:
                self.to_screen(f'Did not find {search!r} in {field}')

        return f

    # The valid actions; membership in this namespace is what
    # __init__ and validate_action check against
    Actions = Namespace(INTERPRET=interpretter, REPLACE=replacer)
101 | ||
102 | ||
class MetadataFromFieldPP(MetadataParserPP):
    """Postprocessor configured from ``FROM:TO`` strings.

    Every string is converted into an INTERPRET action with ``to_action``.
    """

    @classmethod
    def to_action(cls, f):
        r"""Convert a ``FROM:TO`` string into an INTERPRET action tuple.

        The string is split at the earliest ``:`` that is not preceded by a
        backslash and is followed by at least one character. Any ``\:`` in
        the FROM part is unescaped to ``:``; the TO part is used as is.

        Raises ValueError if the string cannot be split this way.
        """
        parsed = re.match(r'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f)
        if parsed is None:
            raise ValueError(f'it should be FROM:TO, not {f!r}')
        source = parsed.group('in').replace('\\:', ':')
        target = parsed.group('out')
        return cls.Actions.INTERPRET, source, target

    def __init__(self, downloader, formats):
        """Translate each entry of `formats` and hand the actions to the parent."""
        actions = [self.to_action(spec) for spec in formats]
        super().__init__(downloader, actions)
117 | ||
118 | ||
# Deprecated
class MetadataFromTitlePP(MetadataParserPP):
    """Deprecated variant that interprets only the ``title`` field.

    Constructing it sets up a single INTERPRET action from 'title' to
    `titleformat` and then issues a deprecation warning that points to
    MetadataFromFieldPP.
    """

    def __init__(self, downloader, titleformat):
        title_action = (self.Actions.INTERPRET, 'title', titleformat)
        super().__init__(downloader, [title_action])
        notice = (
            'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated '
            'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataFromFieldPP instead')
        self.deprecation_warning(notice)