]>
Commit | Line | Data |
---|---|---|
5bfa4862 | 1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import PostProcessor | |
6 | from ..compat import compat_str | |
7 | ||
8 | ||
class MetadataFromFieldPP(PostProcessor):
    r"""Post-processor that fills info-dict fields by parsing other fields.

    It is configured with strings of the form ``FROM:TO``:

    * ``FROM`` is either a bare field name (letters and underscores only),
      which is wrapped as ``%(FROM)s``, or any other string, which is handed
      unchanged to the downloader's ``prepare_outtmpl``. A literal colon in
      ``FROM`` must be written as ``\:``.
    * ``TO`` is either a string containing ``%(name)s`` placeholders, which
      is converted to a regex with one named group per placeholder, or a
      string without placeholders, which is used as a regex verbatim.
    """

    # Splits "FROM:TO" at the first colon that is not preceded by a backslash
    regex = r'(?P<in>.*?)(?<!\\):(?P<out>.+)$'

    def __init__(self, downloader, formats):
        """Parse every ``FROM:TO`` entry of *formats* into a lookup record.

        downloader is passed on to ``PostProcessor.__init__``.
        formats must be a list or tuple of text strings that each match
        ``self.regex``. These preconditions are only checked with
        assertions (presumably the caller validates user input beforehand;
        verify against the caller).
        """
        PostProcessor.__init__(self, downloader)
        assert isinstance(formats, (list, tuple))
        self._data = []
        for f in formats:
            assert isinstance(f, compat_str)
            match = re.match(self.regex, f)
            assert match is not None
            # Un-escape the colons that were protected from the FROM:TO split
            inp = match.group('in').replace('\\:', ':')
            out = match.group('out')
            self._data.append({
                'in': inp,
                'out': out,
                'tmpl': self.field_to_template(inp),
                'regex': self.format_to_regex(out),
            })

    @staticmethod
    def field_to_template(tmpl):
        """Return '%(tmpl)s' for a bare field name, else tmpl unchanged.

        A bare field name consists only of ASCII letters and underscores.
        """
        if re.match(r'[a-zA-Z_]+$', tmpl):
            return '%%(%s)s' % tmpl
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'

        A string without any %(name)s placeholder is returned unchanged.

        A placeholder that occurs more than once is emitted as a named
        group the first time and as a backreference '(?P=name)' afterwards,
        since the re module refuses to compile a pattern that defines the
        same group name twice.
        """
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        lastpos = 0
        parts = []
        seen = set()
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            name = match.group(1)
            parts.append(re.escape(fmt[lastpos:match.start()]))
            if name in seen:
                # Repeated field: must match the text captured the first time
                parts.append(r'(?P=%s)' % name)
            else:
                seen.add(name)
                parts.append(r'(?P<%s>.+)' % name)
            lastpos = match.end()
        # Trailing literal text after the last placeholder (may be empty)
        parts.append(re.escape(fmt[lastpos:]))
        return ''.join(parts)

    def run(self, info):
        """Apply every configured format to *info* and store the parsed groups.

        For each record, the 'tmpl' entry is expanded against info through
        the downloader's prepare_outtmpl/escape_outtmpl, the result is
        searched with the record's regex, and each named group is written
        into info (a group that did not participate is stored as None).
        A record whose regex does not match only produces a warning.

        Returns a tuple of an empty list and the updated info dict.
        """
        for dictn in self._data:
            tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
            data_to_parse = self._downloader.escape_outtmpl(tmpl) % tmpl_dict
            self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl']))
            match = re.search(dictn['regex'], data_to_parse)
            if match is None:
                self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
                continue
            for attribute, value in match.groupdict().items():
                info[attribute] = value
                self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['tmpl'], value if value is not None else 'NA'))
        return [], info
68 | ||
69 | ||
class MetadataFromTitlePP(MetadataFromFieldPP):  # for backward compatibility
    """Legacy variant that parses metadata out of the ``title`` field only."""

    def __init__(self, downloader, titleformat):
        """Configure a single rule that interprets the title as *titleformat*.

        downloader is forwarded to the parent constructor unchanged.
        titleformat becomes the TO part of one '%(title)s:TO' format.
        """
        title_rule = '%%(title)s:%s' % titleformat
        super(MetadataFromTitlePP, self).__init__(downloader, [title_rule])
        # Keep the raw format and the regex derived from it as attributes
        self._titleformat = titleformat
        parsed_rule = self._data[0]
        self._titleregex = parsed_rule['regex']