]>
Commit | Line | Data |
---|---|---|
5bfa4862 | 1 | from __future__ import unicode_literals |
2 | ||
3 | import re | |
4 | ||
5 | from .common import PostProcessor | |
6 | from ..compat import compat_str | |
7 | ||
8 | ||
class MetadataFromFieldPP(PostProcessor):
    """Post-processor that fills info-dict entries by parsing other fields.

    Every item of ``formats`` is a ``'FROM:TO'`` string.  FROM is either a
    bare field name or a ready-made ``%``-style template; TO is either a
    ``%(name)s``-style format or a regular expression with named groups.
    """

    # The '<in>' group is greedy, so for a single-line spec the split happens
    # at the right-most colon that is still followed by some text.
    regex = r'(?P<in>.+):(?P<out>.+)$'

    def __init__(self, downloader, formats):
        """Parse each ``'FROM:TO'`` spec into a rule stored in ``self._data``.

        ``formats`` must be a list or tuple of text strings; a wrong type or
        a spec that does not match ``self.regex`` trips an assertion.
        """
        PostProcessor.__init__(self, downloader)
        assert isinstance(formats, (list, tuple))
        self._data = []
        for spec in formats:
            assert isinstance(spec, compat_str)
            parsed = re.match(self.regex, spec)
            assert parsed is not None
            source = parsed.group('in')
            target = parsed.group('out')
            rule = {
                'in': source,
                'out': target,
                'tmpl': self.field_to_template(source),
                'regex': self.format_to_regex(target),
            }
            self._data.append(rule)

    @staticmethod
    def field_to_template(tmpl):
        """Wrap a bare field name as ``%(name)s``; return anything else as is."""
        is_bare_name = re.match(r'\w+$', tmpl) is not None
        return ('%%(%s)s' % tmpl) if is_bare_name else tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Turn a format such as
        '%(title)s - %(artist)s'
        into a regex such as
        '(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)'

        A format without any %(name)s placeholder is returned unchanged, so
        it is used as a regular expression verbatim.
        """
        placeholders = list(re.finditer(r'%\((\w+)\)s', fmt))
        if not placeholders:
            return fmt
        pieces = []
        cursor = 0
        for placeholder in placeholders:
            # literal text between placeholders must match literally
            pieces.append(re.escape(fmt[cursor:placeholder.start()]))
            # each placeholder becomes a named group that stays on one line
            pieces.append(r'(?P<%s>[^\r\n]+)' % placeholder.group(1))
            cursor = placeholder.end()
        # escaping an empty tail yields '', so no length check is needed
        pieces.append(re.escape(fmt[cursor:]))
        return ''.join(pieces)

    def run(self, info):
        """Apply every stored rule to ``info`` and return ``([], info)``.

        For each rule the source template is rendered, searched with the
        rule's regex, and every named group of the match is written into
        ``info`` (a group that did not participate is stored as None).
        A rule that does not match only produces a warning.
        """
        for rule in self._data:
            source = rule['in']
            pattern = rule['regex']
            # NOTE(review): prepare_outtmpl is defined on the downloader, not
            # here; it is used as returning a (template, mapping) pair.
            tmpl, info_copy = self._downloader.prepare_outtmpl(rule['tmpl'], info)
            data_to_parse = tmpl % info_copy
            self.write_debug('Searching for r"%s" in %s' % (pattern, tmpl))
            found = re.search(pattern, data_to_parse)
            if found is None:
                self.report_warning('Could not interpret video %s as "%s"' % (source, rule['out']))
                continue
            for attribute, value in found.groupdict().items():
                info[attribute] = value
                shown = 'NA' if value is None else value
                self.to_screen('parsed %s from "%s": %s' % (attribute, source, shown))
        return [], info
67 | ||
68 | ||
class MetadataFromTitlePP(MetadataFromFieldPP):  # for backward compatibility
    """Backward-compatible wrapper that parses metadata out of the title.

    Equivalent to MetadataFromFieldPP with the single rule "take the
    ``title`` field and interpret it as ``titleformat``".
    """

    def __init__(self, downloader, titleformat):
        """Register one rule that parses the ``title`` field.

        ``titleformat`` is a ``%(name)s``-style format or a regular
        expression with named groups.  It may contain colons.  An empty
        format trips an assertion, as it did before.
        """
        # Start without rules and register the title rule directly.  Sending
        # 'title:<titleformat>' through the parent's spec parser is unsafe:
        # its greedy '<in>' group splits at the right-most colon, so a format
        # like '%(artist)s: %(title)s' was cut in the middle and never matched.
        super(MetadataFromTitlePP, self).__init__(downloader, [])
        # Keep the string coercion and the '.+$' validation that the parent
        # parser applied to the format part of the spec.
        match = re.match(r'(?P<out>.+)$', '%s' % titleformat)
        assert match is not None
        out = match.group('out')
        self._data.append({
            'in': 'title',
            'out': out,
            'tmpl': self.field_to_template('title'),
            'regex': self.format_to_regex(out),
        })
        self._titleformat = titleformat
        self._titleregex = self._data[0]['regex']