]> jfr.im git - yt-dlp.git/blame - yt_dlp/postprocessor/metadatafromfield.py
[viki] Print error message from API request
[yt-dlp.git] / yt_dlp / postprocessor / metadatafromfield.py
CommitLineData
5bfa4862 1from __future__ import unicode_literals
2
3import re
4
5from .common import PostProcessor
6from ..compat import compat_str
7
8
class MetadataFromFieldPP(PostProcessor):
    r"""Post-processor that fills info-dict entries by parsing other fields.

    Each entry of ``formats`` is a string of the form ``FROM:TO``.  The split
    happens at the first colon that is not preceded by a backslash; ``\:``
    inside FROM is turned back into a plain ``:``.  FROM is either a bare
    field name or a template string, TO is either a ``%(name)s`` style format
    or a regular expression used as-is.
    """

    # Separates 'FROM:TO' at the first unescaped colon.
    regex = r'(?P<in>.*?)(?<!\\):(?P<out>.+)$'

    def __init__(self, downloader, formats):
        """Parse every 'FROM:TO' spec in ``formats`` into a rule dict.

        ``formats`` must be a list or tuple of text strings; anything else,
        or a spec that does not match ``self.regex``, trips an assertion.
        """
        PostProcessor.__init__(self, downloader)
        assert isinstance(formats, (list, tuple))
        self._data = []
        for spec in formats:
            assert isinstance(spec, compat_str)
            parsed = re.match(self.regex, spec)
            assert parsed is not None
            # Undo the escaping that protected colons in the FROM part.
            source = parsed.group('in').replace('\\:', ':')
            target = parsed.group('out')
            rule = {
                'in': source,
                'out': target,
                'tmpl': self.field_to_template(source),
                'regex': self.format_to_regex(target),
            }
            self._data.append(rule)

    @staticmethod
    def field_to_template(tmpl):
        """Wrap a bare field name as '%(name)s'; return anything else as-is.

        A bare field name consists only of ASCII letters and underscores.
        """
        is_bare_field = re.match(r'[a-zA-Z_]+$', tmpl) is not None
        return ('%%(%s)s' % tmpl) if is_bare_field else tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""Turn a '%(name)s' style format into a regex with named groups.

        For example '%(title)s - %(artist)s' becomes
        '(?P<title>.+)\ \-\ (?P<artist>.+)'.

        A string without any '%(name)s' placeholder is assumed to already be
        a regular expression and is returned unchanged.
        """
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        pieces = []
        cursor = 0
        # Literal text between placeholders is escaped; every placeholder
        # becomes a named group matching one or more characters.
        for field in re.finditer(r'%\((\w+)\)s', fmt):
            pieces.append(re.escape(fmt[cursor:field.start()]))
            pieces.append(r'(?P<%s>.+)' % field.group(1))
            cursor = field.end()
        # Trailing literal text (escaping an empty string adds nothing).
        pieces.append(re.escape(fmt[cursor:]))
        return ''.join(pieces)

    def run(self, info):
        """Apply every parsed rule to ``info`` and return ``([], info)``.

        For each rule the source template is expanded through the
        downloader's ``prepare_outtmpl``/``escape_outtmpl`` helpers (defined
        outside this file) and searched with the rule's regex.  Every named
        group of a match is written into ``info`` (unmatched groups are
        stored as None); a rule that does not match only emits a warning.
        """
        for rule in self._data:
            ydl = self._downloader
            template = rule['tmpl']
            pattern = rule['regex']
            outtmpl, fields = ydl.prepare_outtmpl(template, info)
            haystack = ydl.escape_outtmpl(outtmpl) % fields
            self.write_debug('Searching for r"%s" in %s' % (pattern, template))
            found = re.search(pattern, haystack)
            if found is None:
                self.report_warning('Could not interpret video %s as "%s"' % (rule['in'], rule['out']))
                continue
            for attribute, value in found.groupdict().items():
                info[attribute] = value
                shown = 'NA' if value is None else value
                self.to_screen('parsed %s from "%s": %s' % (attribute, template, shown))
        return [], info
68
69
class MetadataFromTitlePP(MetadataFromFieldPP):  # for backward compatibility
    """Legacy wrapper that parses metadata out of the ``title`` field only."""

    def __init__(self, downloader, titleformat):
        """Build the single rule '%(title)s:<titleformat>' and set it up.

        ``_titleformat`` keeps the format as given; ``_titleregex`` holds the
        regex derived from it by the parent class.
        """
        title_spec = '%%(title)s:%s' % titleformat
        super(MetadataFromTitlePP, self).__init__(downloader, [title_spec])
        self._titleformat = titleformat
        # The parent stored exactly one rule for the one spec passed above.
        title_rule = self._data[0]
        self._titleregex = title_rule['regex']