]> jfr.im git - yt-dlp.git/blob - yt_dlp/postprocessor/metadatafromfield.py
Release 2021.03.24
[yt-dlp.git] / yt_dlp / postprocessor / metadatafromfield.py
1 from __future__ import unicode_literals
2
3 import re
4
5 from .common import PostProcessor
6 from ..compat import compat_str
7
8
class MetadataFromFieldPP(PostProcessor):
    """Post-processor that derives info-dict fields by parsing other fields.

    Every rule is a string of the form 'FROM:TO'.  FROM is either a bare
    field name or a template; TO is either a format string containing
    '%(name)s' placeholders or a regular expression with named groups.
    Each named group that matches is written into the info dict.
    """

    # The first group is greedy, so a rule is split at its LAST colon.
    regex = r'(?P<in>.+):(?P<out>.+)$'

    def __init__(self, downloader, formats):
        """Parse and store the rules.

        @param downloader  passed through to PostProcessor.__init__
        @param formats     list or tuple of 'FROM:TO' rule strings
        @raises AssertionError  if formats is not a list/tuple, or a rule
                                is not a string or does not match `regex`
        """
        PostProcessor.__init__(self, downloader)
        # Validation is raised explicitly rather than with `assert`, which is
        # stripped under `python -O` (a bad rule would then only fail later
        # with an opaque AttributeError).  AssertionError is kept so that the
        # exception type seen by existing callers does not change.
        if not isinstance(formats, (list, tuple)):
            raise AssertionError(
                'formats must be a list or tuple, got %r' % (formats,))
        self._data = []
        for f in formats:
            if not isinstance(f, compat_str):
                raise AssertionError(
                    'metadata format must be a string, got %r' % (f,))
            match = re.match(self.regex, f)
            if match is None:
                raise AssertionError(
                    'invalid metadata format %r, expected "FROM:TO"' % (f,))
            self._data.append({
                'in': match.group('in'),
                'out': match.group('out'),
                'tmpl': self.field_to_template(match.group('in')),
                'regex': self.format_to_regex(match.group('out')),
            })

    @staticmethod
    def field_to_template(tmpl):
        """Wrap a bare field name as '%(name)s'; return anything else as-is."""
        if re.match(r'\w+$', tmpl):
            return '%%(%s)s' % tmpl
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)'

        A string without any '%(name)s' placeholder is assumed to already be
        a regex and is returned unchanged.
        """
        parts = []
        lastpos = 0
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            parts.append(re.escape(fmt[lastpos:match.start()]))
            parts.append(r'(?P<%s>[^\r\n]+)' % match.group(1))
            lastpos = match.end()
        if not parts:
            # No placeholder found: nothing to convert.
            return fmt
        # Escaping an empty tail yields '', so no length check is needed.
        parts.append(re.escape(fmt[lastpos:]))
        return ''.join(parts)

    def run(self, info):
        """Apply every stored rule to `info`, updating it in place.

        For each rule the source template is rendered against `info` and
        searched with the rule's regex.  A rule that does not match only
        produces a warning.  Every named group of a match is stored in
        `info`, including groups that did not participate (value None).

        Returns the tuple ([], info); the empty list is presumably the
        "files to delete" slot of the PostProcessor contract -- confirm
        against PostProcessor.run.
        """
        for dictn in self._data:
            tmpl, info_copy = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
            data_to_parse = tmpl % info_copy
            self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], tmpl))
            match = re.search(dictn['regex'], data_to_parse)
            if match is None:
                self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
                continue
            for attribute, value in match.groupdict().items():
                info[attribute] = value
                self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['in'], value if value is not None else 'NA'))
        return [], info
67
68
class MetadataFromTitlePP(MetadataFromFieldPP):  # for backward compatibility
    """Legacy wrapper that parses metadata out of the 'title' field only.

    Equivalent to a MetadataFromFieldPP with the single rule
    'title' -> titleformat.
    """

    def __init__(self, downloader, titleformat):
        """Register one rule that parses `titleformat` out of the title.

        @param downloader   passed through to the parent constructor
        @param titleformat  format string with '%(name)s' placeholders, or a
                            regex with named groups
        """
        # The rule is registered directly instead of being serialised to
        # 'title:<titleformat>' and re-parsed: the parent's rule regex splits
        # at the LAST colon, so a title format that itself contains ':'
        # (e.g. '%(artist)s: %(title)s') would be split in the wrong place.
        super(MetadataFromTitlePP, self).__init__(downloader, [])
        self._data.append({
            'in': 'title',
            'out': titleformat,
            'tmpl': self.field_to_template('title'),
            'regex': self.format_to_regex(titleformat),
        })
        self._titleformat = titleformat
        self._titleregex = self._data[0]['regex']