# jfr.im git - yt-dlp.git/blob - yt_dlp/postprocessor/metadatafromfield.py
from __future__ import unicode_literals

import re

from .common import PostProcessor
from ..compat import compat_str
class MetadataFromFieldPP(PostProcessor):
    r"""Post-processor that fills info-dict fields by parsing other fields.

    Each format spec has the shape ``IN:OUT``; the two halves are separated
    by the first colon that is not preceded by a backslash (``\:`` inside
    ``IN`` stands for a literal colon).

    * ``IN`` is either a bare field name (letters and underscores only) or an
      output template; see :meth:`field_to_template`.
    * ``OUT`` is either a ``%(name)s``-style format or a regular expression
      with named groups; see :meth:`format_to_regex`.
    """

    # <in> is matched non-greedily up to the first colon that is not escaped
    # with a backslash; everything after that colon is <out>.
    regex = r'(?P<in>.*?)(?<!\\):(?P<out>.+)$'

    def __init__(self, downloader, formats):
        """Parse every ``IN:OUT`` spec in *formats* into ``self._data``.

        Args:
            downloader: forwarded unchanged to ``PostProcessor.__init__``.
            formats: list or tuple of ``IN:OUT`` strings.

        Raises:
            AssertionError: if *formats* is not a list/tuple, if an item is
                not a ``compat_str``, or if an item does not match ``regex``.
        """
        PostProcessor.__init__(self, downloader)
        assert isinstance(formats, (list, tuple))
        self._data = []
        for f in formats:
            assert isinstance(f, compat_str)
            match = re.match(self.regex, f)
            assert match is not None
            # Undo the escaping that protected literal colons in the IN part.
            inp = match.group('in').replace('\\:', ':')
            self._data.append({
                'in': inp,
                'out': match.group('out'),
                'tmpl': self.field_to_template(inp),
                'regex': self.format_to_regex(match.group('out')),
            })

    @staticmethod
    def field_to_template(tmpl):
        """Return *tmpl* as an output template.

        A bare field name (only ASCII letters and underscores) is wrapped as
        ``%(name)s``; anything else is returned unchanged.
        """
        if re.match(r'[a-zA-Z_]+$', tmpl):
            return '%%(%s)s' % tmpl
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>.+)\ \-\ (?P<artist>.+)'

        A string that contains no ``%(name)s`` placeholder is returned
        unchanged, so callers may pass a ready-made regular expression.
        """
        if not re.search(r'%\(\w+\)s', fmt):
            return fmt
        lastpos = 0
        regex = ''
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            regex += re.escape(fmt[lastpos:match.start()])
            regex += r'(?P<%s>.+)' % match.group(1)
            lastpos = match.end()
        # Literal text after the last placeholder still has to be escaped.
        if lastpos < len(fmt):
            regex += re.escape(fmt[lastpos:])
        return regex

    def run(self, info):
        """Apply every parsed spec to *info*, updating it in place.

        For each spec the ``tmpl`` entry is rendered against *info* through
        the downloader's ``prepare_outtmpl`` / ``escape_outtmpl`` and the
        result is searched with the spec's ``regex``. Every named group of a
        successful match is stored in *info* under the group's name (the
        value is ``None`` for a group that did not participate).
        """
        for dictn in self._data:
            tmpl, tmpl_dict = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
            data_to_parse = self._downloader.escape_outtmpl(tmpl) % tmpl_dict
            self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], dictn['tmpl']))
            match = re.search(dictn['regex'], data_to_parse)
            if match is None:
                self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
                # NOTE(review): skipping to the next spec here is presumed,
                # not confirmed - verify against upstream.
                continue
            for attribute, value in match.groupdict().items():
                info[attribute] = value
                self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['tmpl'], value if value is not None else 'NA'))
        # NOTE(review): presumably (files_to_delete, info) as expected by the
        # PostProcessor.run contract - confirm against the base class.
        return [], info
class MetadataFromTitlePP(MetadataFromFieldPP):  # for backward compatibility
    """Legacy wrapper that parses metadata out of the ``title`` field only.

    Equivalent to ``MetadataFromFieldPP`` with the single spec
    ``'%(title)s:<titleformat>'``.
    """

    def __init__(self, downloader, titleformat):
        """Build the single title spec and expose the legacy attributes.

        Args:
            downloader: forwarded unchanged to ``MetadataFromFieldPP``.
            titleformat: the OUT half of the spec (a ``%(name)s``-style
                format or a regular expression with named groups).
        """
        # '%%' renders as a literal '%', so the spec is '%(title)s:<titleformat>'.
        super(MetadataFromTitlePP, self).__init__(downloader, ['%%(title)s:%s' % titleformat])
        self._titleformat = titleformat
        # The parent stored exactly one parsed spec; keep its regex under the old name.
        self._titleregex = self._data[0]['regex']