# Source: yt-dlp.git (jfr.im git blob view) - yt_dlp/postprocessor/metadatafromfield.py
from __future__ import unicode_literals

import re

from .common import PostProcessor
from ..compat import compat_str


class MetadataFromFieldPP(PostProcessor):
    """Postprocessor that fills info-dict fields by parsing other fields.

    Every entry of ``formats`` is a spec of the form ``"<in>:<out>"``:

    * ``<in>`` is either a bare field name (turned into ``%(<in>)s``) or a
      full output template; it is rendered against the info dict.
    * ``<out>`` is either a ``%(name)s``-style format (converted to a regex
      with one named group per field) or a ready-made regex.

    The rendered ``<in>`` text is searched with the ``<out>`` regex and every
    named group is written back into the info dict.
    """

    # Splits a spec into its two halves. Because the ``in`` group is greedy,
    # the split happens at the last colon that still leaves a non-empty ``out``.
    regex = r'(?P<in>.+):(?P<out>.+)$'

    def __init__(self, downloader, formats):
        """Parse and store the ``"<in>:<out>"`` specs.

        :param downloader: passed through to ``PostProcessor.__init__``.
        :param formats: list or tuple of ``"<in>:<out>"`` strings.
        :raises AssertionError: if ``formats`` is not a list/tuple, an entry
            is not a string, or an entry does not match ``self.regex``.
        """
        PostProcessor.__init__(self, downloader)
        assert isinstance(formats, (list, tuple))
        # One dict per spec; consumed by run() and by subclasses via _data[i].
        self._data = []
        for f in formats:
            assert isinstance(f, compat_str)
            match = re.match(self.regex, f)
            assert match is not None
            self._data.append({
                'in': match.group('in'),
                'out': match.group('out'),
                'tmpl': self.field_to_template(match.group('in')),
                'regex': self.format_to_regex(match.group('out')),
            })

    @staticmethod
    def field_to_template(tmpl):
        """Return ``'%(<name>)s'`` for a bare field name.

        A value consisting only of word characters is treated as a field name
        and wrapped into a template placeholder.
        """
        if re.match(r'\w+$', tmpl):
            return '%%(%s)s' % tmpl
        # NOTE(review): fall-through return reconstructed; anything that is
        # not a bare field name is assumed to already be a template.
        return tmpl

    @staticmethod
    def format_to_regex(fmt):
        r"""
        Converts a string like
           '%(title)s - %(artist)s'
        to a regex like
           '(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)'
        """
        if not re.search(r'%\(\w+\)s', fmt):
            # NOTE(review): reconstructed; a format without any %(name)s
            # placeholder is assumed to be a regex already and is kept as is.
            return fmt
        lastpos = 0
        regex = ''
        # replace %(..)s with regex group and escape other string parts
        for match in re.finditer(r'%\((\w+)\)s', fmt):
            regex += re.escape(fmt[lastpos:match.start()])
            regex += r'(?P<%s>[^\r\n]+)' % match.group(1)
            lastpos = match.end()
        # Escape whatever literal text follows the last placeholder.
        if lastpos < len(fmt):
            regex += re.escape(fmt[lastpos:])
        return regex

    # NOTE(review): the ``def`` line was missing from the extracted source;
    # the name ``run`` and the ``([], info)`` return value are assumed from
    # the PostProcessor convention - confirm against the base class.
    def run(self, info):
        """Apply every stored spec to ``info``, mutating it in place.

        For each spec the input template is rendered, searched with the
        spec's regex, and every named group is stored into ``info`` under the
        group's name. Specs that do not match only produce a warning.
        """
        for dictn in self._data:
            tmpl, info_copy = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
            data_to_parse = tmpl % info_copy
            self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], tmpl))
            match = re.search(dictn['regex'], data_to_parse)
            if match is None:
                self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
                continue
            for attribute, value in match.groupdict().items():
                info[attribute] = value
                self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['in'], value if value is not None else 'NA'))
        return [], info


class MetadataFromTitlePP(MetadataFromFieldPP):  # for backward compatibility
    """Legacy wrapper that parses metadata out of the ``title`` field only."""

    def __init__(self, downloader, titleformat):
        """Register the single spec ``'title:<titleformat>'``.

        :param downloader: forwarded to ``MetadataFromFieldPP.__init__``.
        :param titleformat: the output half of the spec, applied to ``title``.
        """
        super(MetadataFromTitlePP, self).__init__(downloader, ['title:%s' % titleformat])
        # Kept as attributes alongside the parsed spec: the raw format and
        # the regex the parent class derived for it.
        self._titleformat = titleformat
        self._titleregex = self._data[0]['regex']