]>
jfr.im git - yt-dlp.git/blob - yt_dlp/postprocessor/metadataparser.py
5 from .common
import PostProcessor
8 class MetadataParserPP(PostProcessor
):
10 INTERPRET
= 'interpretter'
13 def __init__(self
, downloader
, actions
):
14 PostProcessor
.__init
__(self
, downloader
)
18 assert isinstance(action
, self
.Actions
)
19 self
._actions
.append(getattr(self
, action
._value
_)(*f
[1:]))
22 def validate_action(cls
, action
, *data
):
23 ''' Each action can be:
24 (Actions.INTERPRET, from, to) OR
25 (Actions.REPLACE, field, search, replace)
27 if not isinstance(action
, cls
.Actions
):
28 raise ValueError(f
'{action!r} is not a valid action')
29 getattr(cls
, action
._value
_)(cls
, *data
)
32 def field_to_template(tmpl
):
33 if re
.match(r
'[a-zA-Z_]+$', tmpl
):
38 def format_to_regex(fmt
):
40 Converts a string like
41 '%(title)s - %(artist)s'
43 '(?P<title>.+)\ \-\ (?P<artist>.+)'
45 if not re
.search(r
'%\(\w+\)s', fmt
):
49 # replace %(..)s with regex group and escape other string parts
50 for match
in re
.finditer(r
'%\((\w+)\)s', fmt
):
51 regex
+= re
.escape(fmt
[lastpos
:match
.start()])
52 regex
+= rf
'(?P<{match.group(1)}>.+)'
54 if lastpos
< len(fmt
):
55 regex
+= re
.escape(fmt
[lastpos
:])
59 for f
in self
._actions
:
63 def interpretter(self
, inp
, out
):
65 outtmpl
, tmpl_dict
= self
._downloader
.prepare_outtmpl(template
, info
)
66 data_to_parse
= self
._downloader
.escape_outtmpl(outtmpl
) % tmpl_dict
67 self
.write_debug(f
'Searching for r{out_re.pattern!r} in {template!r}')
68 match
= out_re
.search(data_to_parse
)
70 self
.report_warning('Could not interpret {inp!r} as {out!r}')
72 for attribute
, value
in match
.groupdict().items():
73 info
[attribute
] = value
74 self
.to_screen('Parsed %s from %r: %r' % (attribute
, template
, value
if value
is not None else 'NA'))
76 template
= self
.field_to_template(inp
)
77 out_re
= re
.compile(self
.format_to_regex(out
))
80 def replacer(self
, field
, search
, replace
):
84 self
.report_warning(f
'Video does not have a {field}')
86 elif not isinstance(val
, str):
87 self
.report_warning(f
'Cannot replace in field {field} since it is a {type(val).__name__}')
89 self
.write_debug(f
'Replacing all r{search!r} in {field} with {replace!r}')
90 info
[field
], n
= search_re
.subn(replace
, val
)
92 self
.to_screen(f
'Changed {field} to: {info[field]}')
94 self
.to_screen(f
'Did not find r{search!r} in {field}')
96 search_re
= re
.compile(search
)
100 class MetadataFromFieldPP(MetadataParserPP
):
102 def to_action(cls
, f
):
103 match
= re
.match(r
'(?P<in>.*?)(?<!\\):(?P<out>.+)$', f
)
105 raise ValueError(f
'it should be FROM:TO, not {f!r}')
107 cls
.Actions
.INTERPRET
,
108 match
.group('in').replace('\\:', ':'),
111 def __init__(self
, downloader
, formats
):
112 MetadataParserPP
.__init
__(self
, downloader
, [self
.to_action(f
) for f
in formats
])
115 class MetadataFromTitlePP(MetadataParserPP
): # for backward compatibility
116 def __init__(self
, downloader
, titleformat
):
117 MetadataParserPP
.__init
__(self
, downloader
, [(self
.Actions
.INTERPRET
, 'title', titleformat
)])