]>
jfr.im git - yt-dlp.git/blob - yt_dlp/postprocessor/metadataparser.py
3 from .common
import PostProcessor
4 from ..utils
import Namespace
7 class MetadataParserPP(PostProcessor
):
8 def __init__(self
, downloader
, actions
):
9 super().__init
__(downloader
)
13 assert action
in self
.Actions
14 self
._actions
.append(action(self
, *args
))
17 def validate_action(cls
, action
, *data
):
18 """Each action can be:
19 (Actions.INTERPRET, from, to) OR
20 (Actions.REPLACE, field, search, replace)
22 if action
not in cls
.Actions
:
23 raise ValueError(f
'{action!r} is not a valid action')
24 action(cls
, *data
) # So this can raise error to validate
27 def field_to_template(tmpl
):
28 if re
.match(r
'[a-zA-Z_]+$', tmpl
):
31 from ..YoutubeDL
import YoutubeDL
32 err
= YoutubeDL
.validate_outtmpl(tmpl
)
38 def format_to_regex(fmt
):
40 Converts a string like
41 '%(title)s - %(artist)s'
43 '(?P<title>.+)\ \-\ (?P<artist>.+)'
45 if not re
.search(r
'%\(\w+\)s', fmt
):
49 # replace %(..)s with regex group and escape other string parts
50 for match
in re
.finditer(r
'%\((\w+)\)s', fmt
):
51 regex
+= re
.escape(fmt
[lastpos
:match
.start()])
52 regex
+= rf
'(?P<{match.group(1)}>.+)'
54 if lastpos
< len(fmt
):
55 regex
+= re
.escape(fmt
[lastpos
:])
59 for f
in self
._actions
:
63 def interpretter(self
, inp
, out
):
65 data_to_parse
= self
._downloader
.evaluate_outtmpl(template
, info
)
66 self
.write_debug(f
'Searching for {out_re.pattern!r} in {template!r}')
67 match
= out_re
.search(data_to_parse
)
69 self
.to_screen(f
'Could not interpret {inp!r} as {out!r}')
71 for attribute
, value
in match
.groupdict().items():
72 info
[attribute
] = value
73 self
.to_screen('Parsed %s from %r: %r' % (attribute
, template
, value
if value
is not None else 'NA'))
75 template
= self
.field_to_template(inp
)
76 out_re
= re
.compile(self
.format_to_regex(out
))
79 def replacer(self
, field
, search
, replace
):
83 self
.to_screen(f
'Video does not have a {field}')
85 elif not isinstance(val
, str):
86 self
.report_warning(f
'Cannot replace in field {field} since it is a {type(val).__name__}')
88 self
.write_debug(f
'Replacing all {search!r} in {field} with {replace!r}')
89 info
[field
], n
= search_re
.subn(replace
, val
)
91 self
.to_screen(f
'Changed {field} to: {info[field]}')
93 self
.to_screen(f
'Did not find {search!r} in {field}')
95 search_re
= re
.compile(search
)
98 Actions
= Namespace(INTERPRET
=interpretter
, REPLACE
=replacer
)
101 class MetadataFromFieldPP(MetadataParserPP
):
103 def to_action(cls
, f
):
104 match
= re
.match(r
'(?s)(?P<in>.*?)(?<!\\):(?P<out>.+)$', f
)
106 raise ValueError(f
'it should be FROM:TO, not {f!r}')
108 cls
.Actions
.INTERPRET
,
109 match
.group('in').replace('\\:', ':'),
113 def __init__(self
, downloader
, formats
):
114 super().__init
__(downloader
, [self
.to_action(f
) for f
in formats
])
118 class MetadataFromTitlePP(MetadataParserPP
):
119 def __init__(self
, downloader
, titleformat
):
120 super().__init
__(downloader
, [(self
.Actions
.INTERPRET
, 'title', titleformat
)])
121 self
.deprecation_warning(
122 'yt_dlp.postprocessor.MetadataFromTitlePP is deprecated '
123 'and may be removed in a future version. Use yt_dlp.postprocessor.MetadataFromFieldPP instead')