--add-metadata Write metadata to the video file
--no-add-metadata Do not write metadata (default)
--parse-metadata FIELD:FORMAT Parse additional metadata like title/artist
- from other fields. Give field name to
- extract data from, and format of the field
- seperated by a ":". Either regular
- expression with named capture groups or a
- similar syntax to the output template can
- also be used. The parsed parameters replace
- any existing values and can be use in
- output template. This option can be used
- multiple times. Example: --parse-metadata
- "title:%(artist)s - %(title)s" matches a
- title like "Coldplay - Paradise". Example
- (regex): --parse-metadata
+ from other fields. Give a template or field
+ name to extract data from and the format to
+ interpret it as, separated by a ":". Either
+ regular expression with named capture
+ groups or a similar syntax to the output
+ template can be used for the FORMAT.
+ Similarly, the syntax for output template
+ can be used for FIELD to parse the data
+ from multiple fields. The parsed parameters
+ replace any existing values and can be used
+ in output templates. This option can be
+ used multiple times. Example: --parse-
+ metadata "title:%(artist)s - %(title)s"
+ matches a title like "Coldplay - Paradise".
+ Example: --parse-metadata "%(series)s
+ %(episode_number)s:%(title)s" sets the
+ title using series and episode number.
+ Example (regex): --parse-metadata
"description:Artist - (?P<artist>.+?)"
--xattrs Write metadata to the video file's xattrs
(using dublin core and xdg standards)
float_or_none,
format_bytes,
format_field,
+ FORMAT_RE,
formatSeconds,
GeoRestrictedError,
int_or_none,
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
return outtmpl_dict
+    def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
+        """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)
+
+        A copy of info_dict is extended with the derived fields duration_string,
+        epoch, autonumber and (when absent) resolution. None values and
+        list/tuple/dict values are dropped, every non-numeric value is passed
+        through sanitize, and the result is wrapped in a defaultdict that yields
+        the 'outtmpl_na_placeholder' param (default 'NA') for missing keys.
+        The template is rewritten so that it can be %-formatted with that dict.
+
+        outtmpl:    the output template string
+        info_dict:  dict of fields; it is copied and not written to
+        sanitize:   optional callable (key, value) -> value applied to the
+                    non-numeric values; the identity when None
+
+        Returns the tuple (outtmpl, template_dict)
+        """
+        template_dict = dict(info_dict)
+
+        # duration_string
+        template_dict['duration_string'] = (  # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
+            formatSeconds(info_dict['duration'], '-')
+            if info_dict.get('duration', None) is not None
+            else None)
+
+        # epoch
+        template_dict['epoch'] = int(time.time())
+
+        # autonumber
+        autonumber_size = self.params.get('autonumber_size')
+        if autonumber_size is None:
+            autonumber_size = 5
+        template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+
+        # resolution if not defined
+        if template_dict.get('resolution') is None:
+            if template_dict.get('width') and template_dict.get('height'):
+                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
+            elif template_dict.get('height'):
+                template_dict['resolution'] = '%sp' % template_dict['height']
+            elif template_dict.get('width'):
+                template_dict['resolution'] = '%dx?' % template_dict['width']
+
+        if sanitize is None:
+            sanitize = lambda k, v: v
+        template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
+                             for k, v in template_dict.items()
+                             if v is not None and not isinstance(v, (list, tuple, dict)))
+        na = self.params.get('outtmpl_na_placeholder', 'NA')
+        template_dict = collections.defaultdict(lambda: na, template_dict)
+
+        # For fields playlist_index and autonumber convert all occurrences
+        # of %(field)s to %(field)0Nd for backward compatibility
+        field_size_compat_map = {
+            'playlist_index': len(str(template_dict['n_entries'])),
+            'autonumber': autonumber_size,
+        }
+        FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
+
+        def pad_compat_field(mobj):
+            # The width is looked up for the field of this very occurrence.
+            # Taking it from the first match only would pad autonumber with
+            # the playlist_index width (or vice versa) when both are used.
+            field = mobj.group('field')
+            return '%%(%s)0%dd' % (field, field_size_compat_map[field])
+
+        outtmpl = re.sub(FIELD_SIZE_COMPAT_RE, pad_compat_field, outtmpl)
+
+        numeric_fields = list(self._NUMERIC_FIELDS)
+
+        # Format date
+        FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
+        for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
+            conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
+            if key in template_dict:
+                continue
+            value = strftime_or_none(template_dict.get(field), frmt, na)
+            if conv_type in 'crs':  # string
+                value = sanitize(field, value)
+            else:  # number
+                numeric_fields.append(key)
+                value = float_or_none(value, default=None)
+            if value is not None:
+                template_dict[key] = value
+
+        # Missing numeric fields used together with integer presentation types
+        # in format specification will break the argument substitution since
+        # string NA placeholder is returned for missing fields. We will patch
+        # output template for missing fields to meet string presentation type.
+        for numeric_field in numeric_fields:
+            if numeric_field not in template_dict:
+                # A callable replacement is inserted literally; a plain string
+                # would have any backslash of a "field>format" key interpreted
+                # as a replacement-template escape by re.sub
+                outtmpl = re.sub(
+                    FORMAT_RE.format(re.escape(numeric_field)),
+                    lambda mobj, key=numeric_field: '%({0})s'.format(key),
+                    outtmpl)
+
+        return outtmpl, template_dict
+
+
def _prepare_filename(self, info_dict, tmpl_type='default'):
try:
- template_dict = dict(info_dict)
-
- template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
- formatSeconds(info_dict['duration'], '-')
- if info_dict.get('duration', None) is not None
- else None)
-
- template_dict['epoch'] = int(time.time())
- autonumber_size = self.params.get('autonumber_size')
- if autonumber_size is None:
- autonumber_size = 5
- template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
- if template_dict.get('resolution') is None:
- if template_dict.get('width') and template_dict.get('height'):
- template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
- elif template_dict.get('height'):
- template_dict['resolution'] = '%sp' % template_dict['height']
- elif template_dict.get('width'):
- template_dict['resolution'] = '%dx?' % template_dict['width']
-
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
restricted=self.params.get('restrictfilenames'),
is_id=(k == 'id' or k.endswith('_id')))
- template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
- for k, v in template_dict.items()
- if v is not None and not isinstance(v, (list, tuple, dict)))
- na = self.params.get('outtmpl_na_placeholder', 'NA')
- template_dict = collections.defaultdict(lambda: na, template_dict)
-
outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
- force_ext = OUTTMPL_TYPES.get(tmpl_type)
-
- # For fields playlist_index and autonumber convert all occurrences
- # of %(field)s to %(field)0Nd for backward compatibility
- field_size_compat_map = {
- 'playlist_index': len(str(template_dict['n_entries'])),
- 'autonumber': autonumber_size,
- }
- FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
- mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
- if mobj:
- outtmpl = re.sub(
- FIELD_SIZE_COMPAT_RE,
- r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
- outtmpl)
-
- # As of [1] format syntax is:
- # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
- # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
- FORMAT_RE = r'''(?x)
- (?<!%)
- %
- \({0}\) # mapping key
- (?:[#0\-+ ]+)? # conversion flags (optional)
- (?:\d+)? # minimum field width (optional)
- (?:\.\d+)? # precision (optional)
- [hlL]? # length modifier (optional)
- (?P<type>[diouxXeEfFgGcrs%]) # conversion type
- '''
-
- numeric_fields = list(self._NUMERIC_FIELDS)
-
- # Format date
- FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
- for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
- conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
- if key in template_dict:
- continue
- value = strftime_or_none(template_dict.get(field), frmt, na)
- if conv_type in 'crs': # string
- value = sanitize(field, value)
- else: # number
- numeric_fields.append(key)
- value = float_or_none(value, default=None)
- if value is not None:
- template_dict[key] = value
-
- # Missing numeric fields used together with integer presentation types
- # in format specification will break the argument substitution since
- # string NA placeholder is returned for missing fields. We will patch
- # output template for missing fields to meet string presentation type.
- for numeric_field in numeric_fields:
- if numeric_field not in template_dict:
- outtmpl = re.sub(
- FORMAT_RE.format(re.escape(numeric_field)),
- r'%({0})s'.format(numeric_field), outtmpl)
+ outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
# expand_path translates '%%' into '%' and '$$' into '$'
# correspondingly that is not what we want since we need to keep
# title "Hello $PATH", we don't want `$PATH` to be expanded.
filename = expand_path(outtmpl).replace(sep, '') % template_dict
+ force_ext = OUTTMPL_TYPES.get(tmpl_type)
if force_ext is not None:
filename = replace_extension(filename, force_ext, template_dict.get('ext'))
class MetadataFromFieldPP(PostProcessor):
- regex = r'(?P<field>\w+):(?P<format>.+)$'
+    # FIELD:FORMAT is split at the first ':' (lazy match). A greedy FIELD would
+    # split at the last one and cut any FORMAT that contains ':' itself, e.g. a
+    # regular expression, which the former \w+ field pattern never did.
+    # A FIELD template therefore cannot contain a ':'.
+    regex = r'(?P<in>.+?):(?P<out>.+)$'
def __init__(self, downloader, formats):
PostProcessor.__init__(self, downloader)
match = re.match(self.regex, f)
assert match is not None
self._data.append({
- 'field': match.group('field'),
- 'format': match.group('format'),
- 'regex': self.format_to_regex(match.group('format'))})
+ 'in': match.group('in'),
+ 'out': match.group('out'),
+ 'tmpl': self.field_to_template(match.group('in')),
+ 'regex': self.format_to_regex(match.group('out')),
+ })
- def format_to_regex(self, fmt):
+    @staticmethod
+    def field_to_template(tmpl):
+        """ Turn a bare field name into '%(name)s'; any other string is returned as is """
+        is_field_name = re.match(r'\w+$', tmpl) is not None
+        return '%%(%s)s' % tmpl if is_field_name else tmpl
+
+ @staticmethod
+ def format_to_regex(fmt):
r"""
Converts a string like
'%(title)s - %(artist)s'
# replace %(..)s with regex group and escape other string parts
for match in re.finditer(r'%\((\w+)\)s', fmt):
regex += re.escape(fmt[lastpos:match.start()])
- regex += r'(?P<' + match.group(1) + r'>[^\r\n]+)'
+ regex += r'(?P<%s>[^\r\n]+)' % match.group(1)
lastpos = match.end()
if lastpos < len(fmt):
regex += re.escape(fmt[lastpos:])
def run(self, info):
+ """ Apply every stored FIELD:FORMAT rule to info, writing the parsed groups into it; returns ([], info) """
for dictn in self._data:
- field, regex = dictn['field'], dictn['regex']
- if field not in info:
- self.report_warning('Video doesnot have a %s' % field)
- continue
- data_to_parse = str_or_none(info[field])
- if data_to_parse is None:
- self.report_warning('Field %s cannot be parsed' % field)
- continue
- self.write_debug('Searching for r"%s" in %s' % (regex, field))
- match = re.search(regex, data_to_parse)
+ # Expand the rule's input template against info to obtain the text to parse
+ # NOTE(review): prepare_outtmpl substitutes its NA placeholder for fields
+ # missing from info, so the removed missing-field warnings have no
+ # counterpart here and the regex may match the placeholder - confirm intended
+ tmpl, info_copy = self._downloader.prepare_outtmpl(dictn['tmpl'], info)
+ data_to_parse = tmpl % info_copy
+ self.write_debug('Searching for r"%s" in %s' % (dictn['regex'], tmpl))
+ match = re.search(dictn['regex'], data_to_parse)
if match is None:
- self.report_warning('Could not interpret video %s as "%s"' % (field, dictn['format']))
+ self.report_warning('Could not interpret video %s as "%s"' % (dictn['in'], dictn['out']))
continue
+ # Every named group of the match overwrites the field of the same name in info
for attribute, value in match.groupdict().items():
info[attribute] = value
- self.to_screen('parsed %s from %s: %s' % (attribute, field, value if value is not None else 'NA'))
+ self.to_screen('parsed %s from "%s": %s' % (attribute, dictn['in'], value if value is not None else 'NA'))
return [], info