--no-embed-thumbnail Do not embed thumbnail (default)
--add-metadata Write metadata to the video file
--no-add-metadata Do not write metadata (default)
- --metadata-from-title FORMAT Parse additional metadata like song title /
- artist from the video title. The format
- syntax is the same as --output. Regular
- expression with named capture groups may
- also be used. The parsed parameters replace
- existing values. Example: --metadata-from-
- title "%(artist)s - %(title)s" matches a
+ --parse-metadata FIELD:FORMAT Parse additional metadata like title/artist
+ from other fields. Give field name to
+ extract data from, and format of the field
+ separated by a ":". The format syntax is
+ the same as --output. Regular expression
+ with named capture groups may also be used.
+ The parsed parameters replace existing
+ values. This option can be used multiple
+ times. Example: --parse-metadata
+ "title:%(artist)s - %(title)s" matches a
title like "Coldplay - Paradise". Example
- (regex): --metadata-from-title
- "(?P<artist>.+?) - (?P<title>.+)"
+ (regex): --parse-metadata
+ "description:Artist - (?P<artist>.+?)"
--xattrs Write metadata to the video file's xattrs
(using dublin core and xdg standards)
--fixup POLICY Automatically correct known faults of the
Plugins are loaded from `<root-dir>/ytdlp_plugins/<type>/__init__.py`. Currently only `extractor` plugins are supported. Support for `downloader` and `postprocessor` plugins may be added in the future. See [ytdlp_plugins](ytdlp_plugins) for example.
-**Note**: `<root-dir>` is the directory of the binary (`<root-dir>/youtube-dlc`), or the root directory of the module if you are running directly from source-code ((`<root dir>/youtube_dlc/__main__.py`)
+**Note**: `<root-dir>` is the directory of the binary (`<root-dir>/youtube-dlc`), or the root directory of the module if you are running directly from source-code (`<root-dir>/youtube_dlc/__main__.py`)
# MORE
-For FAQ, Developer Instructions etc., see the [original README](https://github.com/ytdl-org/youtube-dl)
+For FAQ, Developer Instructions etc., see the [original README](https://github.com/ytdl-org/youtube-dl#faq)
import unittest
sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
-from youtube_dlc.postprocessor import MetadataFromTitlePP
+from youtube_dlc.postprocessor import MetadataFromFieldPP, MetadataFromTitlePP
+
+
+class TestMetadataFromField(unittest.TestCase):
+    """Checks for the FIELD:FORMAT handling of MetadataFromFieldPP."""
+
+    def test_format_to_regex(self):
+        # An output-template style format has to be turned into named groups
+        # joined by the escaped literal text between the placeholders.
+        expected = r'(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)'
+        processor = MetadataFromFieldPP(None, ['title:%(title)s - %(artist)s'])
+        self.assertEqual(processor._data[0]['regex'], expected)
class TestMetadataFromTitle(unittest.TestCase):
def test_format_to_regex(self):
pp = MetadataFromTitlePP(None, '%(title)s - %(artist)s')
- self.assertEqual(pp._titleregex, r'(?P<title>.+)\ \-\ (?P<artist>.+)')
+ self.assertEqual(pp._titleregex, r'(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)')
params = None
_ies = []
- _pps = []
- _pps_end = []
+ _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
__prepare_filename_warned = False
_download_retcode = None
_num_downloads = None
params = {}
self._ies = []
self._ies_instances = {}
- self._pps = []
- self._pps_end = []
+ self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
self.__prepare_filename_warned = False
self._post_hooks = []
self._progress_hooks = []
pp_class = get_postprocessor(pp_def_raw['key'])
pp_def = dict(pp_def_raw)
del pp_def['key']
- after_move = pp_def.get('_after_move', False)
- if '_after_move' in pp_def:
- del pp_def['_after_move']
+ if 'when' in pp_def:
+ when = pp_def['when']
+ del pp_def['when']
+ else:
+ when = 'normal'
pp = pp_class(self, **compat_kwargs(pp_def))
- self.add_post_processor(pp, after_move=after_move)
+ self.add_post_processor(pp, when=when)
for ph in self.params.get('post_hooks', []):
self.add_post_hook(ph)
for ie in gen_extractor_classes():
self.add_info_extractor(ie)
- def add_post_processor(self, pp, after_move=False):
+ def add_post_processor(self, pp, when='normal'):
"""Add a PostProcessor object to the end of the chain."""
- if after_move:
- self._pps_end.append(pp)
- else:
- self._pps.append(pp)
+ self._pps[when].append(pp)
pp.set_downloader(self)
def add_post_hook(self, ph):
self._num_downloads += 1
+ info_dict = self.pre_process(info_dict)
+
filename = self.prepare_filename(info_dict, warn=True)
info_dict['_filename'] = full_filename = self.prepare_filepath(filename)
temp_filename = self.prepare_filepath(filename, 'temp')
(k, v) for k, v in info_dict.items()
if k not in ['requested_formats', 'requested_subtitles'])
+    def run_pp(self, pp, infodict, files_to_move=None):
+        """Run one postprocessor and clean up the files it no longer needs.
+
+        pp            -- object whose run(infodict) returns
+                         (files_to_delete, infodict)
+        infodict      -- info dict handed to the postprocessor
+        files_to_move -- dict of pending file moves, updated in place; a
+                         fresh dict is created when it is omitted
+
+        Returns (files_to_move, infodict). A PostProcessingError raised by
+        the postprocessor is reported through report_error; in that case no
+        file is deleted.
+        """
+        # A literal {} default would be shared between calls and polluted by
+        # the setdefault()/pop() below, so allocate one per call instead.
+        if files_to_move is None:
+            files_to_move = {}
+        files_to_delete = []
+        try:
+            files_to_delete, infodict = pp.run(infodict)
+        except PostProcessingError as e:
+            self.report_error(e.msg)
+        if not files_to_delete:
+            return files_to_move, infodict
+
+        if self.params.get('keepvideo', False):
+            # Originals are kept: remember them in files_to_move (with an
+            # empty string value) unless they are already listed there.
+            for f in files_to_delete:
+                files_to_move.setdefault(f, '')
+        else:
+            for old_filename in set(files_to_delete):
+                self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
+                try:
+                    os.remove(encodeFilename(old_filename))
+                except (IOError, OSError):
+                    self.report_warning('Unable to remove downloaded original file')
+                # A file scheduled for deletion must not be moved afterwards
+                files_to_move.pop(old_filename, None)
+        return files_to_move, infodict
+
+    def pre_process(self, ie_info):
+        """Run the 'beforedl' postprocessors on a copy of ie_info and return the resulting info dict."""
+        result = dict(ie_info)
+        for processor in self._pps['beforedl']:
+            # run_pp returns (files_to_move, infodict); only the info dict is needed here
+            _, result = self.run_pp(processor, result)
+        return result
+
def post_process(self, filename, ie_info, files_to_move={}):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
info['filepath'] = filename
- def run_pp(pp):
- files_to_delete = []
- infodict = info
- try:
- files_to_delete, infodict = pp.run(infodict)
- except PostProcessingError as e:
- self.report_error(e.msg)
- if not files_to_delete:
- return infodict
-
- if self.params.get('keepvideo', False):
- for f in files_to_delete:
- files_to_move.setdefault(f, '')
- else:
- for old_filename in set(files_to_delete):
- self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- try:
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded original file')
- if old_filename in files_to_move:
- del files_to_move[old_filename]
- return infodict
-
- for pp in ie_info.get('__postprocessors', []) + self._pps:
- info = run_pp(pp)
- info = run_pp(MoveFilesAfterDownloadPP(self, files_to_move))
- files_to_move = {}
- for pp in self._pps_end:
- info = run_pp(pp)
+ for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
+ files_to_move, info = self.run_pp(pp, info, files_to_move)
+ info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info, files_to_move)[1]
+ for pp in self._pps['aftermove']:
+ files_to_move, info = self.run_pp(pp, info, {})
def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
from .extractor import gen_extractors, list_extractors
from .extractor.common import InfoExtractor
from .extractor.adobepass import MSO_INFO
+from .postprocessor.metadatafromfield import MetadataFromFieldPP
from .YoutubeDL import YoutubeDL
if re.match(InfoExtractor.FormatSort.regex, f) is None:
parser.error('invalid format sort string "%s" specified' % f)
+ if opts.metafromfield is None:
+ opts.metafromfield = []
+ if opts.metafromtitle is not None:
+ opts.metafromfield.append('title:%s' % opts.metafromtitle)
+ for f in opts.metafromfield:
+ if re.match(MetadataFromFieldPP.regex, f) is None:
+ parser.error('invalid format string "%s" specified for --parse-metadata' % f)
+
any_getting = opts.geturl or opts.gettitle or opts.getid or opts.getthumbnail or opts.getdescription or opts.getfilename or opts.getformat or opts.getduration or opts.dumpjson or opts.dump_single_json
any_printing = opts.print_json
download_archive_fn = expand_path(opts.download_archive) if opts.download_archive is not None else opts.download_archive
# PostProcessors
postprocessors = []
- if opts.metafromtitle:
+ if opts.metafromfield:
postprocessors.append({
- 'key': 'MetadataFromTitle',
- 'titleformat': opts.metafromtitle
+ 'key': 'MetadataFromField',
+ 'formats': opts.metafromfield,
+ 'when': 'beforedl'
})
if opts.extractaudio:
postprocessors.append({
postprocessors.append({
'key': 'ExecAfterDownload',
'exec_cmd': opts.exec_cmd,
- '_after_move': True
+ 'when': 'aftermove'
})
_args_compat_warning = 'WARNING: %s given without specifying name. The arguments will be given to all %s\n'
postproc.add_option(
'--metadata-from-title',
metavar='FORMAT', dest='metafromtitle',
+ help=optparse.SUPPRESS_HELP)
+ postproc.add_option(
+ '--parse-metadata',
+ metavar='FIELD:FORMAT', dest='metafromfield', action='append',
help=(
- 'Parse additional metadata like song title / artist from the video title. '
- 'The format syntax is the same as --output. Regular expression with '
- 'named capture groups may also be used. '
+ 'Parse additional metadata like title/artist from other fields. '
+ 'Give field name to extract data from, and format of the field separated by a ":". '
+ 'The format syntax is the same as --output. '
+ 'Regular expression with named capture groups may also be used. '
'The parsed parameters replace existing values. '
- 'Example: --metadata-from-title "%(artist)s - %(title)s" matches a title like '
+ 'This option can be used multiple times. '
+ 'Example: --parse-metadata "title:%(artist)s - %(title)s" matches a title like '
'"Coldplay - Paradise". '
- 'Example (regex): --metadata-from-title "(?P<artist>.+?) - (?P<title>.+)"'))
+ 'Example (regex): --parse-metadata "description:Artist - (?P<artist>.+?)"'))
postproc.add_option(
'--xattrs',
action='store_true', dest='xattrs', default=False,
)
from .xattrpp import XAttrMetadataPP
from .execafterdownload import ExecAfterDownloadPP
-from .metadatafromtitle import MetadataFromTitlePP
+from .metadatafromfield import MetadataFromFieldPP
+from .metadatafromfield import MetadataFromTitlePP
from .movefilesafterdownload import MoveFilesAfterDownloadPP
from .sponskrub import SponSkrubPP
'FFmpegSubtitlesConvertorPP',
'FFmpegVideoConvertorPP',
'FFmpegVideoRemuxerPP',
+ 'MetadataFromFieldPP',
'MetadataFromTitlePP',
'MoveFilesAfterDownloadPP',
'SponSkrubPP',
--- /dev/null
+from __future__ import unicode_literals
+
+import re
+
+from .common import PostProcessor
+from ..compat import compat_str
+
+
+class MetadataFromFieldPP(PostProcessor):
+    """Parse additional metadata out of existing info-dict fields.
+
+    Each entry of ``formats`` is a ``FIELD:FORMAT`` string. FORMAT is either
+    a template containing ``%(name)s`` placeholders or a regular expression
+    with named capture groups; every named group of a successful match is
+    written into the info dict under the group's name.
+    """
+
+    # Shape of a single FIELD:FORMAT specification; the command-line front
+    # end matches --parse-metadata arguments against this same pattern.
+    regex = r'(?P<field>\w+):(?P<format>.+)$'
+
+    def __init__(self, downloader, formats):
+        """Split every FIELD:FORMAT entry into its field name, format and regex.
+
+        ``formats`` must be a list or tuple of text strings matching ``regex``.
+        """
+        PostProcessor.__init__(self, downloader)
+        assert isinstance(formats, (list, tuple))
+        self._data = []
+        for f in formats:
+            assert isinstance(f, compat_str)
+            match = re.match(self.regex, f)
+            assert match is not None
+            self._data.append({
+                'field': match.group('field'),
+                'format': match.group('format'),
+                'regex': self.format_to_regex(match.group('format'))})
+
+    def format_to_regex(self, fmt):
+        r"""
+        Converts a string like
+           '%(title)s - %(artist)s'
+        to a regex like
+           '(?P<title>[^\r\n]+)\ \-\ (?P<artist>[^\r\n]+)'
+
+        A string without any %(name)s placeholder is taken to be a regex
+        already and is returned unchanged.
+        """
+        if not re.search(r'%\(\w+\)s', fmt):
+            return fmt
+        lastpos = 0
+        parts = []
+        # replace %(..)s with regex group and escape other string parts
+        for match in re.finditer(r'%\((\w+)\)s', fmt):
+            parts.append(re.escape(fmt[lastpos:match.start()]))
+            parts.append(r'(?P<%s>[^\r\n]+)' % match.group(1))
+            lastpos = match.end()
+        # trailing literal text (re.escape('') is '', so no length check is needed)
+        parts.append(re.escape(fmt[lastpos:]))
+        return ''.join(parts)
+
+    def run(self, info):
+        """Apply every configured pattern to ``info`` and store the captured groups.
+
+        Fields that are missing or None, and patterns that do not match, are
+        reported with a warning and skipped. Returns ``([], info)``.
+        """
+        for dictn in self._data:
+            field, regex = dictn['field'], dictn['regex']
+            field_value = info.get(field)
+            if field_value is None:
+                self.report_warning('Video does not have a %s' % field)
+                continue
+            if not isinstance(field_value, (compat_str, bytes)):
+                # re.search raises TypeError on non-text values, so match
+                # against their text form instead
+                field_value = compat_str(field_value)
+            self.write_debug('Searching for r"%s" in %s' % (regex, field))
+            match = re.search(regex, field_value)
+            if match is None:
+                self.report_warning('Could not interpret video %s as "%s"' % (field, dictn['format']))
+                continue
+            for attribute, value in match.groupdict().items():
+                info[attribute] = value
+                self.to_screen('parsed %s from %s: %s' % (attribute, field, value if value is not None else 'NA'))
+        return [], info
+
+
+class MetadataFromTitlePP(MetadataFromFieldPP):
+    """Backward-compatible wrapper that parses metadata from the title field only."""
+
+    def __init__(self, downloader, titleformat):
+        """Register a single 'title:<titleformat>' rule and expose the legacy attributes."""
+        title_spec = 'title:%s' % titleformat
+        super(MetadataFromTitlePP, self).__init__(downloader, [title_spec])
+        # Attributes carried over from the previous MetadataFromTitlePP implementation
+        self._titleformat = titleformat
+        self._titleregex = self._data[0]['regex']
+++ /dev/null
-from __future__ import unicode_literals
-
-import re
-
-from .common import PostProcessor
-
-
-class MetadataFromTitlePP(PostProcessor):
- def __init__(self, downloader, titleformat):
- super(MetadataFromTitlePP, self).__init__(downloader)
- self._titleformat = titleformat
- self._titleregex = (self.format_to_regex(titleformat)
- if re.search(r'%\(\w+\)s', titleformat)
- else titleformat)
-
- def format_to_regex(self, fmt):
- r"""
- Converts a string like
- '%(title)s - %(artist)s'
- to a regex like
- '(?P<title>.+)\ \-\ (?P<artist>.+)'
- """
- lastpos = 0
- regex = ''
- # replace %(..)s with regex group and escape other string parts
- for match in re.finditer(r'%\((\w+)\)s', fmt):
- regex += re.escape(fmt[lastpos:match.start()])
- regex += r'(?P<' + match.group(1) + '>.+)'
- lastpos = match.end()
- if lastpos < len(fmt):
- regex += re.escape(fmt[lastpos:])
- return regex
-
- def run(self, info):
- title = info['title']
- match = re.match(self._titleregex, title)
- if match is None:
- self.to_screen('Could not interpret title of video as "%s"' % self._titleformat)
- return [], info
- for attribute, value in match.groupdict().items():
- info[attribute] = value
- self.to_screen('parsed %s: %s' % (attribute, value if value is not None else 'NA'))
-
- return [], info