iri_to_uri,
ISO3166Utils,
locked_file,
+ make_dir,
make_HTTPS_handler,
MaxDownloadsReached,
orderedSet,
process_communicate_or_kill,
)
from .cache import Cache
-from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
FFmpegFixupStretchedPP,
FFmpegMergerPP,
FFmpegPostProcessor,
- FFmpegSubtitlesConvertorPP,
+ # FFmpegSubtitlesConvertorPP,
get_postprocessor,
+ MoveFilesAfterDownloadPP,
)
from .version import __version__
allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
outtmpl: Template for output names.
- restrictfilenames: Do not allow "&" and spaces in file names.
- trim_file_name: Limit length of filename (extension excluded).
- ignoreerrors: Do not stop on download errors. (Default True when running youtube-dlc, but False when directly accessing YoutubeDL class)
+ outtmpl_na_placeholder: Placeholder for unavailable meta fields.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ trim_file_name: Limit length of filename (extension excluded)
+ ignoreerrors: Do not stop on download errors
+ (Default True when running youtube-dlc,
+ but False when directly accessing YoutubeDL class)
force_generic_extractor: Force downloader to use the generic extractor
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
+ writecomments: Extract video comments. This will not be written to disk
+ unless writeinfojson is also given
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
- break_on_existing: Stop the download process after attempting to download a file that's
- in the archive.
- break_on_reject: Stop the download process when encountering a video that has been filtered out.
- cookiefile: File name where cookies should be read from and dumped to.
+ break_on_existing: Stop the download process after attempting to download a
+ file that is in the archive.
+ break_on_reject: Stop the download process when encountering a video that
+ has been filtered out.
+ cookiefile: File name where cookies should be read from and dumped to
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
youtube_dlc/postprocessor/__init__.py for a list.
+                       * when: Optional. When to run this postprocessor; one of
+                               'beforedl', 'normal' (default) or 'aftermove'
as well as any further keyword arguments for the
postprocessor.
post_hooks: A list of functions that get called as the final step
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
merge_output_format: Extension to use when merging formats.
+ final_ext: Expected final extension; used to detect when the file was
+ already downloaded and converted. "merge_output_format" is
+ replaced by this extension when given
fixup: Automatically correct known faults of the file.
One of:
- "never": do nothing
The following options are used by the post processors:
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg.
+ otherwise prefer ffmpeg. (avconv support is deprecated)
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
- postprocessor_args: A dictionary of postprocessor names (in lower case) and a list
- of additional command-line arguments for the postprocessor.
- Use 'default' as the name for arguments to passed to all PP.
-
+ postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
+ and a list of additional command-line arguments for the
+ postprocessor/executable. The dict can also have "PP+EXE" keys
+ which are used when the given exe is used by the given PP.
+                       Use 'default' as the name for arguments to be passed to all PP
The following options are used by the Youtube extractor:
youtube_include_dash_manifest: If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
params = None
_ies = []
- _pps = []
+ _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+ __prepare_filename_warned = False
_download_retcode = None
_num_downloads = None
+ _playlist_level = 0
+ _playlist_urls = set()
_screen_file = None
def __init__(self, params=None, auto_init=True):
params = {}
self._ies = []
self._ies_instances = {}
- self._pps = []
+ self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+ self.__prepare_filename_warned = False
self._post_hooks = []
self._progress_hooks = []
self._download_retcode = 0
if self.params.get('geo_verification_proxy') is None:
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
+ if self.params.get('final_ext'):
+ if self.params.get('merge_output_format'):
+ self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
+ self.params['merge_output_format'] = self.params['final_ext']
+
check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
pp_class = get_postprocessor(pp_def_raw['key'])
pp_def = dict(pp_def_raw)
del pp_def['key']
+ if 'when' in pp_def:
+ when = pp_def['when']
+ del pp_def['when']
+ else:
+ when = 'normal'
pp = pp_class(self, **compat_kwargs(pp_def))
- self.add_post_processor(pp)
+ self.add_post_processor(pp, when=when)
for ph in self.params.get('post_hooks', []):
self.add_post_hook(ph)
for ie in gen_extractor_classes():
self.add_info_extractor(ie)
- def add_post_processor(self, pp):
+ def add_post_processor(self, pp, when='normal'):
"""Add a PostProcessor object to the end of the chain."""
- self._pps.append(pp)
+ self._pps[when].append(pp)
pp.set_downloader(self)
def add_post_hook(self, ph):
# already of type unicode()
ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
elif 'TERM' in os.environ:
- self._write_string('\033[0;%s\007' % message, self._screen_file)
+ self._write_string('\033]0;%s\007' % message, self._screen_file)
def save_console_title(self):
if not self.params.get('consoletitle', False):
except UnicodeEncodeError:
self.to_screen('Deleting already existent file')
- def prepare_filename(self, info_dict):
+ def prepare_filename(self, info_dict, warn=False):
"""Generate the output filename."""
try:
template_dict = dict(info_dict)
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict)))
- template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+ template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
- # string 'NA' is returned for missing fields. We will patch output
- # template for missing fields to meet string presentation type.
+ # string NA placeholder is returned for missing fields. We will patch
+ # output template for missing fields to meet string presentation type.
for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:
# to workaround encoding issues with subprocess on python2 @ Windows
if sys.version_info < (3, 0) and sys.platform == 'win32':
filename = encodeFilename(filename, True).decode(preferredencoding())
- return sanitize_path(filename)
+ filename = sanitize_path(filename)
+
+ if warn and not self.__prepare_filename_warned:
+ if not self.params.get('paths'):
+ pass
+ elif filename == '-':
+                    self.report_warning('--paths is ignored when outputting to stdout')
+ elif os.path.isabs(filename):
+ self.report_warning('--paths is ignored since an absolute path is given in output template')
+ self.__prepare_filename_warned = True
+
+ return filename
except ValueError as err:
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
+    def prepare_filepath(self, filename, dir_type=''):
+        """Return the output path for filename under the configured 'paths'.
+
+        filename -- name produced by prepare_filename(); '-' (stdout) is
+                    returned unchanged
+        dir_type -- optional key into params['paths'] selecting a
+                    sub-directory; when empty no sub-directory is used
+
+        The result is sanitize_path(<home>/<dir_type path>/<filename>), where
+        <home> is params['paths']['home'] (empty if unset).
+        """
+        if filename == '-':
+            return filename
+        # 'paths' may be missing or explicitly None; both mean "no paths set".
+        # dict.get's default only applies to a missing key, hence the 'or'.
+        paths = self.params.get('paths') or {}
+        assert isinstance(paths, dict)
+        homepath = expand_path(paths.get('home', '').strip())
+        assert isinstance(homepath, compat_str)
+        subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
+        assert isinstance(subdir, compat_str)
+        return sanitize_path(os.path.join(homepath, subdir, filename))
+
def _match_entry(self, info_dict, incomplete):
""" Returns None if the file should be downloaded """
reason = check_filter()
if reason is not None:
self.to_screen('[download] ' + reason)
- if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
+ if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
raise ExistingVideoReached()
- elif self.params.get('break_on_reject'):
+ elif self.params.get('break_on_reject', False):
raise RejectedVideoReached()
return reason
'and will probably not work.')
try:
- temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
+ temp_id = str_or_none(
+ ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
+ else ie._match_id(url))
except (AssertionError, IndexError, AttributeError):
temp_id = None
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
self.to_screen("[%s] %s: has already been recorded in archive" % (
ie_key, temp_id))
break
-
return self.__extract_info(url, ie, download, extra_info, process, info_dict)
-
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
or extract_flat is True):
self.__forced_printings(
- ie_result, self.prepare_filename(ie_result),
+ ie_result,
+ self.prepare_filepath(self.prepare_filename(ie_result)),
incomplete=True)
return ie_result
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type in ('playlist', 'multi_video'):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1) - 1
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
-
- ie_entries = ie_result['entries']
-
- def make_playlistitems_entries(list_ie_entries):
- num_entries = len(list_ie_entries)
- return [
- list_ie_entries[i - 1] for i in playlistitems
- if -num_entries <= i - 1 < num_entries]
-
- def report_download(num_entries):
+ # Protect from infinite recursion due to recursively nested playlists
+ # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
+ webpage_url = ie_result['webpage_url']
+ if webpage_url in self._playlist_urls:
self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, num_entries))
+                    # Parenthesised on purpose: '%' binds tighter than 'or', so
+                    # without the parentheses the id fallback is never used
+                    '[download] Skipping already downloaded playlist: %s'
+                    % (ie_result.get('title') or ie_result.get('id')))
+ return
- if isinstance(ie_entries, list):
- n_all_entries = len(ie_entries)
- if playlistitems:
- entries = make_playlistitems_entries(ie_entries)
- else:
- entries = ie_entries[playliststart:playlistend]
- n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
- (ie_result['extractor'], playlist, n_all_entries, n_entries))
- elif isinstance(ie_entries, PagedList):
- if playlistitems:
- entries = []
- for item in playlistitems:
- entries.extend(ie_entries.getslice(
- item - 1, item
- ))
- else:
- entries = ie_entries.getslice(
- playliststart, playlistend)
- n_entries = len(entries)
- report_download(n_entries)
- else: # iterable
- if playlistitems:
- entries = make_playlistitems_entries(list(itertools.islice(
- ie_entries, 0, max(playlistitems))))
- else:
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
- n_entries = len(entries)
- report_download(n_entries)
-
- if self.params.get('playlistreverse', False):
- entries = entries[::-1]
-
- if self.params.get('playlistrandom', False):
- random.shuffle(entries)
-
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
-
- for i, entry in enumerate(entries, 1):
- self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
- 'n_entries': n_entries,
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_uploader': ie_result.get('uploader'),
- 'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
- 'extractor': ie_result['extractor'],
- 'webpage_url': ie_result['webpage_url'],
- 'webpage_url_basename': url_basename(ie_result['webpage_url']),
- 'extractor_key': ie_result['extractor_key'],
- }
-
- self._match_entry(entry, incomplete=True)
-
- entry_result = self.__process_iterable_entry(entry, download, extra)
- # TODO: skip failed (empty) entries?
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
- self.to_screen('[download] Finished downloading playlist: %s' % playlist)
- return ie_result
+ self._playlist_level += 1
+ self._playlist_urls.add(webpage_url)
+ try:
+ return self.__process_playlist(ie_result, download)
+ finally:
+ self._playlist_level -= 1
+ if not self._playlist_level:
+ self._playlist_urls.clear()
elif result_type == 'compat_list':
self.report_warning(
'Extractor %s returned a compat_list result. '
else:
raise Exception('Invalid result type: %s' % result_type)
+    def __process_playlist(self, ie_result, download):
+        """Write playlist-level metadata, then process every selected entry.
+
+        ie_result -- playlist/multi_video result dict; must provide 'entries',
+                     'extractor', 'extractor_key' and 'webpage_url'
+        download  -- passed through to the per-entry processing
+
+        Returns ie_result with 'entries' replaced by the list of processed
+        entry results. Returns None if a metadata directory cannot be created
+        or the playlist description cannot be written.
+        """
+        # We process each entry in the playlist
+        playlist = ie_result.get('title') or ie_result.get('id')
+        self.to_screen('[download] Downloading playlist: %s' % playlist)
+        # Template fields for naming the playlist-level files; values already
+        # present in ie_result take precedence over these defaults
+        ie_copy = {
+            'playlist': playlist,
+            'playlist_id': ie_result.get('id'),
+            'playlist_title': ie_result.get('title'),
+            'playlist_uploader': ie_result.get('uploader'),
+            'playlist_uploader_id': ie_result.get('uploader_id'),
+            'playlist_index': 0
+        }
+        ie_copy.update(dict(ie_result))
+
+        def ensure_dir_exists(path):
+            return make_dir(path, self.report_error)
+
+        if self.params.get('writeinfojson', False):
+            infofn = replace_extension(
+                self.prepare_filepath(self.prepare_filename(ie_copy), 'infojson'),
+                'info.json', ie_result.get('ext'))
+            if not ensure_dir_exists(encodeFilename(infofn)):
+                return
+            # Keep the existing file only when overwriting is disabled
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
+                self.to_screen('[info] Playlist metadata is already present')
+            else:
+                self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
+                # The entries themselves are not part of the playlist metadata file
+                playlist_info = dict(ie_result)
+                playlist_info.pop('entries')
+                try:
+                    write_json_file(self.filter_requested_info(playlist_info), infofn)
+                except (OSError, IOError):
+                    self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
+
+        if self.params.get('writedescription', False):
+            descfn = replace_extension(
+                self.prepare_filepath(self.prepare_filename(ie_copy), 'description'),
+                'description', ie_result.get('ext'))
+            if not ensure_dir_exists(encodeFilename(descfn)):
+                return
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
+                self.to_screen('[info] Playlist description is already present')
+            elif ie_result.get('description') is None:
+                self.report_warning('There\'s no playlist description to write.')
+            else:
+                try:
+                    self.to_screen('[info] Writing playlist description to: ' + descfn)
+                    with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+                        descfile.write(ie_result['description'])
+                except (OSError, IOError):
+                    self.report_error('Cannot write playlist description file ' + descfn)
+                    return
+
+        playlist_results = []
+
+        playliststart = self.params.get('playliststart', 1) - 1
+        playlistend = self.params.get('playlistend')
+        # For backwards compatibility, interpret -1 as whole list
+        if playlistend == -1:
+            playlistend = None
+
+        playlistitems_str = self.params.get('playlist_items')
+        playlistitems = None
+        if playlistitems_str is not None:
+            def iter_playlistitems(format):
+                # Accepts comma separated items, each a single 1-based index
+                # or an inclusive "start-end" range
+                for string_segment in format.split(','):
+                    if '-' in string_segment:
+                        start, end = string_segment.split('-')
+                        for item in range(int(start), int(end) + 1):
+                            yield int(item)
+                    else:
+                        yield int(string_segment)
+            playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+
+        ie_entries = ie_result['entries']
+
+        def make_playlistitems_entries(list_ie_entries):
+            # Requested indices outside the available range are dropped
+            num_entries = len(list_ie_entries)
+            return [
+                list_ie_entries[i - 1] for i in playlistitems
+                if -num_entries <= i - 1 < num_entries]
+
+        def report_download(num_entries):
+            self.to_screen(
+                '[%s] playlist %s: Downloading %d videos' %
+                (ie_result['extractor'], playlist, num_entries))
+
+        if isinstance(ie_entries, list):
+            n_all_entries = len(ie_entries)
+            if playlistitems:
+                entries = make_playlistitems_entries(ie_entries)
+            else:
+                entries = ie_entries[playliststart:playlistend]
+            n_entries = len(entries)
+            self.to_screen(
+                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
+                (ie_result['extractor'], playlist, n_all_entries, n_entries))
+        elif isinstance(ie_entries, PagedList):
+            if playlistitems:
+                entries = []
+                for item in playlistitems:
+                    entries.extend(ie_entries.getslice(
+                        item - 1, item
+                    ))
+            else:
+                entries = ie_entries.getslice(
+                    playliststart, playlistend)
+            n_entries = len(entries)
+            report_download(n_entries)
+        else:  # iterable
+            if playlistitems:
+                entries = make_playlistitems_entries(list(itertools.islice(
+                    ie_entries, 0, max(playlistitems))))
+            else:
+                entries = list(itertools.islice(
+                    ie_entries, playliststart, playlistend))
+            n_entries = len(entries)
+            report_download(n_entries)
+
+        if self.params.get('playlistreverse', False):
+            entries = entries[::-1]
+
+        if self.params.get('playlistrandom', False):
+            random.shuffle(entries)
+
+        x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
+        for i, entry in enumerate(entries, 1):
+            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+            # This __x_forwarded_for_ip thing is a bit ugly but requires
+            # minimal changes
+            if x_forwarded_for:
+                entry['__x_forwarded_for_ip'] = x_forwarded_for
+            # NOTE(review): playlist_index is derived from the position in
+            # 'entries'; after reversing/shuffling, or when out-of-range items
+            # were dropped, it may not match the original playlist position
+            extra = {
+                'n_entries': n_entries,
+                'playlist': playlist,
+                'playlist_id': ie_result.get('id'),
+                'playlist_title': ie_result.get('title'),
+                'playlist_uploader': ie_result.get('uploader'),
+                'playlist_uploader_id': ie_result.get('uploader_id'),
+                'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+                'extractor': ie_result['extractor'],
+                'webpage_url': ie_result['webpage_url'],
+                'webpage_url_basename': url_basename(ie_result['webpage_url']),
+                'extractor_key': ie_result['extractor_key'],
+            }
+
+            # A non-None result is the reason the entry was rejected; skip it
+            if self._match_entry(entry, incomplete=True) is not None:
+                continue
+
+            entry_result = self.__process_iterable_entry(entry, download, extra)
+            # TODO: skip failed (empty) entries?
+            playlist_results.append(entry_result)
+        ie_result['entries'] = playlist_results
+        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+        return ie_result
+
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
assert info_dict.get('_type', 'video') == 'video'
+ info_dict.setdefault('__postprocessors', [])
+
max_downloads = self.params.get('max_downloads')
if max_downloads is not None:
if self._num_downloads >= int(max_downloads):
self._num_downloads += 1
- info_dict['_filename'] = filename = self.prepare_filename(info_dict)
+ info_dict = self.pre_process(info_dict)
+
+ filename = self.prepare_filename(info_dict, warn=True)
+ info_dict['_filename'] = full_filename = self.prepare_filepath(filename)
+ temp_filename = self.prepare_filepath(filename, 'temp')
+ files_to_move = {}
# Forced printings
- self.__forced_printings(info_dict, filename, incomplete=False)
+ self.__forced_printings(info_dict, full_filename, incomplete=False)
if self.params.get('simulate', False):
if self.params.get('force_write_download_archive', False):
return
def ensure_dir_exists(path):
- try:
- dn = os.path.dirname(path)
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- return True
- except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + error_to_compat_str(err))
- return False
+ return make_dir(path, self.report_error)
- if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
+ if not ensure_dir_exists(encodeFilename(full_filename)):
+ return
+ if not ensure_dir_exists(encodeFilename(temp_filename)):
return
if self.params.get('writedescription', False):
- descfn = replace_extension(filename, 'description', info_dict.get('ext'))
+ descfn = replace_extension(
+ self.prepare_filepath(filename, 'description'),
+ 'description', info_dict.get('ext'))
+ if not ensure_dir_exists(encodeFilename(descfn)):
+ return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Video description is already present')
elif info_dict.get('description') is None:
return
if self.params.get('writeannotations', False):
- annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
+ annofn = replace_extension(
+ self.prepare_filepath(filename, 'annotation'),
+ 'annotations.xml', info_dict.get('ext'))
+ if not ensure_dir_exists(encodeFilename(annofn)):
+ return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
elif not info_dict.get('annotations'):
# ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
- sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
+ sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
+ sub_filename_final = subtitles_filename(
+ self.prepare_filepath(filename, 'subtitle'),
+ sub_lang, sub_format, info_dict.get('ext'))
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+ files_to_move[sub_filename] = sub_filename_final
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
# See https://github.com/ytdl-org/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
+ files_to_move[sub_filename] = sub_filename_final
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
'''
+ files_to_move[sub_filename] = sub_filename_final
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
if self.params.get('skip_download', False):
if self.params.get('convertsubtitles', False):
- subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
+ # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
filename_real_ext = os.path.splitext(filename)[1][1:]
filename_wo_ext = (
- os.path.splitext(filename)[0]
+ os.path.splitext(full_filename)[0]
if filename_real_ext == info_dict['ext']
- else filename)
+ else full_filename)
afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
- if subconv.available:
- info_dict.setdefault('__postprocessors', [])
- # info_dict['__postprocessors'].append(subconv)
+ # if subconv.available:
+ # info_dict['__postprocessors'].append(subconv)
if os.path.exists(encodeFilename(afilename)):
self.to_screen(
'[download] %s has already been downloaded and '
'converted' % afilename)
else:
try:
- self.post_process(filename, info_dict)
+ self.post_process(full_filename, info_dict, files_to_move)
except (PostProcessingError) as err:
self.report_error('postprocessing: %s' % str(err))
return
if self.params.get('writeinfojson', False):
- infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
+ infofn = replace_extension(
+ self.prepare_filepath(filename, 'infojson'),
+ 'info.json', info_dict.get('ext'))
+ if not ensure_dir_exists(encodeFilename(infofn)):
+ return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
- self.to_screen('[info] Video description metadata is already present')
+ self.to_screen('[info] Video metadata is already present')
else:
- self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
+ self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
try:
write_json_file(self.filter_requested_info(info_dict), infofn)
except (OSError, IOError):
- self.report_error('Cannot write metadata to JSON file ' + infofn)
+ self.report_error('Cannot write video metadata to JSON file ' + infofn)
return
+ info_dict['__infojson_filepath'] = infofn
- self._write_thumbnails(info_dict, filename)
+ thumbdir = os.path.dirname(self.prepare_filepath(filename, 'thumbnail'))
+ for thumbfn in self._write_thumbnails(info_dict, temp_filename):
+ files_to_move[thumbfn] = os.path.join(thumbdir, os.path.basename(thumbfn))
# Write internet shortcut files
url_link = webloc_link = desktop_link = False
ascii_url = iri_to_uri(info_dict['webpage_url'])
def _write_link_file(extension, template, newline, embed_filename):
- linkfn = replace_extension(filename, extension, info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(linkfn)):
+ linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
+            # Skip only when overwriting is disabled and the file already exists
+            if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
self.to_screen('[info] Internet shortcut is already present')
else:
try:
must_record_download_archive = False
if not self.params.get('skip_download', False):
try:
+
+                def existing_file(*filepaths):
+                    """Return an already-downloaded file among filepaths, if usable.
+
+                    For each candidate, a version carrying the expected final
+                    extension is looked for first, then the candidate itself.
+                    If nothing exists, or overwriting is requested, every file
+                    found is deleted and None is returned. Otherwise the first
+                    match is reported, info_dict['ext'] is updated to its
+                    extension, and its path is returned.
+                    """
+                    ext = info_dict.get('ext')
+                    # 'final_ext' may be present but unset; fall back to the
+                    # download extension so no bogus converted name is probed
+                    final_ext = self.params.get('final_ext') or ext
+                    existing_files = []
+                    for path in orderedSet(filepaths):
+                        if final_ext != ext:
+                            converted = replace_extension(path, final_ext, ext)
+                            if os.path.exists(encodeFilename(converted)):
+                                existing_files.append(converted)
+                        if os.path.exists(encodeFilename(path)):
+                            existing_files.append(path)
+
+                    if not existing_files or self.params.get('overwrites', False):
+                        for path in orderedSet(existing_files):
+                            self.report_file_delete(path)
+                            os.remove(encodeFilename(path))
+                        return None
+
+                    self.report_file_already_downloaded(existing_files[0])
+                    info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
+                    return existing_files[0]
+
+ success = True
if info_dict.get('requested_formats') is not None:
downloaded = []
- success = True
merger = FFmpegMergerPP(self)
if not merger.available:
postprocessors = []
self.report_warning('You have requested multiple '
- 'formats but ffmpeg or avconv are not installed.'
+ 'formats but ffmpeg is not installed.'
' The formats won\'t be merged.')
else:
postprocessors = [merger]
# TODO: Check acodec/vcodec
return False
- filename_real_ext = os.path.splitext(filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(filename)[0]
- if filename_real_ext == info_dict['ext']
- else filename)
requested_formats = info_dict['requested_formats']
+ old_ext = info_dict['ext']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
self.report_warning(
'Requested formats are incompatible for merge and will be merged into mkv.')
+
+                    def correct_ext(filename):
+                        """Return filename carrying the extension info_dict['ext']."""
+                        root, suffix = os.path.splitext(filename)
+                        # Drop the current suffix only if it is the pre-merge
+                        # extension; otherwise append to the full name
+                        stem = root if suffix[1:] == old_ext else filename
+                        return '%s.%s' % (stem, info_dict['ext'])
+
# Ensure filename always has a correct extension for successful merge
- filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
- file_exists = os.path.exists(encodeFilename(filename))
- if not self.params.get('overwrites', False) and file_exists:
- self.to_screen(
- '[download] %s has already been downloaded and '
- 'merged' % filename)
- else:
- if file_exists:
- self.report_file_delete(filename)
- os.remove(encodeFilename(filename))
+ full_filename = correct_ext(full_filename)
+ temp_filename = correct_ext(temp_filename)
+ dl_filename = existing_file(full_filename, temp_filename)
+ if dl_filename is None:
for f in requested_formats:
new_info = dict(info_dict)
new_info.update(f)
fname = prepend_extension(
- self.prepare_filename(new_info),
+ self.prepare_filepath(self.prepare_filename(new_info), 'temp'),
'f%s' % f['format_id'], new_info['ext'])
if not ensure_dir_exists(fname):
return
# Even if there were no downloads, it is being merged only now
info_dict['__real_download'] = True
else:
- # Delete existing file with --yes-overwrites
- if self.params.get('overwrites', False):
- if os.path.exists(encodeFilename(filename)):
- self.report_file_delete(filename)
- os.remove(encodeFilename(filename))
# Just a single file
- success, real_download = dl(filename, info_dict)
- info_dict['__real_download'] = real_download
+ dl_filename = existing_file(full_filename, temp_filename)
+ if dl_filename is None:
+ success, real_download = dl(temp_filename, info_dict)
+ info_dict['__real_download'] = real_download
+
+ dl_filename = dl_filename or temp_filename
+ info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
if fixup_policy is None:
fixup_policy = 'detect_or_warn'
- INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+ INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
stretched_ratio = info_dict.get('stretched_ratio')
if stretched_ratio is not None and stretched_ratio != 1:
elif fixup_policy == 'detect_or_warn':
stretched_pp = FFmpegFixupStretchedPP(self)
if stretched_pp.available:
- info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(stretched_pp)
else:
self.report_warning(
assert fixup_policy in ('ignore', 'never')
if (info_dict.get('requested_formats') is None
- and info_dict.get('container') == 'm4a_dash'):
+ and info_dict.get('container') == 'm4a_dash'
+ and info_dict.get('ext') == 'm4a'):
if fixup_policy == 'warn':
self.report_warning(
'%s: writing DASH m4a. '
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM4aPP(self)
if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self)
if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
assert fixup_policy in ('ignore', 'never')
try:
- self.post_process(filename, info_dict)
+ self.post_process(dl_filename, info_dict, files_to_move)
except (PostProcessingError) as err:
self.report_error('postprocessing: %s' % str(err))
return
try:
for ph in self._post_hooks:
- ph(filename)
+ ph(full_filename)
except Exception as err:
self.report_error('post hooks: %s' % str(err))
return
self.to_screen('[info] Maximum number of downloaded files reached')
raise
except ExistingVideoReached:
- self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
+ self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
raise
except RejectedVideoReached:
- self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
+ self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
raise
else:
if self.params.get('dump_single_json', False):
(k, v) for k, v in info_dict.items()
if k not in ['requested_formats', 'requested_subtitles'])
- def post_process(self, filename, ie_info):
def run_pp(self, pp, infodict, files_to_move=None):
    """Run one postprocessor and dispose of the files it reports as obsolete.

    pp            -- object whose run(infodict) returns a
                     (files_to_delete, infodict) pair
    infodict      -- info dict handed to the postprocessor
    files_to_move -- dict keyed by file name; updated in place.  When it is
                     omitted a new empty dict is created for this call.

    Returns a (files_to_move, infodict) tuple.

    A PostProcessingError raised by pp.run() is reported through
    report_error() and the incoming infodict is returned unchanged.
    """
    # A literal `{}` default would be created once and shared by every call
    # that omits the argument, so entries added below would leak from one
    # download into the next.
    if files_to_move is None:
        files_to_move = {}

    files_to_delete = []
    try:
        files_to_delete, infodict = pp.run(infodict)
    except PostProcessingError as e:
        self.report_error(e.msg)
    if not files_to_delete:
        return files_to_move, infodict

    if self.params.get('keepvideo', False):
        # Keep the originals: register them (without overriding an entry
        # that is already present) instead of deleting them.
        for f in files_to_delete:
            files_to_move.setdefault(f, '')
    else:
        for old_filename in set(files_to_delete):
            self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
            try:
                os.remove(encodeFilename(old_filename))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
            # The file is no longer wanted, so it must not be moved later.
            if old_filename in files_to_move:
                del files_to_move[old_filename]
    return files_to_move, infodict
+
def pre_process(self, ie_info):
    """Run every 'beforedl' postprocessor over a copy of ie_info.

    The postprocessors registered under self._pps['beforedl'] are applied
    in order, each one receiving the info dict produced by the previous
    one.  The caller's ie_info is shallow-copied first and is not rebound.

    Returns the info dict produced by the last postprocessor (or the plain
    copy when none are registered).
    """
    current = dict(ie_info)
    for preprocessor in self._pps['beforedl']:
        # run_pp returns (files_to_move, infodict); only the info dict is
        # of interest at this stage.
        _ignored_moves, current = self.run_pp(preprocessor, current)
    return current
+
def post_process(self, filename, ie_info, files_to_move=None):
    """Run all the postprocessors on the given file.

    filename      -- path stored as info['filepath'] for the postprocessors
    ie_info       -- info dict; it is shallow-copied, and its optional
                     '__postprocessors' list runs before the 'normal' ones
    files_to_move -- dict passed through run_pp() and then handed to
                     MoveFilesAfterDownloadPP.  When it is omitted a new
                     empty dict is created for this call.

    Order of execution: per-download '__postprocessors', then
    self._pps['normal'], then the file move, then self._pps['aftermove'].
    """
    # A literal `{}` default would be one dict shared by every call that
    # omits the argument; run_pp() mutates it, so entries would leak
    # between downloads.
    if files_to_move is None:
        files_to_move = {}

    info = dict(ie_info)
    info['filepath'] = filename

    # '__postprocessors' may be absent or None; treat both as "none queued".
    extra_pps = ie_info.get('__postprocessors') or []
    for pp in extra_pps + self._pps['normal']:
        files_to_move, info = self.run_pp(pp, info, files_to_move)
    info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info, files_to_move)[1]
    for pp in self._pps['aftermove']:
        # Each of these gets its own empty dict, separate from files_to_move.
        info = self.run_pp(pp, info, {})[1]
def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
break
else:
return
- return extractor.lower() + ' ' + video_id
+ return '%s %s' % (extractor.lower(), video_id)
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
self.get_encoding()))
write_string(encoding_str, encoding=None)
- self._write_string('[debug] yt-dlp version ' + __version__ + '\n')
+ self._write_string('[debug] yt-dlp version %s\n' % __version__)
if _LAZY_LOADER:
- self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+ self._write_string('[debug] Lazy loading extractors enabled\n')
+ if _PLUGIN_CLASSES:
+ self._write_string(
+ '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
out, err = process_communicate_or_kill(sp)
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
- self._write_string('[debug] Git HEAD: ' + out + '\n')
+ self._write_string('[debug] Git HEAD: %s\n' % out)
except Exception:
try:
sys.exc_clear()
if thumbnails:
thumbnails = [thumbnails[-1]]
elif self.params.get('write_all_thumbnails', False):
- thumbnails = info_dict.get('thumbnails')
+ thumbnails = info_dict.get('thumbnails') or []
else:
- return
-
- if not thumbnails:
- # No thumbnails present, so return immediately
- return
+ thumbnails = []
+ ret = []
for t in thumbnails:
thumb_ext = determine_ext(t['url'], 'jpg')
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
+ ret.append(thumb_filename)
self.to_screen('[%s] %s: Thumbnail %sis already present' %
(info_dict['extractor'], info_dict['id'], thumb_display_id))
else:
uf = self.urlopen(t['url'])
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
+ ret.append(thumb_filename)
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' %
(t['url'], error_to_compat_str(err)))
+ return ret