DEFAULT_OUTTMPL,
determine_ext,
determine_protocol,
+ DOT_DESKTOP_LINK_TEMPLATE,
+ DOT_URL_LINK_TEMPLATE,
+ DOT_WEBLOC_LINK_TEMPLATE,
DownloadError,
encode_compat_str,
encodeFilename,
error_to_compat_str,
+ ExistingVideoReached,
expand_path,
ExtractorError,
format_bytes,
+ format_field,
formatSeconds,
GeoRestrictedError,
int_or_none,
+ iri_to_uri,
ISO3166Utils,
locked_file,
+ make_dir,
make_HTTPS_handler,
MaxDownloadsReached,
orderedSet,
register_socks_protocols,
render_table,
replace_extension,
+ RejectedVideoReached,
SameFileError,
sanitize_filename,
sanitize_path,
std_headers,
str_or_none,
subtitles_filename,
+ to_high_limit_path,
UnavailableVideoError,
url_basename,
version_tuple,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
YoutubeDLRedirectHandler,
+ process_communicate_or_kill,
)
from .cache import Cache
-from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
FFmpegFixupStretchedPP,
FFmpegMergerPP,
FFmpegPostProcessor,
- FFmpegSubtitlesConvertorPP,
+ # FFmpegSubtitlesConvertorPP,
get_postprocessor,
+ MoveFilesAfterDownloadPP,
)
from .version import __version__
forcejson: Force printing info_dict as JSON.
dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line.
+ force_write_download_archive: Force writing download archive regardless of
+ 'skip_download' or 'simulate'.
simulate: Do not download the video files.
- format: Video format code. See options.py for more information.
+ format: Video format code. See "FORMAT SELECTION" for more details.
+ format_sort: How to sort the video formats. See "Sorting Formats" for more details.
+ format_sort_force: Force the given format_sort. See "Sorting Formats" for more details.
+ allow_multiple_video_streams: Allow multiple video streams to be merged into a single file
+ allow_multiple_audio_streams: Allow multiple audio streams to be merged into a single file
outtmpl: Template for output names.
+ outtmpl_na_placeholder: Placeholder for unavailable meta fields.
restrictfilenames: Do not allow "&" and spaces in file names
- ignoreerrors: Do not stop on download errors.
+ trim_file_name: Limit length of filename (extension excluded)
+ ignoreerrors: Do not stop on download errors
+ (Default True when running youtube-dlc,
+ but False when directly accessing YoutubeDL class)
force_generic_extractor: Force downloader to use the generic extractor
- nooverwrites: Prevent overwriting files.
+ overwrites: Overwrite all video and metadata files if True,
+ overwrite only non-video files if None
+ and don't overwrite any file if False
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
playlist_items: Specific indices of playlist to download.
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
+ writecomments: Extract video comments. This will not be written to disk
+ unless writeinfojson is also given
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
write_all_thumbnails: Write all thumbnail formats to files
+ writelink: Write an internet shortcut file, depending on the
+ current platform (.url/.webloc/.desktop)
+ writeurllink: Write a Windows internet shortcut file (.url)
+ writewebloclink: Write a macOS internet shortcut file (.webloc)
+ writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
- cookiefile: File name where cookies should be read from and dumped to.
+ break_on_existing: Stop the download process after attempting to download a
+ file that is in the archive.
+ break_on_reject: Stop the download process when encountering a video that
+ has been filtered out.
+ cookiefile: File name where cookies should be read from and dumped to
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
youtube_dlc/postprocessor/__init__.py for a list.
+ * when: Optional. When to run the postprocessor: one of
+ 'beforedl', 'normal' (default) or 'aftermove'
+ (i.e. after 'MoveFilesAfterDownload'),
as well as any further keyword arguments for the
postprocessor.
+ post_hooks: A list of functions that get called as the final step
+ for each video file, after all postprocessors have been
+ called. The filename will be passed as the only argument.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
* status: One of "downloading", "error", or "finished".
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
merge_output_format: Extension to use when merging formats.
+ final_ext: Expected final extension; used to detect when the file was
+ already downloaded and converted. "merge_output_format" is
+ replaced by this extension when given
fixup: Automatically correct known faults of the file.
One of:
- "never": do nothing
The following options are used by the post processors:
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg.
+ otherwise prefer ffmpeg. (avconv support is deprecated)
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
- postprocessor_args: A list of additional command-line arguments for the
- postprocessor.
-
+ postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
+ and a list of additional command-line arguments for the
+ postprocessor/executable. The dict can also have "PP+EXE" keys
+ which are used when the given exe is used by the given PP.
+ Use 'default' as the name for arguments to be passed to all PPs
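+ For example (values purely illustrative):
+ postprocessor_args: {'ffmpeg': ['-nostats'], 'default': ['-hide_banner']}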
The following options are used by the Youtube extractor:
youtube_include_dash_manifest: If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
params = None
_ies = []
- _pps = []
+ _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+ __prepare_filename_warned = False
_download_retcode = None
_num_downloads = None
+ _playlist_level = 0
+ _playlist_urls = set()
_screen_file = None
def __init__(self, params=None, auto_init=True):
params = {}
self._ies = []
self._ies_instances = {}
- self._pps = []
+ self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+ self.__prepare_filename_warned = False
+ self._post_hooks = []
self._progress_hooks = []
self._download_retcode = 0
self._num_downloads = 0
if self.params.get('geo_verification_proxy') is None:
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
+ if self.params.get('final_ext'):
+ if self.params.get('merge_output_format'):
+ self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
+ self.params['merge_output_format'] = self.params['final_ext']
+
check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
pp_class = get_postprocessor(pp_def_raw['key'])
pp_def = dict(pp_def_raw)
del pp_def['key']
+ if 'when' in pp_def:
+ when = pp_def['when']
+ del pp_def['when']
+ else:
+ when = 'normal'
pp = pp_class(self, **compat_kwargs(pp_def))
- self.add_post_processor(pp)
+ self.add_post_processor(pp, when=when)
+
+ for ph in self.params.get('post_hooks', []):
+ self.add_post_hook(ph)
for ph in self.params.get('progress_hooks', []):
self.add_progress_hook(ph)
for ie in gen_extractor_classes():
self.add_info_extractor(ie)
- def add_post_processor(self, pp):
+ def add_post_processor(self, pp, when='normal'):
"""Add a PostProcessor object to the end of the chain."""
- self._pps.append(pp)
+ self._pps[when].append(pp)
pp.set_downloader(self)
+ def add_post_hook(self, ph):
+ """Add the post hook"""
+ self._post_hooks.append(ph)
+
def add_progress_hook(self, ph):
"""Add the progress hook (currently only for the file downloader)"""
self._progress_hooks.append(ph)
except UnicodeEncodeError:
self.to_screen('[download] The file has already been downloaded')
- def prepare_filename(self, info_dict):
+ def report_file_delete(self, file_name):
+ """Report that existing file will be deleted."""
+ try:
+ self.to_screen('Deleting existing file %s' % file_name)
+ except UnicodeEncodeError:
+ self.to_screen('Deleting existing file')
+
+ def prepare_filename(self, info_dict, warn=False):
"""Generate the output filename."""
try:
template_dict = dict(info_dict)
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict)))
- template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+ template_dict = collections.defaultdict(lambda: self.params.get('outtmpl_na_placeholder', 'NA'), template_dict)
outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
- # string 'NA' is returned for missing fields. We will patch output
- # template for missing fields to meet string presentation type.
+ # string NA placeholder is returned for missing fields. We will patch
+ # output template for missing fields to meet string presentation type.
for numeric_field in self._NUMERIC_FIELDS:
if numeric_field not in template_dict:
# As of [1] format syntax is:
# title "Hello $PATH", we don't want `$PATH` to be expanded.
filename = expand_path(outtmpl).replace(sep, '') % template_dict
+ # https://github.com/blackjack4494/youtube-dlc/issues/85
+ trim_file_name = self.params.get('trim_file_name', False)
+ if trim_file_name:
+ fn_groups = filename.rsplit('.')
+ ext = fn_groups[-1]
+ sub_ext = ''
+ if len(fn_groups) > 2:
+ sub_ext = fn_groups[-2]
+ filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
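+ # e.g. with trim_file_name=10, 'a very long title.en.vtt' becomes 'a very lon.en.vtt'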
+
# Temporary fix for #4787
# 'Treat' all problem characters by passing filename through preferredencoding
# to workaround encoding issues with subprocess on python2 @ Windows
if sys.version_info < (3, 0) and sys.platform == 'win32':
filename = encodeFilename(filename, True).decode(preferredencoding())
- return sanitize_path(filename)
+ filename = sanitize_path(filename)
+
+ if warn and not self.__prepare_filename_warned:
+ if not self.params.get('paths'):
+ pass
+ elif filename == '-':
+ self.report_warning('--paths is ignored when outputting to stdout')
+ elif os.path.isabs(filename):
+ self.report_warning('--paths is ignored since an absolute path is given in output template')
+ self.__prepare_filename_warned = True
+
+ return filename
except ValueError as err:
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
+ def prepare_filepath(self, filename, dir_type=''):
+ if filename == '-':
+ return filename
+ paths = self.params.get('paths', {})
+ assert isinstance(paths, dict)
+ homepath = expand_path(paths.get('home', '').strip())
+ assert isinstance(homepath, compat_str)
+ subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
+ assert isinstance(subdir, compat_str)
+ return sanitize_path(os.path.join(homepath, subdir, filename))
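+ # Illustrative only (the 'paths' values below are assumptions, not defaults):
+ # with params {'paths': {'home': '/media/videos', 'temp': 'parts'}}, on a POSIX system
+ # prepare_filepath('clip.mp4', 'temp') -> '/media/videos/parts/clip.mp4'
+ # prepare_filepath('clip.mp4') -> '/media/videos/clip.mp4'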
+
def _match_entry(self, info_dict, incomplete):
""" Returns None if the file should be downloaded """
- video_title = info_dict.get('title', info_dict.get('id', 'video'))
- if 'title' in info_dict:
- # This can happen when we're just evaluating the playlist
- title = info_dict['title']
- matchtitle = self.params.get('matchtitle', False)
- if matchtitle:
- if not re.search(matchtitle, title, re.IGNORECASE):
- return '"' + title + '" title did not match pattern "' + matchtitle + '"'
- rejecttitle = self.params.get('rejecttitle', False)
- if rejecttitle:
- if re.search(rejecttitle, title, re.IGNORECASE):
- return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
- date = info_dict.get('upload_date')
- if date is not None:
- dateRange = self.params.get('daterange', DateRange())
- if date not in dateRange:
- return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
- view_count = info_dict.get('view_count')
- if view_count is not None:
- min_views = self.params.get('min_views')
- if min_views is not None and view_count < min_views:
- return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
- max_views = self.params.get('max_views')
- if max_views is not None and view_count > max_views:
- return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
- if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % video_title
- if self.in_download_archive(info_dict):
- return '%s has already been recorded in archive' % video_title
-
- if not incomplete:
- match_filter = self.params.get('match_filter')
- if match_filter is not None:
- ret = match_filter(info_dict)
- if ret is not None:
- return ret
-
- return None
+ def check_filter():
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+ if 'title' in info_dict:
+ # This can happen when we're just evaluating the playlist
+ title = info_dict['title']
+ matchtitle = self.params.get('matchtitle', False)
+ if matchtitle:
+ if not re.search(matchtitle, title, re.IGNORECASE):
+ return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+ rejecttitle = self.params.get('rejecttitle', False)
+ if rejecttitle:
+ if re.search(rejecttitle, title, re.IGNORECASE):
+ return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+ date = info_dict.get('upload_date')
+ if date is not None:
+ dateRange = self.params.get('daterange', DateRange())
+ if date not in dateRange:
+ return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+ view_count = info_dict.get('view_count')
+ if view_count is not None:
+ min_views = self.params.get('min_views')
+ if min_views is not None and view_count < min_views:
+ return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
+ max_views = self.params.get('max_views')
+ if max_views is not None and view_count > max_views:
+ return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
+ if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+ return 'Skipping "%s" because it is age restricted' % video_title
+ if self.in_download_archive(info_dict):
+ return '%s has already been recorded in archive' % video_title
+
+ if not incomplete:
+ match_filter = self.params.get('match_filter')
+ if match_filter is not None:
+ ret = match_filter(info_dict)
+ if ret is not None:
+ return ret
+ return None
+
+ reason = check_filter()
+ if reason is not None:
+ self.to_screen('[download] ' + reason)
+ if reason.endswith('has already been recorded in archive') and self.params.get('break_on_existing', False):
+ raise ExistingVideoReached()
+ elif self.params.get('break_on_reject', False):
+ raise RejectedVideoReached()
+ return reason
@staticmethod
def add_extra_info(info_dict, extra_info):
for key, value in extra_info.items():
info_dict.setdefault(key, value)
- def extract_info(self, url, download=True, ie_key=None, extra_info={},
+ def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
process=True, force_generic_extractor=False):
'''
Returns a list with a dictionary for each video we find.
if not ie.suitable(url):
continue
- ie = self.get_info_extractor(ie.ie_key())
+ ie_key = ie.ie_key()
+ ie = self.get_info_extractor(ie_key)
if not ie.working():
self.report_warning('The program functionality for this site has been marked as broken, '
'and will probably not work.')
try:
- ie_result = ie.extract(url)
- if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
- break
- if isinstance(ie_result, list):
- # Backwards compatibility: old IE result format
- ie_result = {
- '_type': 'compat_list',
- 'entries': ie_result,
- }
- self.add_default_extra_info(ie_result, ie, url)
- if process:
- return self.process_ie_result(ie_result, download, extra_info)
- else:
- return ie_result
+ temp_id = str_or_none(
+ ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
+ else ie._match_id(url))
+ except (AssertionError, IndexError, AttributeError):
+ temp_id = None
+ if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
+ self.to_screen("[%s] %s: has already been recorded in archive" % (
+ ie_key, temp_id))
+ break
+ return self.__extract_info(url, ie, download, extra_info, process, info_dict)
+ else:
+ self.report_error('no suitable InfoExtractor for URL %s' % url)
+
+ def __handle_extraction_exceptions(func):
+ def wrapper(self, *args, **kwargs):
+ try:
+ return func(self, *args, **kwargs)
except GeoRestrictedError as e:
msg = e.msg
if e.countries:
map(ISO3166Utils.short2full, e.countries))
msg += '\nYou might want to use a VPN or a proxy server (with --proxy) to workaround.'
self.report_error(msg)
- break
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
- break
- except MaxDownloadsReached:
+ except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
- break
else:
raise
+ return wrapper
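+ # Used below to wrap __extract_info and __process_iterable_entry so that an error in one
+ # entry is reported (or swallowed when 'ignoreerrors' is set) without aborting the run,
+ # while MaxDownloadsReached, ExistingVideoReached and RejectedVideoReached still propagate.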
+
+ @__handle_extraction_exceptions
+ def __extract_info(self, url, ie, download, extra_info, process, info_dict):
+ ie_result = ie.extract(url)
+ if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
+ return
+ if isinstance(ie_result, list):
+ # Backwards compatibility: old IE result format
+ ie_result = {
+ '_type': 'compat_list',
+ 'entries': ie_result,
+ }
+ if info_dict:
+ if info_dict.get('id'):
+ ie_result['id'] = info_dict['id']
+ if info_dict.get('title'):
+ ie_result['title'] = info_dict['title']
+ self.add_default_extra_info(ie_result, ie, url)
+ if process:
+ return self.process_ie_result(ie_result, download, extra_info)
else:
- self.report_error('no suitable InfoExtractor for URL %s' % url)
+ return ie_result
def add_default_extra_info(self, ie_result, ie, url):
self.add_extra_info(ie_result, {
'extractor': ie.IE_NAME,
'webpage_url': url,
+ 'duration_string': (
+ formatSeconds(ie_result['duration'], '-')
+ if ie_result.get('duration', None) is not None
+ else None),
'webpage_url_basename': url_basename(url),
'extractor_key': ie.ie_key(),
})
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
or extract_flat is True):
self.__forced_printings(
- ie_result, self.prepare_filename(ie_result),
+ ie_result,
+ self.prepare_filepath(self.prepare_filename(ie_result)),
incomplete=True)
return ie_result
# We have to add extra_info to the results because it may be
# contained in a playlist
return self.extract_info(ie_result['url'],
- download,
+ download, info_dict=ie_result,
ie_key=ie_result.get('ie_key'),
extra_info=extra_info)
elif result_type == 'url_transparent':
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type in ('playlist', 'multi_video'):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1) - 1
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
-
- ie_entries = ie_result['entries']
-
- def make_playlistitems_entries(list_ie_entries):
- num_entries = len(list_ie_entries)
- return [
- list_ie_entries[i - 1] for i in playlistitems
- if -num_entries <= i - 1 < num_entries]
-
- def report_download(num_entries):
- self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, num_entries))
-
- if isinstance(ie_entries, list):
- n_all_entries = len(ie_entries)
- if playlistitems:
- entries = make_playlistitems_entries(ie_entries)
- else:
- entries = ie_entries[playliststart:playlistend]
- n_entries = len(entries)
+ # Protect from infinite recursion due to recursively nested playlists
+ # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
+ webpage_url = ie_result['webpage_url']
+ if webpage_url in self._playlist_urls:
self.to_screen(
- '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
- (ie_result['extractor'], playlist, n_all_entries, n_entries))
- elif isinstance(ie_entries, PagedList):
- if playlistitems:
- entries = []
- for item in playlistitems:
- entries.extend(ie_entries.getslice(
- item - 1, item
- ))
- else:
- entries = ie_entries.getslice(
- playliststart, playlistend)
- n_entries = len(entries)
- report_download(n_entries)
- else: # iterable
- if playlistitems:
- entries = make_playlistitems_entries(list(itertools.islice(
- ie_entries, 0, max(playlistitems))))
- else:
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
- n_entries = len(entries)
- report_download(n_entries)
-
- if self.params.get('playlistreverse', False):
- entries = entries[::-1]
-
- if self.params.get('playlistrandom', False):
- random.shuffle(entries)
-
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
-
- for i, entry in enumerate(entries, 1):
- self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
- 'n_entries': n_entries,
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_uploader': ie_result.get('uploader'),
- 'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
- 'extractor': ie_result['extractor'],
- 'webpage_url': ie_result['webpage_url'],
- 'webpage_url_basename': url_basename(ie_result['webpage_url']),
- 'extractor_key': ie_result['extractor_key'],
- }
-
- reason = self._match_entry(entry, incomplete=True)
- if reason is not None:
- self.to_screen('[download] ' + reason)
- continue
+ '[download] Skipping already downloaded playlist: %s'
+ % (ie_result.get('title') or ie_result.get('id')))
+ return
- entry_result = self.process_ie_result(entry,
- download=download,
- extra_info=extra)
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
- self.to_screen('[download] Finished downloading playlist: %s' % playlist)
- return ie_result
+ self._playlist_level += 1
+ self._playlist_urls.add(webpage_url)
+ try:
+ return self.__process_playlist(ie_result, download)
+ finally:
+ self._playlist_level -= 1
+ if not self._playlist_level:
+ self._playlist_urls.clear()
elif result_type == 'compat_list':
self.report_warning(
'Extractor %s returned a compat_list result. '
else:
raise Exception('Invalid result type: %s' % result_type)
+ def __process_playlist(self, ie_result, download):
+ # We process each entry in the playlist
+ playlist = ie_result.get('title') or ie_result.get('id')
+ self.to_screen('[download] Downloading playlist: %s' % playlist)
+ ie_copy = {
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': 0
+ }
+ ie_copy.update(dict(ie_result))
+
+ def ensure_dir_exists(path):
+ return make_dir(path, self.report_error)
+
+ if self.params.get('writeinfojson', False):
+ infofn = replace_extension(
+ self.prepare_filepath(self.prepare_filename(ie_copy), 'infojson'),
+ 'info.json', ie_result.get('ext'))
+ if not ensure_dir_exists(encodeFilename(infofn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
+ self.to_screen('[info] Playlist metadata is already present')
+ else:
+ self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
+ playlist_info = dict(ie_result)
+ playlist_info.pop('entries')
+ try:
+ write_json_file(self.filter_requested_info(playlist_info), infofn)
+ except (OSError, IOError):
+ self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
+
+ if self.params.get('writedescription', False):
+ descfn = replace_extension(
+ self.prepare_filepath(self.prepare_filename(ie_copy), 'description'),
+ 'description', ie_result.get('ext'))
+ if not ensure_dir_exists(encodeFilename(descfn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
+ self.to_screen('[info] Playlist description is already present')
+ elif ie_result.get('description') is None:
+ self.report_warning('There\'s no playlist description to write.')
+ else:
+ try:
+ self.to_screen('[info] Writing playlist description to: ' + descfn)
+ with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ descfile.write(ie_result['description'])
+ except (OSError, IOError):
+ self.report_error('Cannot write playlist description file ' + descfn)
+ return
+
+ playlist_results = []
+
+ playliststart = self.params.get('playliststart', 1) - 1
+ playlistend = self.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlistend == -1:
+ playlistend = None
+
+ playlistitems_str = self.params.get('playlist_items')
+ playlistitems = None
+ if playlistitems_str is not None:
+ def iter_playlistitems(format):
+ for string_segment in format.split(','):
+ if '-' in string_segment:
+ start, end = string_segment.split('-')
+ for item in range(int(start), int(end) + 1):
+ yield int(item)
+ else:
+ yield int(string_segment)
+ playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
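+ # e.g. '1-3,7,7' yields [1, 2, 3, 7]: ranges are inclusive and orderedSet drops duplicates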
+
+ ie_entries = ie_result['entries']
+
+ def make_playlistitems_entries(list_ie_entries):
+ num_entries = len(list_ie_entries)
+ return [
+ list_ie_entries[i - 1] for i in playlistitems
+ if -num_entries <= i - 1 < num_entries]
+
+ def report_download(num_entries):
+ self.to_screen(
+ '[%s] playlist %s: Downloading %d videos' %
+ (ie_result['extractor'], playlist, num_entries))
+
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ if playlistitems:
+ entries = make_playlistitems_entries(ie_entries)
+ else:
+ entries = ie_entries[playliststart:playlistend]
+ n_entries = len(entries)
+ self.to_screen(
+ '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
+ (ie_result['extractor'], playlist, n_all_entries, n_entries))
+ elif isinstance(ie_entries, PagedList):
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
+ n_entries = len(entries)
+ report_download(n_entries)
+ else: # iterable
+ if playlistitems:
+ entries = make_playlistitems_entries(list(itertools.islice(
+ ie_entries, 0, max(playlistitems))))
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ report_download(n_entries)
+
+ if self.params.get('playlistreverse', False):
+ entries = entries[::-1]
+
+ if self.params.get('playlistrandom', False):
+ random.shuffle(entries)
+
+ x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
+ for i, entry in enumerate(entries, 1):
+ self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+ # This __x_forwarded_for_ip thing is a bit ugly but requires
+ # minimal changes
+ if x_forwarded_for:
+ entry['__x_forwarded_for_ip'] = x_forwarded_for
+ extra = {
+ 'n_entries': n_entries,
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+
+ if self._match_entry(entry, incomplete=True) is not None:
+ continue
+
+ entry_result = self.__process_iterable_entry(entry, download, extra)
+ # TODO: skip failed (empty) entries?
+ playlist_results.append(entry_result)
+ ie_result['entries'] = playlist_results
+ self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+ return ie_result
+
+ @__handle_extraction_exceptions
+ def __process_iterable_entry(self, entry, download, extra_info):
+ return self.process_ie_result(
+ entry, download=download, extra_info=extra_info)
+
def _build_format_filter(self, filter_spec):
" Returns a function to filter the formats according to the filter_spec "
'*=': lambda attr, value: value in attr,
}
str_operator_rex = re.compile(r'''(?x)
- \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
+ \s*(?P<key>[a-zA-Z0-9._-]+)
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<value>[a-zA-Z0-9._-]+)
\s*$
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
- def prefer_best():
- if self.params.get('simulate', False):
- return False
- if not download:
- return False
- if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
- return True
- if info_dict.get('is_live'):
- return True
- if not can_merge():
- return True
- return False
-
- req_format_list = ['bestvideo+bestaudio', 'best']
- if prefer_best():
- req_format_list.reverse()
- return '/'.join(req_format_list)
+ prefer_best = (
+ not self.params.get('simulate', False)
+ and download
+ and (
+ not can_merge()
+ or info_dict.get('is_live', False)
+ or self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-'))
+
+ return (
+ 'best/bestvideo+bestaudio'
+ if prefer_best
+ else 'bestvideo*+bestaudio/best'
+ if not self.params.get('allow_multiple_audio_streams', False)
+ else 'bestvideo+bestaudio/best')
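+ # In effect: 'best/bestvideo+bestaudio' when actually downloading but merging is not
+ # possible (no working ffmpeg, live stream, or output to stdout); otherwise
+ # 'bestvideo*+bestaudio/best', or 'bestvideo+bestaudio/best' if multiple audio
+ # streams are allowed.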
def build_format_selector(self, format_spec):
def syntax_error(note, start):
GROUP = 'GROUP'
FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+ allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
+ 'video': self.params.get('allow_multiple_video_streams', False)}
+
def _parse_filter(tokens):
filter_parts = []
for type, string, start, _, _ in tokens:
return selectors
def _build_selector_function(selector):
- if isinstance(selector, list):
+ if isinstance(selector, list): # ,
fs = [_build_selector_function(s) for s in selector]
def selector_function(ctx):
for format in f(ctx):
yield format
return selector_function
- elif selector.type == GROUP:
+
+ elif selector.type == GROUP: # ()
selector_function = _build_selector_function(selector.selector)
- elif selector.type == PICKFIRST:
+
+ elif selector.type == PICKFIRST: # /
fs = [_build_selector_function(s) for s in selector.selector]
def selector_function(ctx):
if picked_formats:
return picked_formats
return []
- elif selector.type == SINGLE:
- format_spec = selector.selector
- def selector_function(ctx):
- formats = list(ctx['formats'])
- if not formats:
- return
- if format_spec == 'all':
- for f in formats:
- yield f
- elif format_spec in ['best', 'worst', None]:
- format_idx = 0 if format_spec == 'worst' else -1
- audiovideo_formats = [
- f for f in formats
- if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
- if audiovideo_formats:
- yield audiovideo_formats[format_idx]
- # for extractors with incomplete formats (audio only (soundcloud)
- # or video only (imgur)) we will fallback to best/worst
- # {video,audio}-only format
- elif ctx['incomplete_formats']:
- yield formats[format_idx]
- elif format_spec == 'bestaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[-1]
- elif format_spec == 'worstaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[0]
- elif format_spec == 'bestvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[-1]
- elif format_spec == 'worstvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[0]
+ elif selector.type == SINGLE: # atom
+ format_spec = selector.selector if selector.selector is not None else 'best'
+
+ if format_spec == 'all':
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ if formats:
+ for f in formats:
+ yield f
+
+ else:
+ format_fallback = False
+ format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
+ if format_spec_obj is not None:
+ format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
+ format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
+ not_format_type = 'v' if format_type == 'a' else 'a'
+ format_modified = format_spec_obj.group(3) is not None
+
+ format_fallback = not format_type and not format_modified # for b, w
+ filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
+ if format_type and format_modified # bv*, ba*, wv*, wa*
+ else (lambda f: f.get(not_format_type + 'codec') == 'none')
+ if format_type # bv, ba, wv, wa
+ else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+ if not format_modified # b, w
+ else None) # b*, w*
else:
- extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
- if format_spec in extensions:
- filter_f = lambda f: f['ext'] == format_spec
- else:
- filter_f = lambda f: f['format_id'] == format_spec
- matches = list(filter(filter_f, formats))
+ format_idx = -1
+ filter_f = ((lambda f: f.get('ext') == format_spec)
+ if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
+ else (lambda f: f.get('format_id') == format_spec)) # id
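+ # Resulting behaviour of the branches above, for example:
+ # 'b'/'best' -> best format with both audio and video (falling back to any
+ # format when only incomplete formats are available)
+ # 'bv' -> best video-only format; 'bv*' -> best format containing video
+ # 'wa' -> worst audio-only format
+ # 'mp4' -> last format with that extension; anything else matches by format_id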
+
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ if not formats:
+ return
+ matches = list(filter(filter_f, formats)) if filter_f is not None else formats
if matches:
- yield matches[-1]
- elif selector.type == MERGE:
+ yield matches[format_idx]
+ elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)) best/worst will fallback to
+ # best/worst {video,audio}-only format
+ yield formats[format_idx]
+
+ elif selector.type == MERGE: # +
def _merge(formats_pair):
format_1, format_2 = formats_pair
formats_info.extend(format_1.get('requested_formats', (format_1,)))
formats_info.extend(format_2.get('requested_formats', (format_2,)))
+ if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
+ get_no_more = {"video": False, "audio": False}
+ # Rebuild the list instead of popping while iterating, keeping only the first
+ # format that provides a stream type for which multiple streams are not allowed
+ filtered_formats_info = []
+ for fmt_info in formats_info:
+ drop = False
+ for aud_vid in ["audio", "video"]:
+ if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
+ if get_no_more[aud_vid]:
+ drop = True
+ get_no_more[aud_vid] = True
+ if not drop:
+ filtered_formats_info.append(fmt_info)
+ formats_info = filtered_formats_info
+
+ if len(formats_info) == 1:
+ return formats_info[0]
+
video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
if req_format is None:
req_format = self._default_format_spec(info_dict, download=download)
if self.params.get('verbose'):
- self.to_stdout('[debug] Default format spec: %s' % req_format)
+ self._write_string('[debug] Default format spec: %s\n' % req_format)
format_selector = self.build_format_selector(req_format)
expected=True)
if download:
+ self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
if len(formats_to_download) > 1:
self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
for format in formats_to_download:
assert info_dict.get('_type', 'video') == 'video'
+ info_dict.setdefault('__postprocessors', [])
+
max_downloads = self.params.get('max_downloads')
if max_downloads is not None:
if self._num_downloads >= int(max_downloads):
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
- reason = self._match_entry(info_dict, incomplete=False)
- if reason is not None:
- self.to_screen('[download] ' + reason)
+ if self._match_entry(info_dict, incomplete=False) is not None:
return
self._num_downloads += 1
- info_dict['_filename'] = filename = self.prepare_filename(info_dict)
+ info_dict = self.pre_process(info_dict)
+
+ filename = self.prepare_filename(info_dict, warn=True)
+ info_dict['_filename'] = full_filename = self.prepare_filepath(filename)
+ temp_filename = self.prepare_filepath(filename, 'temp')
+ files_to_move = {}
# Forced printings
- self.__forced_printings(info_dict, filename, incomplete=False)
+ self.__forced_printings(info_dict, full_filename, incomplete=False)
- # Do nothing else if in simulate mode
if self.params.get('simulate', False):
+ if self.params.get('force_write_download_archive', False):
+ self.record_download_archive(info_dict)
+
+ # Do nothing else if in simulate mode
return
if filename is None:
return
def ensure_dir_exists(path):
- try:
- dn = os.path.dirname(path)
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- return True
- except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + error_to_compat_str(err))
- return False
+ return make_dir(path, self.report_error)
- if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
+ if not ensure_dir_exists(encodeFilename(full_filename)):
+ return
+ if not ensure_dir_exists(encodeFilename(temp_filename)):
return
if self.params.get('writedescription', False):
- descfn = replace_extension(filename, 'description', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+ descfn = replace_extension(
+ self.prepare_filepath(filename, 'description'),
+ 'description', info_dict.get('ext'))
+ if not ensure_dir_exists(encodeFilename(descfn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Video description is already present')
elif info_dict.get('description') is None:
self.report_warning('There\'s no description to write.')
return
if self.params.get('writeannotations', False):
- annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+ annofn = replace_extension(
+ self.prepare_filepath(filename, 'annotation'),
+ 'annotations.xml', info_dict.get('ext'))
+ if not ensure_dir_exists(encodeFilename(annofn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
elif not info_dict.get('annotations'):
self.report_warning('There are no annotations to write.')
self.report_error('Cannot write annotations file: ' + annofn)
return
- def dl(name, info):
+ def dl(name, info, subtitle=False):
fd = get_suitable_downloader(info, self.params)(self, self.params)
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
- self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info)
+ self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
+ return fd.download(name, info, subtitle)
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
# subtitles download errors are already managed as troubles in relevant IE
# that way it will silently go on when used with unsupporting IE
subtitles = info_dict['requested_subtitles']
+ # ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
- sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+ sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
+ sub_filename_final = subtitles_filename(
+ self.prepare_filepath(filename, 'subtitle'),
+ sub_lang, sub_format, info_dict.get('ext'))
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+ files_to_move[sub_filename] = sub_filename_final
else:
+ self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
try:
# Use newline='' to prevent conversion of newline characters
# See https://github.com/ytdl-org/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
+ files_to_move[sub_filename] = sub_filename_final
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
else:
try:
- dl(sub_filename, sub_info)
- except (ExtractorError, IOError, OSError, ValueError,
- compat_urllib_error.URLError,
- compat_http_client.HTTPException,
- socket.error) as err:
+ dl(sub_filename, sub_info, subtitle=True)
+ '''
+ if self.params.get('sleep_interval_subtitles', False):
+ dl(sub_filename, sub_info)
+ else:
+ sub_data = ie._request_webpage(
+ sub_info['url'], info_dict['id'], note=False).read()
+ with io.open(encodeFilename(sub_filename), 'wb') as subfile:
+ subfile.write(sub_data)
+ '''
+ files_to_move[sub_filename] = sub_filename_final
+ except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
if self.params.get('skip_download', False):
if self.params.get('convertsubtitles', False):
- subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
+ # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
filename_real_ext = os.path.splitext(filename)[1][1:]
filename_wo_ext = (
- os.path.splitext(filename)[0]
+ os.path.splitext(full_filename)[0]
if filename_real_ext == info_dict['ext']
- else filename)
+ else full_filename)
afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
- if subconv.available:
- info_dict.setdefault('__postprocessors', [])
- # info_dict['__postprocessors'].append(subconv)
+ # if subconv.available:
+ # info_dict['__postprocessors'].append(subconv)
if os.path.exists(encodeFilename(afilename)):
self.to_screen(
'[download] %s has already been downloaded and '
'converted' % afilename)
else:
try:
- self.post_process(filename, info_dict)
+ self.post_process(full_filename, info_dict, files_to_move)
except (PostProcessingError) as err:
self.report_error('postprocessing: %s' % str(err))
return
if self.params.get('writeinfojson', False):
- infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
- self.to_screen('[info] Video description metadata is already present')
+ infofn = replace_extension(
+ self.prepare_filepath(filename, 'infojson'),
+ 'info.json', info_dict.get('ext'))
+ if not ensure_dir_exists(encodeFilename(infofn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
+ self.to_screen('[info] Video metadata is already present')
else:
- self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
+ self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
try:
write_json_file(self.filter_requested_info(info_dict), infofn)
except (OSError, IOError):
- self.report_error('Cannot write metadata to JSON file ' + infofn)
+ self.report_error('Cannot write video metadata to JSON file ' + infofn)
return
+ info_dict['__infojson_filepath'] = infofn
+
+ thumbdir = os.path.dirname(self.prepare_filepath(filename, 'thumbnail'))
+ for thumbfn in self._write_thumbnails(info_dict, temp_filename):
+ files_to_move[thumbfn] = os.path.join(thumbdir, os.path.basename(thumbfn))
+
+ # Write internet shortcut files
+ url_link = webloc_link = desktop_link = False
+ if self.params.get('writelink', False):
+ if sys.platform == "darwin": # macOS.
+ webloc_link = True
+ elif sys.platform.startswith("linux"):
+ desktop_link = True
+ else: # if sys.platform in ['win32', 'cygwin']:
+ url_link = True
+ if self.params.get('writeurllink', False):
+ url_link = True
+ if self.params.get('writewebloclink', False):
+ webloc_link = True
+ if self.params.get('writedesktoplink', False):
+ desktop_link = True
+
+ if url_link or webloc_link or desktop_link:
+ if 'webpage_url' not in info_dict:
+ self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
+ return
+ ascii_url = iri_to_uri(info_dict['webpage_url'])
- self._write_thumbnails(info_dict, filename)
+ def _write_link_file(extension, template, newline, embed_filename):
+ linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
+ self.to_screen('[info] Internet shortcut is already present')
+ else:
+ try:
+ self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
+ with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
+ template_vars = {'url': ascii_url}
+ if embed_filename:
+ template_vars['filename'] = linkfn[:-(len(extension) + 1)]
+ linkfile.write(template % template_vars)
+ except (OSError, IOError):
+ self.report_error('Cannot write internet shortcut ' + linkfn)
+ return False
+ return True
+ if url_link:
+ if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
+ return
+ if webloc_link:
+ if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
+ return
+ if desktop_link:
+ if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
+ return
+
+ # Download
+ must_record_download_archive = False
if not self.params.get('skip_download', False):
try:
+
+ def existing_file(*filepaths):
+ ext = info_dict.get('ext')
+ final_ext = self.params.get('final_ext', ext)
+ existing_files = []
+ for file in orderedSet(filepaths):
+ if final_ext != ext:
+ converted = replace_extension(file, final_ext, ext)
+ if os.path.exists(encodeFilename(converted)):
+ existing_files.append(converted)
+ if os.path.exists(encodeFilename(file)):
+ existing_files.append(file)
+
+ if not existing_files or self.params.get('overwrites', False):
+ for file in orderedSet(existing_files):
+ self.report_file_delete(file)
+ os.remove(encodeFilename(file))
+ return None
+
+ self.report_file_already_downloaded(existing_files[0])
+ info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
+ return existing_files[0]
+
+ success = True
if info_dict.get('requested_formats') is not None:
downloaded = []
- success = True
merger = FFmpegMergerPP(self)
if not merger.available:
postprocessors = []
self.report_warning('You have requested multiple '
- 'formats but ffmpeg or avconv are not installed.'
+ 'formats but ffmpeg is not installed.'
' The formats won\'t be merged.')
else:
postprocessors = [merger]
# TODO: Check acodec/vcodec
return False
- filename_real_ext = os.path.splitext(filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(filename)[0]
- if filename_real_ext == info_dict['ext']
- else filename)
requested_formats = info_dict['requested_formats']
+ old_ext = info_dict['ext']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
self.report_warning(
'Requested formats are incompatible for merge and will be merged into mkv.')
+
+ def correct_ext(filename):
+ filename_real_ext = os.path.splitext(filename)[1][1:]
+ filename_wo_ext = (
+ os.path.splitext(filename)[0]
+ if filename_real_ext == old_ext
+ else filename)
+ return '%s.%s' % (filename_wo_ext, info_dict['ext'])
+
# Ensure filename always has a correct extension for successful merge
- filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
- if os.path.exists(encodeFilename(filename)):
- self.to_screen(
- '[download] %s has already been downloaded and '
- 'merged' % filename)
- else:
+ full_filename = correct_ext(full_filename)
+ temp_filename = correct_ext(temp_filename)
+ dl_filename = existing_file(full_filename, temp_filename)
+ if dl_filename is None:
for f in requested_formats:
new_info = dict(info_dict)
new_info.update(f)
fname = prepend_extension(
- self.prepare_filename(new_info),
+ self.prepare_filepath(self.prepare_filename(new_info), 'temp'),
'f%s' % f['format_id'], new_info['ext'])
if not ensure_dir_exists(fname):
return
downloaded.append(fname)
- partial_success = dl(fname, new_info)
+ partial_success, real_download = dl(fname, new_info)
success = success and partial_success
info_dict['__postprocessors'] = postprocessors
info_dict['__files_to_merge'] = downloaded
+ # Even if there were no downloads, it is being merged only now
+ info_dict['__real_download'] = True
else:
# Just a single file
- success = dl(filename, info_dict)
+ dl_filename = existing_file(full_filename, temp_filename)
+ if dl_filename is None:
+ success, real_download = dl(temp_filename, info_dict)
+ info_dict['__real_download'] = real_download
+
+ dl_filename = dl_filename or temp_filename
+ info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
if fixup_policy is None:
fixup_policy = 'detect_or_warn'
- INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+ INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
stretched_ratio = info_dict.get('stretched_ratio')
if stretched_ratio is not None and stretched_ratio != 1:
elif fixup_policy == 'detect_or_warn':
stretched_pp = FFmpegFixupStretchedPP(self)
if stretched_pp.available:
- info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(stretched_pp)
else:
self.report_warning(
assert fixup_policy in ('ignore', 'never')
if (info_dict.get('requested_formats') is None
- and info_dict.get('container') == 'm4a_dash'):
+ and info_dict.get('container') == 'm4a_dash'
+ and info_dict.get('ext') == 'm4a'):
if fixup_policy == 'warn':
self.report_warning(
'%s: writing DASH m4a. '
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM4aPP(self)
if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self)
if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
assert fixup_policy in ('ignore', 'never')
try:
- self.post_process(filename, info_dict)
+ self.post_process(dl_filename, info_dict, files_to_move)
except (PostProcessingError) as err:
self.report_error('postprocessing: %s' % str(err))
return
- self.record_download_archive(info_dict)
+ try:
+ for ph in self._post_hooks:
+ ph(full_filename)
+ except Exception as err:
+ self.report_error('post hooks: %s' % str(err))
+ return
+ must_record_download_archive = True
+
+ if must_record_download_archive or self.params.get('force_write_download_archive', False):
+ self.record_download_archive(info_dict)
+ max_downloads = self.params.get('max_downloads')
+ if max_downloads is not None and self._num_downloads >= int(max_downloads):
+ raise MaxDownloadsReached()
def download(self, url_list):
"""Download a given list of URLs."""
except UnavailableVideoError:
self.report_error('unable to download video')
except MaxDownloadsReached:
- self.to_screen('[info] Maximum number of downloaded files reached.')
+ self.to_screen('[info] Maximum number of downloaded files reached')
+ raise
+ except ExistingVideoReached:
+ self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
+ raise
+ except RejectedVideoReached:
+ self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
raise
else:
if self.params.get('dump_single_json', False):
(k, v) for k, v in info_dict.items()
if k not in ['requested_formats', 'requested_subtitles'])
- def post_process(self, filename, ie_info):
+ def run_pp(self, pp, infodict, files_to_move=None):
+ # Avoid a shared mutable default; this dict is modified below
+ files_to_move = {} if files_to_move is None else files_to_move
+ files_to_delete = []
+ try:
+ files_to_delete, infodict = pp.run(infodict)
+ except PostProcessingError as e:
+ self.report_error(e.msg)
+ if not files_to_delete:
+ return files_to_move, infodict
+
+ if self.params.get('keepvideo', False):
+ for f in files_to_delete:
+ files_to_move.setdefault(f, '')
+ else:
+ for old_filename in set(files_to_delete):
+ self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
+ try:
+ os.remove(encodeFilename(old_filename))
+ except (IOError, OSError):
+ self.report_warning('Unable to remove downloaded original file')
+ if old_filename in files_to_move:
+ del files_to_move[old_filename]
+ return files_to_move, infodict
+
+ def pre_process(self, ie_info):
+ info = dict(ie_info)
+ for pp in self._pps['beforedl']:
+ info = self.run_pp(pp, info)[1]
+ return info
+
+ def post_process(self, filename, ie_info, files_to_move=None):
"""Run all the postprocessors on the given file."""
+ files_to_move = {} if files_to_move is None else files_to_move
info = dict(ie_info)
info['filepath'] = filename
- pps_chain = []
- if ie_info.get('__postprocessors') is not None:
- pps_chain.extend(ie_info['__postprocessors'])
- pps_chain.extend(self._pps)
- for pp in pps_chain:
- files_to_delete = []
- try:
- files_to_delete, info = pp.run(info)
- except PostProcessingError as e:
- self.report_error(e.msg)
- if files_to_delete and not self.params.get('keepvideo', False):
- for old_filename in set(files_to_delete):
- self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- try:
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded original file')
+
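+ # Ordering: the video's own '__postprocessors' and the 'normal' PPs run first,
+ # then MoveFilesAfterDownloadPP, and finally the 'aftermove' PPs
+ # ('beforedl' PPs have already run in pre_process())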
+ for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
+ files_to_move, info = self.run_pp(pp, info, files_to_move)
+ info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info, files_to_move)[1]
+ for pp in self._pps['aftermove']:
+ files_to_move, info = self.run_pp(pp, info, {})
def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
break
else:
return
- return extractor.lower() + ' ' + video_id
+ return '%s %s' % (extractor.lower(), video_id)
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
res += '~' + format_bytes(fdict['filesize_approx'])
return res
+ def _format_note_table(self, f):
+ def join_fields(*vargs):
+ return ', '.join((val for val in vargs if val != ''))
+
+ return join_fields(
+ 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
+ format_field(f, 'language', '[%s]'),
+ format_field(f, 'format_note'),
+ format_field(f, 'container', ignore=(None, f.get('ext'))),
+ format_field(f, 'asr', '%5dHz'))
+
def list_formats(self, info_dict):
formats = info_dict.get('formats', [info_dict])
- table = [
- [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
- for f in formats
- if f.get('preference') is None or f['preference'] >= -1000]
- if len(formats) > 1:
- table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
-
- header_line = ['format code', 'extension', 'resolution', 'note']
+ new_format = self.params.get('listformats_table', False)
+ if new_format:
+ table = [
+ [
+ format_field(f, 'format_id'),
+ format_field(f, 'ext'),
+ self.format_resolution(f),
+ format_field(f, 'fps', '%d'),
+ '|',
+ format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
+ format_field(f, 'tbr', '%4dk'),
+ f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n"),
+ '|',
+ format_field(f, 'vcodec', default='unknown').replace('none', ''),
+ format_field(f, 'vbr', '%4dk'),
+ format_field(f, 'acodec', default='unknown').replace('none', ''),
+ format_field(f, 'abr', '%3dk'),
+ format_field(f, 'asr', '%5dHz'),
+ self._format_note_table(f)]
+ for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
+ header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
+ '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
+ else:
+ table = [
+ [
+ format_field(f, 'format_id'),
+ format_field(f, 'ext'),
+ self.format_resolution(f),
+ self._format_note(f)]
+ for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
+ header_line = ['format code', 'extension', 'resolution', 'note']
+
+ # if len(formats) > 1:
+ # table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
self.to_screen(
- '[info] Available formats for %s:\n%s' %
- (info_dict['id'], render_table(header_line, table)))
+ '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
+ header_line,
+ table,
+ delim=new_format,
+ extraGap=(0 if new_format else 1),
+ hideEmpty=new_format)))
def list_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails')
self.get_encoding()))
write_string(encoding_str, encoding=None)
- self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
+ self._write_string('[debug] yt-dlp version %s\n' % __version__)
if _LAZY_LOADER:
- self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+ self._write_string('[debug] Lazy loading extractors enabled\n')
+ if _PLUGIN_CLASSES:
+ self._write_string(
+ '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate()
+ out, err = process_communicate_or_kill(sp)
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
- self._write_string('[debug] Git HEAD: ' + out + '\n')
+ self._write_string('[debug] Git HEAD: %s\n' % out)
except Exception:
try:
sys.exc_clear()
if self.params.get('call_home', False):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+ return
latest_version = self.urlopen(
'https://yt-dl.org/latest/version').read().decode('utf-8')
if version_tuple(latest_version) > version_tuple(__version__):
if thumbnails:
thumbnails = [thumbnails[-1]]
elif self.params.get('write_all_thumbnails', False):
- thumbnails = info_dict.get('thumbnails')
+ thumbnails = info_dict.get('thumbnails') or []
else:
- return
-
- if not thumbnails:
- # No thumbnails present, so return immediately
- return
+ thumbnails = []
+ ret = []
for t in thumbnails:
thumb_ext = determine_ext(t['url'], 'jpg')
suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
- t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+ t['filename'] = thumb_filename = replace_extension(filename + suffix, thumb_ext, info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
+ ret.append(thumb_filename)
self.to_screen('[%s] %s: Thumbnail %sis already present' %
(info_dict['extractor'], info_dict['id'], thumb_display_id))
else:
uf = self.urlopen(t['url'])
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
+ ret.append(thumb_filename)
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' %
(t['url'], error_to_compat_str(err)))
+ return ret