import random
from string import ascii_letters
+from zipimport import zipimporter
from .compat import (
compat_basestring,
date_from_str,
DateRange,
DEFAULT_OUTTMPL,
+ OUTTMPL_TYPES,
determine_ext,
determine_protocol,
+ DOT_DESKTOP_LINK_TEMPLATE,
+ DOT_URL_LINK_TEMPLATE,
+ DOT_WEBLOC_LINK_TEMPLATE,
DownloadError,
encode_compat_str,
encodeFilename,
error_to_compat_str,
+ ExistingVideoReached,
expand_path,
ExtractorError,
+ float_or_none,
format_bytes,
+ format_field,
formatSeconds,
GeoRestrictedError,
int_or_none,
+ iri_to_uri,
ISO3166Utils,
locked_file,
+ make_dir,
make_HTTPS_handler,
MaxDownloadsReached,
orderedSet,
register_socks_protocols,
render_table,
replace_extension,
+ RejectedVideoReached,
SameFileError,
sanitize_filename,
sanitize_path,
sanitized_Request,
std_headers,
str_or_none,
+ strftime_or_none,
subtitles_filename,
+ to_high_limit_path,
UnavailableVideoError,
url_basename,
version_tuple,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
YoutubeDLRedirectHandler,
+ process_communicate_or_kill,
)
from .cache import Cache
-from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER
+from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
from .extractor.openload import PhantomJSwrapper
from .downloader import get_suitable_downloader
from .downloader.rtmp import rtmpdump_version
FFmpegFixupStretchedPP,
FFmpegMergerPP,
FFmpegPostProcessor,
- FFmpegSubtitlesConvertorPP,
+ # FFmpegSubtitlesConvertorPP,
get_postprocessor,
+ MoveFilesAfterDownloadPP,
)
from .version import __version__
forcejson: Force printing info_dict as JSON.
dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line.
+ force_write_download_archive: Force writing download archive regardless
+ of 'skip_download' or 'simulate'.
simulate: Do not download the video files.
- format: Video format code. See options.py for more information.
- outtmpl: Template for output names.
- restrictfilenames: Do not allow "&" and spaces in file names.
- trim_file_name: Limit length of filename (extension excluded).
- ignoreerrors: Do not stop on download errors.
+ format: Video format code. see "FORMAT SELECTION" for more details.
+ allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
+ format_sort: How to sort the video formats. see "Sorting Formats"
+ for more details.
+ format_sort_force: Force the given format_sort. see "Sorting Formats"
+ for more details.
+ allow_multiple_video_streams: Allow multiple video streams to be merged
+ into a single file
+ allow_multiple_audio_streams: Allow multiple audio streams to be merged
+ into a single file
+ paths: Dictionary of output paths. The allowed keys are 'home',
+ 'temp' and the keys of OUTTMPL_TYPES (in utils.py)
+ outtmpl: Dictionary of templates for output names. Allowed keys
+ are 'default' and the keys of OUTTMPL_TYPES (in utils.py).
+ A string is also accepted for backward compatibility
+ outtmpl_na_placeholder: Placeholder for unavailable meta fields.
+ restrictfilenames: Do not allow "&" and spaces in file names
+ trim_file_name: Limit length of filename (extension excluded)
+ windowsfilenames: Force the filenames to be windows compatible
+ ignoreerrors: Do not stop on download errors
+ (Default True when running youtube-dlc,
+ but False when directly accessing YoutubeDL class)
force_generic_extractor: Force downloader to use the generic extractor
- nooverwrites: Prevent overwriting files.
+ overwrites: Overwrite all video and metadata files if True,
+ overwrite only non-video files if None
+ and don't overwrite any file if False
playliststart: Playlist item to start at.
playlistend: Playlist item to end at.
playlist_items: Specific indices of playlist to download.
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video description to a .info.json file
+ writecomments: Extract video comments. This will not be written to disk
+ unless writeinfojson is also given
writeannotations: Write the video annotations to a .annotations.xml file
writethumbnail: Write the thumbnail image to a file
+ allow_playlist_files: Whether to write playlists' description, infojson etc
+ also to disk when using the 'write*' options
write_all_thumbnails: Write all thumbnail formats to files
+ writelink: Write an internet shortcut file, depending on the
+ current platform (.url/.webloc/.desktop)
+ writeurllink: Write a Windows internet shortcut file (.url)
+ writewebloclink: Write a macOS internet shortcut file (.webloc)
+ writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
allsubtitles: Downloads all the subtitles of the video
download_archive: File name of a file where all downloads are recorded.
Videos already present in the file are not downloaded
again.
- break_on_existing: Stop the download process after attempting to download a file that's
- in the archive.
- cookiefile: File name where cookies should be read from and dumped to.
+ break_on_existing: Stop the download process after attempting to download a
+ file that is in the archive.
+ break_on_reject: Stop the download process when encountering a video that
+ has been filtered out.
+ cookiefile: File name where cookies should be read from and dumped to
nocheckcertificate:Do not verify SSL certificates
prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
At the moment, this is only supported by YouTube.
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
youtube_dlc/postprocessor/__init__.py for a list.
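+ * _after_move: Optional. If True, run this post_processor
+ after 'MoveFilesAfterDownload'
+ * when: Optional. Which chain the postprocessor is added to:
+ 'beforedl', 'normal' (the default) or 'aftermove'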
as well as any further keyword arguments for the
postprocessor.
+ post_hooks: A list of functions that get called as the final step
+ for each video file, after all postprocessors have been
+ called. The filename will be passed as the only argument.
progress_hooks: A list of functions that get called on download
progress, with a dictionary with the entries
* status: One of "downloading", "error", or "finished".
Progress hooks are guaranteed to be called at least once
(with status "finished") if the download is successful.
merge_output_format: Extension to use when merging formats.
+ final_ext: Expected final extension; used to detect when the file was
+ already downloaded and converted. "merge_output_format" is
+ replaced by this extension when given
fixup: Automatically correct known faults of the file.
One of:
- "never": do nothing
The following options are used by the post processors:
prefer_ffmpeg: If False, use avconv instead of ffmpeg if both are available,
- otherwise prefer ffmpeg.
+ otherwise prefer ffmpeg. (avconv support is deprecated)
ffmpeg_location: Location of the ffmpeg/avconv binary; either the path
to the binary or its containing directory.
- postprocessor_args: A list of additional command-line arguments for the
- postprocessor.
-
- The following options are used by the Youtube extractor:
+ postprocessor_args: A dictionary of postprocessor/executable keys (in lower case)
+ and a list of additional command-line arguments for the
+ postprocessor/executable. The dict can also have "PP+EXE" keys
+ which are used when the given exe is used by the given PP.
+ Use 'default' as the name for arguments to be passed to all PP
+
+ The following options are used by the extractors:
+ dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
+ hls_split_discontinuity: Split HLS playlists to different formats at
+ discontinuities such as ad breaks (default: False)
youtube_include_dash_manifest: If True (default), DASH manifests and related
data will be downloaded and processed by extractor.
You can reduce network I/O by disabling it if you don't
- care about DASH.
+ care about DASH. (only for youtube)
+ youtube_include_hls_manifest: If True (default), HLS manifests and related
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about HLS. (only for youtube)
"""
_NUMERIC_FIELDS = set((
params = None
_ies = []
- _pps = []
+ _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+ __prepare_filename_warned = False
_download_retcode = None
_num_downloads = None
+ _playlist_level = 0
+ _playlist_urls = set()
_screen_file = None
def __init__(self, params=None, auto_init=True):
params = {}
self._ies = []
self._ies_instances = {}
- self._pps = []
+ self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+ self.__prepare_filename_warned = False
+ self._post_hooks = []
self._progress_hooks = []
self._download_retcode = 0
self._num_downloads = 0
if self.params.get('geo_verification_proxy') is None:
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
+ if self.params.get('final_ext'):
+ if self.params.get('merge_output_format'):
+ self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
+ self.params['merge_output_format'] = self.params['final_ext']
+
+ if 'overwrites' in self.params and self.params['overwrites'] is None:
+ del self.params['overwrites']
+
check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
'Set the LC_ALL environment variable to fix this.')
self.params['restrictfilenames'] = True
- if isinstance(params.get('outtmpl'), bytes):
- self.report_warning(
- 'Parameter outtmpl is bytes, but should be a unicode string. '
- 'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
+ self.outtmpl_dict = self.parse_outtmpl()
self._setup_opener()
pp_class = get_postprocessor(pp_def_raw['key'])
pp_def = dict(pp_def_raw)
del pp_def['key']
+ if 'when' in pp_def:
+ when = pp_def['when']
+ del pp_def['when']
+ else:
+ when = 'normal'
pp = pp_class(self, **compat_kwargs(pp_def))
- self.add_post_processor(pp)
+ self.add_post_processor(pp, when=when)
+
+ for ph in self.params.get('post_hooks', []):
+ self.add_post_hook(ph)
for ph in self.params.get('progress_hooks', []):
self.add_progress_hook(ph)
for ie in gen_extractor_classes():
self.add_info_extractor(ie)
- def add_post_processor(self, pp):
+ def add_post_processor(self, pp, when='normal'):
"""Add a PostProcessor object to the end of the chain."""
+ # 'when' is the key of the self._pps chain that pp is appended to
- self._pps.append(pp)
+ self._pps[when].append(pp)
pp.set_downloader(self)
+    def add_post_hook(self, ph):
+        """Append ph to the list of registered post hooks"""
+        self._post_hooks += [ph]
+
def add_progress_hook(self, ph):
"""Add the progress hook (currently only for the file downloader)"""
self._progress_hooks.append(ph)
except UnicodeEncodeError:
self.to_screen('[download] The file has already been downloaded')
- def prepare_filename(self, info_dict):
- """Generate the output filename."""
+    def report_file_delete(self, file_name):
+        """Tell the user that the existing file file_name is being deleted."""
+        try:
+            message = 'Deleting existing file %s' % file_name
+            self.to_screen(message)
+        except UnicodeEncodeError:
+            # The name could not be encoded for output; report without it
+            self.to_screen('Deleting existing file')
+
+    def parse_outtmpl(self):
+        """Build the dictionary of output templates from the 'outtmpl' param.
+
+        A value that is not a dict (the single-template form) is used as the
+        'default' template. Every key of DEFAULT_OUTTMPL whose template is
+        missing or empty is filled in from DEFAULT_OUTTMPL. A warning is
+        reported for each template that is a bytes object.
+
+        Returns a new dict; a dict supplied through params is not modified.
+        """
+        outtmpl = self.params.get('outtmpl', {})
+        if isinstance(outtmpl, dict):
+            # Copy: filling in the defaults must not alter the caller's dict
+            outtmpl_dict = dict(outtmpl)
+        else:
+            outtmpl_dict = {'default': outtmpl}
+        for key, default_tmpl in DEFAULT_OUTTMPL.items():
+            if not outtmpl_dict.get(key):
+                outtmpl_dict[key] = default_tmpl
+        for val in outtmpl_dict.values():
+            if isinstance(val, bytes):
+                self.report_warning(
+                    'Parameter outtmpl is bytes, but should be a unicode string. '
+                    'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')
+        return outtmpl_dict
+
+ def _prepare_filename(self, info_dict, tmpl_type='default'):
try:
template_dict = dict(info_dict)
+ template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
+ formatSeconds(info_dict['duration'], '-')
+ if info_dict.get('duration', None) is not None
+ else None)
+
template_dict['epoch'] = int(time.time())
autonumber_size = self.params.get('autonumber_size')
if autonumber_size is None:
template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
for k, v in template_dict.items()
if v is not None and not isinstance(v, (list, tuple, dict)))
- template_dict = collections.defaultdict(lambda: 'NA', template_dict)
+ na = self.params.get('outtmpl_na_placeholder', 'NA')
+ template_dict = collections.defaultdict(lambda: na, template_dict)
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
+ force_ext = OUTTMPL_TYPES.get(tmpl_type)
# For fields playlist_index and autonumber convert all occurrences
# of %(field)s to %(field)0Nd for backward compatibility
r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
outtmpl)
+ # As of [1] format syntax is:
+ # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
+ # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
+ FORMAT_RE = r'''(?x)
+ (?<!%)
+ %
+ \({0}\) # mapping key
+ (?:[#0\-+ ]+)? # conversion flags (optional)
+ (?:\d+)? # minimum field width (optional)
+ (?:\.\d+)? # precision (optional)
+ [hlL]? # length modifier (optional)
+ (?P<type>[diouxXeEfFgGcrs%]) # conversion type
+ '''
+
+ numeric_fields = list(self._NUMERIC_FIELDS)
+
+ # Format date
+ FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
+ for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
+ conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
+ if key in template_dict:
+ continue
+ value = strftime_or_none(template_dict.get(field), frmt, na)
+ if conv_type in 'crs': # string
+ value = sanitize(field, value)
+ else: # number
+ numeric_fields.append(key)
+ value = float_or_none(value, default=None)
+ if value is not None:
+ template_dict[key] = value
+
# Missing numeric fields used together with integer presentation types
# in format specification will break the argument substitution since
- # string 'NA' is returned for missing fields. We will patch output
- # template for missing fields to meet string presentation type.
- for numeric_field in self._NUMERIC_FIELDS:
+ # string NA placeholder is returned for missing fields. We will patch
+ # output template for missing fields to meet string presentation type.
+ for numeric_field in numeric_fields:
if numeric_field not in template_dict:
- # As of [1] format syntax is:
- # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
- # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
- FORMAT_RE = r'''(?x)
- (?<!%)
- %
- \({0}\) # mapping key
- (?:[#0\-+ ]+)? # conversion flags (optional)
- (?:\d+)? # minimum field width (optional)
- (?:\.\d+)? # precision (optional)
- [hlL]? # length modifier (optional)
- [diouxXeEfFgGcrs%] # conversion type
- '''
outtmpl = re.sub(
- FORMAT_RE.format(numeric_field),
+ FORMAT_RE.format(re.escape(numeric_field)),
r'%({0})s'.format(numeric_field), outtmpl)
# expand_path translates '%%' into '%' and '$$' into '$'
# title "Hello $PATH", we don't want `$PATH` to be expanded.
filename = expand_path(outtmpl).replace(sep, '') % template_dict
+ if force_ext is not None:
+ filename = replace_extension(filename, force_ext, template_dict.get('ext'))
+
# https://github.com/blackjack4494/youtube-dlc/issues/85
trim_file_name = self.params.get('trim_file_name', False)
if trim_file_name:
sub_ext = fn_groups[-2]
filename = '.'.join(filter(None, [fn_groups[0][:trim_file_name], sub_ext, ext]))
- # Temporary fix for #4787
- # 'Treat' all problem characters by passing filename through preferredencoding
- # to workaround encoding issues with subprocess on python2 @ Windows
- if sys.version_info < (3, 0) and sys.platform == 'win32':
- filename = encodeFilename(filename, True).decode(preferredencoding())
- return sanitize_path(filename)
+ return filename
except ValueError as err:
self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
return None
+    def prepare_filename(self, info_dict, dir_type='', warn=False):
+        """Generate the output filename.
+
+        The template selected by dir_type ('default' when dir_type is empty)
+        is evaluated by _prepare_filename(). A result of '-' or an empty/None
+        result is returned as is. Otherwise the result is joined onto the
+        'home' path and, when dir_type is given, the dir_type path from the
+        'paths' param, and the joined path is sanitized.
+
+        With warn=True, the first such call on this instance reports a
+        warning if 'paths' is set but will be ignored (output to stdout or
+        an absolute path in the template).
+        """
+        paths = self.params.get('paths', {})
+        assert isinstance(paths, dict)
+        filename = self._prepare_filename(info_dict, dir_type or 'default')
+
+        if warn and not self.__prepare_filename_warned:
+            if not paths:
+                pass
+            elif filename == '-':
+                self.report_warning('--paths is ignored when an outputting to stdout')
+            # filename is None when the template could not be evaluated;
+            # os.path.isabs() must not be called on it
+            elif filename and os.path.isabs(filename):
+                self.report_warning('--paths is ignored since an absolute path is given in output template')
+            self.__prepare_filename_warned = True
+        if filename == '-' or not filename:
+            return filename
+
+        homepath = expand_path(paths.get('home', '').strip())
+        assert isinstance(homepath, compat_str)
+        subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
+        assert isinstance(subdir, compat_str)
+        path = os.path.join(homepath, subdir, filename)
+
+        # Temporary fix for #4787
+        # 'Treat' all problem characters by passing filename through preferredencoding
+        # to workaround encoding issues with subprocess on python2 @ Windows
+        if sys.version_info < (3, 0) and sys.platform == 'win32':
+            path = encodeFilename(path, True).decode(preferredencoding())
+        return sanitize_path(path, force=self.params.get('windowsfilenames'))
+
def _match_entry(self, info_dict, incomplete):
""" Returns None if the file should be downloaded """
- video_title = info_dict.get('title', info_dict.get('id', 'video'))
- if 'title' in info_dict:
- # This can happen when we're just evaluating the playlist
- title = info_dict['title']
- matchtitle = self.params.get('matchtitle', False)
- if matchtitle:
- if not re.search(matchtitle, title, re.IGNORECASE):
- return '"' + title + '" title did not match pattern "' + matchtitle + '"'
- rejecttitle = self.params.get('rejecttitle', False)
- if rejecttitle:
- if re.search(rejecttitle, title, re.IGNORECASE):
- return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
- date = info_dict.get('upload_date')
- if date is not None:
- dateRange = self.params.get('daterange', DateRange())
- if date not in dateRange:
- return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
- view_count = info_dict.get('view_count')
- if view_count is not None:
- min_views = self.params.get('min_views')
- if min_views is not None and view_count < min_views:
- return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
- max_views = self.params.get('max_views')
- if max_views is not None and view_count > max_views:
- return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
- if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
- return 'Skipping "%s" because it is age restricted' % video_title
- if self.in_download_archive(info_dict):
- return '%s has already been recorded in archive' % video_title
-
- if not incomplete:
- match_filter = self.params.get('match_filter')
- if match_filter is not None:
- ret = match_filter(info_dict)
- if ret is not None:
- return ret
-
- return None
+        video_title = info_dict.get('title', info_dict.get('id', 'video'))
+
+        def check_filter():
+            """Return why the title/date/view-count/age filters reject the
+            entry, or None if none of them does."""
+            if 'title' in info_dict:
+                # This can happen when we're just evaluating the playlist
+                title = info_dict['title']
+                matchtitle = self.params.get('matchtitle', False)
+                if matchtitle:
+                    if not re.search(matchtitle, title, re.IGNORECASE):
+                        return '"' + title + '" title did not match pattern "' + matchtitle + '"'
+                rejecttitle = self.params.get('rejecttitle', False)
+                if rejecttitle:
+                    if re.search(rejecttitle, title, re.IGNORECASE):
+                        return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
+            date = info_dict.get('upload_date')
+            if date is not None:
+                dateRange = self.params.get('daterange', DateRange())
+                if date not in dateRange:
+                    return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
+            view_count = info_dict.get('view_count')
+            if view_count is not None:
+                min_views = self.params.get('min_views')
+                if min_views is not None and view_count < min_views:
+                    return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
+                max_views = self.params.get('max_views')
+                if max_views is not None and view_count > max_views:
+                    return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
+            if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
+                return 'Skipping "%s" because it is age restricted' % video_title
+            return None
+
+        # The checks run in this order: basic filters, download archive,
+        # user supplied match_filter. An archive hit is remembered in a flag
+        # instead of being recognised from the wording of the message, so
+        # that 'break_on_existing' cannot be defeated by a wording change.
+        in_archive = False
+        reason = check_filter()
+        if reason is None and self.in_download_archive(info_dict):
+            in_archive = True
+            reason = '%s has already been recorded in archive' % video_title
+        if reason is None and not incomplete:
+            match_filter = self.params.get('match_filter')
+            if match_filter is not None:
+                reason = match_filter(info_dict)
+
+        if reason is not None:
+            self.to_screen('[download] ' + reason)
+            if in_archive and self.params.get('break_on_existing', False):
+                raise ExistingVideoReached()
+            elif self.params.get('break_on_reject', False):
+                raise RejectedVideoReached()
+        return reason
@staticmethod
def add_extra_info(info_dict, extra_info):
'and will probably not work.')
try:
- temp_id = ie.extract_id(url) if callable(getattr(ie, 'extract_id', None)) else ie._match_id(url)
+ temp_id = str_or_none(
+ ie.extract_id(url) if callable(getattr(ie, 'extract_id', None))
+ else ie._match_id(url))
except (AssertionError, IndexError, AttributeError):
temp_id = None
if temp_id is not None and self.in_download_archive({'id': temp_id, 'ie_key': ie_key}):
self.to_screen("[%s] %s: has already been recorded in archive" % (
ie_key, temp_id))
break
-
return self.__extract_info(url, ie, download, extra_info, process, info_dict)
-
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
self.report_error(msg)
except ExtractorError as e: # An error we somewhat expected
self.report_error(compat_str(e), e.format_traceback())
- except MaxDownloadsReached:
+ except (MaxDownloadsReached, ExistingVideoReached, RejectedVideoReached):
raise
except Exception as e:
if self.params.get('ignoreerrors', False):
extract_flat = self.params.get('extract_flat', False)
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
or extract_flat is True):
- self.__forced_printings(
- ie_result, self.prepare_filename(ie_result),
- incomplete=True)
+ self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
return ie_result
if result_type == 'video':
return self.process_ie_result(
new_result, download=download, extra_info=extra_info)
elif result_type in ('playlist', 'multi_video'):
- # We process each entry in the playlist
- playlist = ie_result.get('title') or ie_result.get('id')
- self.to_screen('[download] Downloading playlist: %s' % playlist)
-
- playlist_results = []
-
- playliststart = self.params.get('playliststart', 1) - 1
- playlistend = self.params.get('playlistend')
- # For backwards compatibility, interpret -1 as whole list
- if playlistend == -1:
- playlistend = None
-
- playlistitems_str = self.params.get('playlist_items')
- playlistitems = None
- if playlistitems_str is not None:
- def iter_playlistitems(format):
- for string_segment in format.split(','):
- if '-' in string_segment:
- start, end = string_segment.split('-')
- for item in range(int(start), int(end) + 1):
- yield int(item)
- else:
- yield int(string_segment)
- playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
-
- ie_entries = ie_result['entries']
-
- def make_playlistitems_entries(list_ie_entries):
- num_entries = len(list_ie_entries)
- return [
- list_ie_entries[i - 1] for i in playlistitems
- if -num_entries <= i - 1 < num_entries]
-
- def report_download(num_entries):
+ # Protect from infinite recursion due to recursively nested playlists
+ # (see https://github.com/ytdl-org/youtube-dl/issues/27833)
+ webpage_url = ie_result['webpage_url']
+ if webpage_url in self._playlist_urls:
self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, num_entries))
+ # The fallback is parenthesised because '%' binds tighter than 'or'
+ '[download] Skipping already downloaded playlist: %s'
+ % (ie_result.get('title') or ie_result.get('id')))
+ return
- if isinstance(ie_entries, list):
- n_all_entries = len(ie_entries)
- if playlistitems:
- entries = make_playlistitems_entries(ie_entries)
- else:
- entries = ie_entries[playliststart:playlistend]
- n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
- (ie_result['extractor'], playlist, n_all_entries, n_entries))
- elif isinstance(ie_entries, PagedList):
- if playlistitems:
- entries = []
- for item in playlistitems:
- entries.extend(ie_entries.getslice(
- item - 1, item
- ))
- else:
- entries = ie_entries.getslice(
- playliststart, playlistend)
- n_entries = len(entries)
- report_download(n_entries)
- else: # iterable
- if playlistitems:
- entries = make_playlistitems_entries(list(itertools.islice(
- ie_entries, 0, max(playlistitems))))
- else:
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
- n_entries = len(entries)
- report_download(n_entries)
-
- if self.params.get('playlistreverse', False):
- entries = entries[::-1]
-
- if self.params.get('playlistrandom', False):
- random.shuffle(entries)
-
- x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
-
- for i, entry in enumerate(entries, 1):
- self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
- # This __x_forwarded_for_ip thing is a bit ugly but requires
- # minimal changes
- if x_forwarded_for:
- entry['__x_forwarded_for_ip'] = x_forwarded_for
- extra = {
- 'n_entries': n_entries,
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_uploader': ie_result.get('uploader'),
- 'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
- 'extractor': ie_result['extractor'],
- 'webpage_url': ie_result['webpage_url'],
- 'webpage_url_basename': url_basename(ie_result['webpage_url']),
- 'extractor_key': ie_result['extractor_key'],
- }
-
- reason = self._match_entry(entry, incomplete=True)
- if reason is not None:
- if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing'):
- print('[download] tried downloading a file that\'s already in the archive, stopping since --break-on-existing is set.')
- break
- else:
- self.to_screen('[download] ' + reason)
- continue
-
- entry_result = self.__process_iterable_entry(entry, download, extra)
- # TODO: skip failed (empty) entries?
- playlist_results.append(entry_result)
- ie_result['entries'] = playlist_results
- self.to_screen('[download] Finished downloading playlist: %s' % playlist)
- return ie_result
+ self._playlist_level += 1
+ self._playlist_urls.add(webpage_url)
+ try:
+ return self.__process_playlist(ie_result, download)
+ finally:
+ self._playlist_level -= 1
+ if not self._playlist_level:
+ self._playlist_urls.clear()
elif result_type == 'compat_list':
self.report_warning(
'Extractor %s returned a compat_list result. '
else:
raise Exception('Invalid result type: %s' % result_type)
+ def __process_playlist(self, ie_result, download):
+ """Process the entries of a playlist result.
+
+ Optionally writes the playlist's own infojson/description files,
+ selects the entries requested through the playlist* params and
+ passes each one that _match_entry() does not reject to
+ __process_iterable_entry(). Returns ie_result with 'entries'
+ replaced by the list of processed entries. A bare return (None)
+ is used on the failure paths of the playlist file writing.
+ """
+ # We process each entry in the playlist
+ playlist = ie_result.get('title') or ie_result.get('id')
+ self.to_screen('[download] Downloading playlist: %s' % playlist)
+
+ if self.params.get('allow_playlist_files', True):
+ # Playlist-level fields for the filename templates; the update()
+ # below lets values present in ie_result take precedence
+ ie_copy = {
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': 0
+ }
+ ie_copy.update(dict(ie_result))
+
+ def ensure_dir_exists(path):
+ return make_dir(path, self.report_error)
+
+ if self.params.get('writeinfojson', False):
+ infofn = self.prepare_filename(ie_copy, 'pl_infojson')
+ if not ensure_dir_exists(encodeFilename(infofn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
+ self.to_screen('[info] Playlist metadata is already present')
+ else:
+ playlist_info = dict(ie_result)
+ # playlist_info['entries'] = list(playlist_info['entries'])  # Entries is a generator which shouldnot be resolved here
+ del playlist_info['entries']
+ self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
+ try:
+ write_json_file(self.filter_requested_info(playlist_info), infofn)
+ except (OSError, IOError):
+ self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
+
+ if self.params.get('writedescription', False):
+ descfn = self.prepare_filename(ie_copy, 'pl_description')
+ if not ensure_dir_exists(encodeFilename(descfn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
+ self.to_screen('[info] Playlist description is already present')
+ elif ie_result.get('description') is None:
+ self.report_warning('There\'s no playlist description to write.')
+ else:
+ try:
+ self.to_screen('[info] Writing playlist description to: ' + descfn)
+ with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ descfile.write(ie_result['description'])
+ except (OSError, IOError):
+ self.report_error('Cannot write playlist description file ' + descfn)
+ return
+
+ playlist_results = []
+
+ # 'playliststart' is 1-based in params; it is used as a 0-based index below
+ playliststart = self.params.get('playliststart', 1) - 1
+ playlistend = self.params.get('playlistend')
+ # For backwards compatibility, interpret -1 as whole list
+ if playlistend == -1:
+ playlistend = None
+
+ playlistitems_str = self.params.get('playlist_items')
+ playlistitems = None
+ if playlistitems_str is not None:
+ # Expands a specification such as "1-3,7" into 1, 2, 3, 7
+ def iter_playlistitems(format):
+ for string_segment in format.split(','):
+ if '-' in string_segment:
+ start, end = string_segment.split('-')
+ for item in range(int(start), int(end) + 1):
+ yield int(item)
+ else:
+ yield int(string_segment)
+ playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
+
+ ie_entries = ie_result['entries']
+
+ # Picks the requested (1-based) items; indices outside the list are ignored
+ def make_playlistitems_entries(list_ie_entries):
+ num_entries = len(list_ie_entries)
+ return [
+ list_ie_entries[i - 1] for i in playlistitems
+ if -num_entries <= i - 1 < num_entries]
+
+ def report_download(num_entries):
+ self.to_screen(
+ '[%s] playlist %s: Downloading %d videos' %
+ (ie_result['extractor'], playlist, num_entries))
+
+ # The entries may be a list, a PagedList or any other iterable
+ if isinstance(ie_entries, list):
+ n_all_entries = len(ie_entries)
+ if playlistitems:
+ entries = make_playlistitems_entries(ie_entries)
+ else:
+ entries = ie_entries[playliststart:playlistend]
+ n_entries = len(entries)
+ self.to_screen(
+ '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
+ (ie_result['extractor'], playlist, n_all_entries, n_entries))
+ elif isinstance(ie_entries, PagedList):
+ if playlistitems:
+ entries = []
+ for item in playlistitems:
+ entries.extend(ie_entries.getslice(
+ item - 1, item
+ ))
+ else:
+ entries = ie_entries.getslice(
+ playliststart, playlistend)
+ n_entries = len(entries)
+ report_download(n_entries)
+ else:  # iterable
+ if playlistitems:
+ entries = make_playlistitems_entries(list(itertools.islice(
+ ie_entries, 0, max(playlistitems))))
+ else:
+ entries = list(itertools.islice(
+ ie_entries, playliststart, playlistend))
+ n_entries = len(entries)
+ report_download(n_entries)
+
+ if self.params.get('playlistreverse', False):
+ entries = entries[::-1]
+
+ if self.params.get('playlistrandom', False):
+ random.shuffle(entries)
+
+ x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+
+ for i, entry in enumerate(entries, 1):
+ self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
+ # This __x_forwarded_for_ip thing is a bit ugly but requires
+ # minimal changes
+ if x_forwarded_for:
+ entry['__x_forwarded_for_ip'] = x_forwarded_for
+ extra = {
+ 'n_entries': n_entries,
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+ 'extractor': ie_result['extractor'],
+ 'webpage_url': ie_result['webpage_url'],
+ 'webpage_url_basename': url_basename(ie_result['webpage_url']),
+ 'extractor_key': ie_result['extractor_key'],
+ }
+
+ # A result other than None means the entry is not to be downloaded
+ if self._match_entry(entry, incomplete=True) is not None:
+ continue
+
+ entry_result = self.__process_iterable_entry(entry, download, extra)
+ # TODO: skip failed (empty) entries?
+ playlist_results.append(entry_result)
+ ie_result['entries'] = playlist_results
+ self.to_screen('[download] Finished downloading playlist: %s' % playlist)
+ return ie_result
+
@__handle_extraction_exceptions
def __process_iterable_entry(self, entry, download, extra_info):
return self.process_ie_result(
'*=': lambda attr, value: value in attr,
}
str_operator_rex = re.compile(r'''(?x)
- \s*(?P<key>ext|acodec|vcodec|container|protocol|format_id)
+ \s*(?P<key>[a-zA-Z0-9._-]+)
\s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
\s*(?P<value>[a-zA-Z0-9._-]+)
\s*$
merger = FFmpegMergerPP(self)
return merger.available and merger.can_merge()
- def prefer_best():
- if self.params.get('simulate', False):
- return False
- if not download:
- return False
- if self.params.get('outtmpl', DEFAULT_OUTTMPL) == '-':
- return True
- if info_dict.get('is_live'):
- return True
- if not can_merge():
- return True
- return False
-
- req_format_list = ['bestvideo+bestaudio', 'best']
- if prefer_best():
- req_format_list.reverse()
- return '/'.join(req_format_list)
+ prefer_best = (
+ not self.params.get('simulate', False)
+ and download
+ and (
+ not can_merge()
+ or info_dict.get('is_live', False)
+ or self.outtmpl_dict['default'] == '-'))
+
+ return (
+ 'best/bestvideo+bestaudio'
+ if prefer_best
+ else 'bestvideo*+bestaudio/best'
+ if not self.params.get('allow_multiple_audio_streams', False)
+ else 'bestvideo+bestaudio/best')
def build_format_selector(self, format_spec):
def syntax_error(note, start):
GROUP = 'GROUP'
FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])
+ allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
+ 'video': self.params.get('allow_multiple_video_streams', False)}
+
def _parse_filter(tokens):
filter_parts = []
for type, string, start, _, _ in tokens:
return selectors
def _build_selector_function(selector):
- if isinstance(selector, list):
+ if isinstance(selector, list): # ,
fs = [_build_selector_function(s) for s in selector]
def selector_function(ctx):
for format in f(ctx):
yield format
return selector_function
- elif selector.type == GROUP:
+
+ elif selector.type == GROUP: # ()
selector_function = _build_selector_function(selector.selector)
- elif selector.type == PICKFIRST:
+
+ elif selector.type == PICKFIRST: # /
fs = [_build_selector_function(s) for s in selector.selector]
def selector_function(ctx):
if picked_formats:
return picked_formats
return []
- elif selector.type == SINGLE:
- format_spec = selector.selector
- def selector_function(ctx):
- formats = list(ctx['formats'])
- if not formats:
- return
- if format_spec == 'all':
- for f in formats:
- yield f
- elif format_spec in ['best', 'worst', None]:
- format_idx = 0 if format_spec == 'worst' else -1
- audiovideo_formats = [
- f for f in formats
- if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
- if audiovideo_formats:
- yield audiovideo_formats[format_idx]
- # for extractors with incomplete formats (audio only (soundcloud)
- # or video only (imgur)) we will fallback to best/worst
- # {video,audio}-only format
- elif ctx['incomplete_formats']:
- yield formats[format_idx]
- elif format_spec == 'bestaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[-1]
- elif format_spec == 'worstaudio':
- audio_formats = [
- f for f in formats
- if f.get('vcodec') == 'none']
- if audio_formats:
- yield audio_formats[0]
- elif format_spec == 'bestvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[-1]
- elif format_spec == 'worstvideo':
- video_formats = [
- f for f in formats
- if f.get('acodec') == 'none']
- if video_formats:
- yield video_formats[0]
+ elif selector.type == SINGLE: # atom
+ format_spec = selector.selector if selector.selector is not None else 'best'
+
+ if format_spec == 'all':
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ if formats:
+ for f in formats:
+ yield f
+
+ else:
+ format_fallback = False
+ format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
+ if format_spec_obj is not None:
+ format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
+ format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
+ not_format_type = 'v' if format_type == 'a' else 'a'
+ format_modified = format_spec_obj.group(3) is not None
+
+ format_fallback = not format_type and not format_modified # for b, w
+ filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
+ if format_type and format_modified # bv*, ba*, wv*, wa*
+ else (lambda f: f.get(not_format_type + 'codec') == 'none')
+ if format_type # bv, ba, wv, wa
+ else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+ if not format_modified # b, w
+ else None) # b*, w*
else:
- extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
- if format_spec in extensions:
- filter_f = lambda f: f['ext'] == format_spec
- else:
- filter_f = lambda f: f['format_id'] == format_spec
- matches = list(filter(filter_f, formats))
+ format_idx = -1
+ filter_f = ((lambda f: f.get('ext') == format_spec)
+ if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
+ else (lambda f: f.get('format_id') == format_spec)) # id
+
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ if not formats:
+ return
+ matches = list(filter(filter_f, formats)) if filter_f is not None else formats
if matches:
- yield matches[-1]
- elif selector.type == MERGE:
+ yield matches[format_idx]
+ elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
+ # for extractors with incomplete formats (audio only (soundcloud)
+ # or video only (imgur)) best/worst will fallback to
+ # best/worst {video,audio}-only format
+ yield formats[format_idx]
+
+ elif selector.type == MERGE: # +
def _merge(formats_pair):
format_1, format_2 = formats_pair
formats_info.extend(format_1.get('requested_formats', (format_1,)))
formats_info.extend(format_2.get('requested_formats', (format_2,)))
+ if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
+ get_no_more = {"video": False, "audio": False}
+ for (i, fmt_info) in enumerate(formats_info):
+ for aud_vid in ["audio", "video"]:
+ if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
+ if get_no_more[aud_vid]:
+ formats_info.pop(i)
+ get_no_more[aud_vid] = True
+
+ if len(formats_info) == 1:
+ return formats_info[0]
+
video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
if req_format is None:
req_format = self._default_format_spec(info_dict, download=download)
if self.params.get('verbose'):
- self.to_stdout('[debug] Default format spec: %s' % req_format)
+ self.to_screen('[debug] Default format spec: %s' % req_format)
format_selector = self.build_format_selector(req_format)
expected=True)
if download:
+ self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
if len(formats_to_download) > 1:
self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
for format in formats_to_download:
assert info_dict.get('_type', 'video') == 'video'
+ info_dict.setdefault('__postprocessors', [])
+
max_downloads = self.params.get('max_downloads')
if max_downloads is not None:
if self._num_downloads >= int(max_downloads):
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
- reason = self._match_entry(info_dict, incomplete=False)
- if reason is not None:
- self.to_screen('[download] ' + reason)
+ if self._match_entry(info_dict, incomplete=False) is not None:
return
self._num_downloads += 1
- info_dict['_filename'] = filename = self.prepare_filename(info_dict)
+ info_dict = self.pre_process(info_dict)
+
+ info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
+ temp_filename = self.prepare_filename(info_dict, 'temp')
+ files_to_move = {}
+ skip_dl = self.params.get('skip_download', False)
# Forced printings
- self.__forced_printings(info_dict, filename, incomplete=False)
+ self.__forced_printings(info_dict, full_filename, incomplete=False)
- # Do nothing else if in simulate mode
if self.params.get('simulate', False):
+ if self.params.get('force_write_download_archive', False):
+ self.record_download_archive(info_dict)
+
+ # Do nothing else if in simulate mode
return
- if filename is None:
+ if full_filename is None:
return
def ensure_dir_exists(path):
- try:
- dn = os.path.dirname(path)
- if dn and not os.path.exists(dn):
- os.makedirs(dn)
- return True
- except (OSError, IOError) as err:
- self.report_error('unable to create directory ' + error_to_compat_str(err))
- return False
+ # Thin wrapper: hands the path to the shared make_dir helper together with
+ # self.report_error as its second argument.
+ # NOTE(review): callers test the result for truthiness; presumably make_dir keeps
+ # the True/False contract of the removed inline code above - confirm.
+ return make_dir(path, self.report_error)
- if not ensure_dir_exists(sanitize_path(encodeFilename(filename))):
+ if not ensure_dir_exists(encodeFilename(full_filename)):
+ return
+ if not ensure_dir_exists(encodeFilename(temp_filename)):
return
if self.params.get('writedescription', False):
- descfn = replace_extension(filename, 'description', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
+ descfn = self.prepare_filename(info_dict, 'description')
+ if not ensure_dir_exists(encodeFilename(descfn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Video description is already present')
elif info_dict.get('description') is None:
self.report_warning('There\'s no description to write.')
return
if self.params.get('writeannotations', False):
- annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
+ annofn = self.prepare_filename(info_dict, 'annotation')
+ if not ensure_dir_exists(encodeFilename(annofn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
elif not info_dict.get('annotations'):
self.report_warning('There are no annotations to write.')
for ph in self._progress_hooks:
fd.add_progress_hook(ph)
if self.params.get('verbose'):
- self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
+ self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
return fd.download(name, info, subtitle)
subtitles_are_requested = any([self.params.get('writesubtitles', False),
# ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
- sub_filename = subtitles_filename(filename, sub_lang, sub_format, info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
+ sub_fn = self.prepare_filename(info_dict, 'subtitle')
+ sub_filename = subtitles_filename(
+ temp_filename if not skip_dl else sub_fn,
+ sub_lang, sub_format, info_dict.get('ext'))
+ sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+ files_to_move[sub_filename] = sub_filename_final
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
if sub_info.get('data') is not None:
# See https://github.com/ytdl-org/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
+ files_to_move[sub_filename] = sub_filename_final
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
with io.open(encodeFilename(sub_filename), 'wb') as subfile:
subfile.write(sub_data)
'''
+ files_to_move[sub_filename] = sub_filename_final
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
- if self.params.get('skip_download', False):
+ if skip_dl:
if self.params.get('convertsubtitles', False):
- subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
- filename_real_ext = os.path.splitext(filename)[1][1:]
+ # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
+ filename_real_ext = os.path.splitext(full_filename)[1][1:]
filename_wo_ext = (
- os.path.splitext(filename)[0]
+ os.path.splitext(full_filename)[0]
if filename_real_ext == info_dict['ext']
- else filename)
+ else full_filename)
afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
- if subconv.available:
- info_dict.setdefault('__postprocessors', [])
- # info_dict['__postprocessors'].append(subconv)
+ # if subconv.available:
+ # info_dict['__postprocessors'].append(subconv)
if os.path.exists(encodeFilename(afilename)):
self.to_screen(
'[download] %s has already been downloaded and '
'converted' % afilename)
else:
try:
- self.post_process(filename, info_dict)
- except (PostProcessingError) as err:
- self.report_error('postprocessing: %s' % str(err))
+ self.post_process(full_filename, info_dict, files_to_move)
+ except PostProcessingError as err:
+ self.report_error('Postprocessing: %s' % str(err))
return
if self.params.get('writeinfojson', False):
- infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
- self.to_screen('[info] Video description metadata is already present')
+ infofn = self.prepare_filename(info_dict, 'infojson')
+ if not ensure_dir_exists(encodeFilename(infofn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
+ self.to_screen('[info] Video metadata is already present')
else:
- self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
+ self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
try:
write_json_file(self.filter_requested_info(info_dict), infofn)
except (OSError, IOError):
- self.report_error('Cannot write metadata to JSON file ' + infofn)
+ self.report_error('Cannot write video metadata to JSON file ' + infofn)
return
+ info_dict['__infojson_filename'] = infofn
+
+ thumbfn = self.prepare_filename(info_dict, 'thumbnail')
+ thumb_fn_temp = temp_filename if not skip_dl else thumbfn
+ for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
+ thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
+ thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
+ files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
+
+ # Write internet shortcut files
+ url_link = webloc_link = desktop_link = False
+ if self.params.get('writelink', False):
+ if sys.platform == "darwin": # macOS.
+ webloc_link = True
+ elif sys.platform.startswith("linux"):
+ desktop_link = True
+ else: # if sys.platform in ['win32', 'cygwin']:
+ url_link = True
+ if self.params.get('writeurllink', False):
+ url_link = True
+ if self.params.get('writewebloclink', False):
+ webloc_link = True
+ if self.params.get('writedesktoplink', False):
+ desktop_link = True
+
+ if url_link or webloc_link or desktop_link:
+ if 'webpage_url' not in info_dict:
+ self.report_error('Cannot write internet shortcut file because the "webpage_url" field is missing in the media information')
+ return
+ ascii_url = iri_to_uri(info_dict['webpage_url'])
- self._write_thumbnails(info_dict, filename)
+ def _write_link_file(extension, template, newline, embed_filename):
+ linkfn = replace_extension(full_filename, extension, info_dict.get('ext'))
+ if self.params.get('overwrites', True) and os.path.exists(encodeFilename(linkfn)):
+ self.to_screen('[info] Internet shortcut is already present')
+ else:
+ try:
+ self.to_screen('[info] Writing internet shortcut to: ' + linkfn)
+ with io.open(encodeFilename(to_high_limit_path(linkfn)), 'w', encoding='utf-8', newline=newline) as linkfile:
+ template_vars = {'url': ascii_url}
+ if embed_filename:
+ template_vars['filename'] = linkfn[:-(len(extension) + 1)]
+ linkfile.write(template % template_vars)
+ except (OSError, IOError):
+ self.report_error('Cannot write internet shortcut ' + linkfn)
+ return False
+ return True
+
+ if url_link:
+ if not _write_link_file('url', DOT_URL_LINK_TEMPLATE, '\r\n', embed_filename=False):
+ return
+ if webloc_link:
+ if not _write_link_file('webloc', DOT_WEBLOC_LINK_TEMPLATE, '\n', embed_filename=False):
+ return
+ if desktop_link:
+ if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
+ return
- if not self.params.get('skip_download', False):
+ # Download
+ must_record_download_archive = False
+ if not skip_dl:
try:
+
+ def existing_file(*filepaths):
+ # Look for an already-present file among `filepaths` (duplicates collapsed by orderedSet).
+ # When params['final_ext'] differs from info_dict['ext'], the variant of each path
+ # carrying the final extension is checked too and is recorded ahead of the path itself.
+ # Returns the first recorded existing path (after setting info_dict['ext'] to that
+ # file's extension), or None when nothing exists or 'overwrites' is enabled.
+ ext = info_dict.get('ext')
+ final_ext = self.params.get('final_ext', ext)
+ existing_files = []
+ for file in orderedSet(filepaths):
+ if final_ext != ext:
+ converted = replace_extension(file, final_ext, ext)
+ if os.path.exists(encodeFilename(converted)):
+ existing_files.append(converted)
+ if os.path.exists(encodeFilename(file)):
+ existing_files.append(file)
+
+ # With 'overwrites' enabled every file found above is deleted so the download starts clean.
+ # NOTE(review): 'overwrites' defaults to False here but to True in the metadata checks
+ # of this function (description, annotations, info.json) - presumably deliberate so media
+ # files are only replaced on explicit request; confirm.
+ if not existing_files or self.params.get('overwrites', False):
+ for file in orderedSet(existing_files):
+ self.report_file_delete(file)
+ os.remove(encodeFilename(file))
+ return None
+
+ # Reuse the existing file; the [1:] strips the leading dot from the splitext suffix.
+ self.report_file_already_downloaded(existing_files[0])
+ info_dict['ext'] = os.path.splitext(existing_files[0])[1][1:]
+ return existing_files[0]
+
+ success = True
if info_dict.get('requested_formats') is not None:
downloaded = []
- success = True
merger = FFmpegMergerPP(self)
- if not merger.available:
- postprocessors = []
- self.report_warning('You have requested multiple '
- 'formats but ffmpeg or avconv are not installed.'
- ' The formats won\'t be merged.')
- else:
- postprocessors = [merger]
+ if self.params.get('allow_unplayable_formats'):
+ self.report_warning(
+ 'You have requested merging of multiple formats '
+ 'while also allowing unplayable formats to be downloaded. '
+ 'The formats won\'t be merged to prevent data corruption.')
+ elif not merger.available:
+ self.report_warning(
+ 'You have requested merging of multiple formats but ffmpeg is not installed. '
+ 'The formats won\'t be merged.')
def compatible_formats(formats):
# TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
# TODO: Check acodec/vcodec
return False
- filename_real_ext = os.path.splitext(filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(filename)[0]
- if filename_real_ext == info_dict['ext']
- else filename)
requested_formats = info_dict['requested_formats']
+ old_ext = info_dict['ext']
if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
info_dict['ext'] = 'mkv'
self.report_warning(
'Requested formats are incompatible for merge and will be merged into mkv.')
+
+ def correct_ext(filename):
+ # Return `filename` ending in the current info_dict['ext'].
+ # The existing suffix is stripped only when it equals old_ext (the value of
+ # info_dict['ext'] captured before the merge container was chosen); otherwise the
+ # new extension is appended to the whole name.
+ filename_real_ext = os.path.splitext(filename)[1][1:]
+ filename_wo_ext = (
+ os.path.splitext(filename)[0]
+ if filename_real_ext == old_ext
+ else filename)
+ return '%s.%s' % (filename_wo_ext, info_dict['ext'])
+
# Ensure filename always has a correct extension for successful merge
- filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
- if os.path.exists(encodeFilename(filename)):
- self.to_screen(
- '[download] %s has already been downloaded and '
- 'merged' % filename)
- else:
+ full_filename = correct_ext(full_filename)
+ temp_filename = correct_ext(temp_filename)
+ dl_filename = existing_file(full_filename, temp_filename)
+ info_dict['__real_download'] = False
+ if dl_filename is None:
for f in requested_formats:
new_info = dict(info_dict)
new_info.update(f)
fname = prepend_extension(
- self.prepare_filename(new_info),
+ self.prepare_filename(new_info, 'temp'),
'f%s' % f['format_id'], new_info['ext'])
if not ensure_dir_exists(fname):
return
downloaded.append(fname)
- partial_success = dl(fname, new_info)
+ partial_success, real_download = dl(fname, new_info)
+ info_dict['__real_download'] = info_dict['__real_download'] or real_download
success = success and partial_success
- info_dict['__postprocessors'] = postprocessors
- info_dict['__files_to_merge'] = downloaded
+ if merger.available and not self.params.get('allow_unplayable_formats'):
+ info_dict['__postprocessors'].append(merger)
+ info_dict['__files_to_merge'] = downloaded
+ # Even if there were no downloads, it is being merged only now
+ info_dict['__real_download'] = True
+ else:
+ for file in downloaded:
+ files_to_move[file] = None
else:
# Just a single file
- success = dl(filename, info_dict)
+ dl_filename = existing_file(full_filename, temp_filename)
+ if dl_filename is None:
+ success, real_download = dl(temp_filename, info_dict)
+ info_dict['__real_download'] = real_download
+
+ dl_filename = dl_filename or temp_filename
+ info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
return
- if success and filename != '-':
+ if success and full_filename != '-':
# Fixup content
fixup_policy = self.params.get('fixup')
if fixup_policy is None:
fixup_policy = 'detect_or_warn'
- INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg or avconv to fix this automatically.'
+ INSTALL_FFMPEG_MESSAGE = 'Install ffmpeg to fix this automatically.'
stretched_ratio = info_dict.get('stretched_ratio')
if stretched_ratio is not None and stretched_ratio != 1:
elif fixup_policy == 'detect_or_warn':
stretched_pp = FFmpegFixupStretchedPP(self)
if stretched_pp.available:
- info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(stretched_pp)
else:
self.report_warning(
assert fixup_policy in ('ignore', 'never')
if (info_dict.get('requested_formats') is None
- and info_dict.get('container') == 'm4a_dash'):
+ and info_dict.get('container') == 'm4a_dash'
+ and info_dict.get('ext') == 'm4a'):
if fixup_policy == 'warn':
self.report_warning(
'%s: writing DASH m4a. '
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM4aPP(self)
if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
elif fixup_policy == 'detect_or_warn':
fixup_pp = FFmpegFixupM3u8PP(self)
if fixup_pp.available:
- info_dict.setdefault('__postprocessors', [])
info_dict['__postprocessors'].append(fixup_pp)
else:
self.report_warning(
assert fixup_policy in ('ignore', 'never')
try:
- self.post_process(filename, info_dict)
- except (PostProcessingError) as err:
- self.report_error('postprocessing: %s' % str(err))
+ self.post_process(dl_filename, info_dict, files_to_move)
+ except PostProcessingError as err:
+ self.report_error('Postprocessing: %s' % str(err))
return
- self.record_download_archive(info_dict)
+ try:
+ for ph in self._post_hooks:
+ ph(full_filename)
+ except Exception as err:
+ self.report_error('post hooks: %s' % str(err))
+ return
+ must_record_download_archive = True
+
+ if must_record_download_archive or self.params.get('force_write_download_archive', False):
+ self.record_download_archive(info_dict)
+ max_downloads = self.params.get('max_downloads')
+ if max_downloads is not None and self._num_downloads >= int(max_downloads):
+ raise MaxDownloadsReached()
def download(self, url_list):
"""Download a given list of URLs."""
- outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
+ outtmpl = self.outtmpl_dict['default']
if (len(url_list) > 1
and outtmpl != '-'
and '%' not in outtmpl
except UnavailableVideoError:
self.report_error('unable to download video')
except MaxDownloadsReached:
- self.to_screen('[info] Maximum number of downloaded files reached.')
+ self.to_screen('[info] Maximum number of downloaded files reached')
+ raise
+ except ExistingVideoReached:
+ self.to_screen('[info] Encountered a file that is already in the archive, stopping due to --break-on-existing')
+ raise
+ except RejectedVideoReached:
+ self.to_screen('[info] Encountered a file that did not match filter, stopping due to --break-on-reject')
raise
else:
if self.params.get('dump_single_json', False):
@staticmethod
def filter_requested_info(info_dict):
+ # Build a new dict from info_dict that leaves out 'requested_formats' and
+ # 'requested_subtitles' as well as every key starting with '_', except '_type'.
+ # Values are not copied, only re-referenced.
+ fields_to_remove = ('requested_formats', 'requested_subtitles')
return dict(
(k, v) for k, v in info_dict.items()
- if k not in ['requested_formats', 'requested_subtitles'])
+ if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
+
+ def run_pp(self, pp, infodict, files_to_move={}):
+ files_to_delete = []
+ files_to_delete, infodict = pp.run(infodict)
+ if not files_to_delete:
+ return files_to_move, infodict
- def post_process(self, filename, ie_info):
+ if self.params.get('keepvideo', False):
+ for f in files_to_delete:
+ files_to_move.setdefault(f, '')
+ else:
+ for old_filename in set(files_to_delete):
+ self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
+ try:
+ os.remove(encodeFilename(old_filename))
+ except (IOError, OSError):
+ self.report_warning('Unable to remove downloaded original file')
+ if old_filename in files_to_move:
+ del files_to_move[old_filename]
+ return files_to_move, infodict
+
+ def pre_process(self, ie_info):
+ """Run the 'beforedl' postprocessors on a copy of ie_info and return the result."""
+ # dict() makes a shallow copy, so the caller's top-level dict is not rebound.
+ # Only the info dict returned by run_pp is kept; its files_to_move result is dropped.
+ info = dict(ie_info)
+ for pp in self._pps['beforedl']:
+ info = self.run_pp(pp, info)[1]
+ return info
+
+ def post_process(self, filename, ie_info, files_to_move={}):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
info['filepath'] = filename
- pps_chain = []
- if ie_info.get('__postprocessors') is not None:
- pps_chain.extend(ie_info['__postprocessors'])
- pps_chain.extend(self._pps)
- for pp in pps_chain:
- files_to_delete = []
- try:
- files_to_delete, info = pp.run(info)
- except PostProcessingError as e:
- self.report_error(e.msg)
- if files_to_delete and not self.params.get('keepvideo', False):
- for old_filename in set(files_to_delete):
- self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
- try:
- os.remove(encodeFilename(old_filename))
- except (IOError, OSError):
- self.report_warning('Unable to remove downloaded original file')
+ info['__files_to_move'] = {}
+
+ for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
+ files_to_move, info = self.run_pp(pp, info, files_to_move)
+ info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
+ for pp in self._pps['aftermove']:
+ info = self.run_pp(pp, info, {})[1]
def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
break
else:
return
- return extractor.lower() + ' ' + video_id
+ return '%s %s' % (extractor.lower(), video_id)
def in_download_archive(self, info_dict):
fn = self.params.get('download_archive')
res += '~' + format_bytes(fdict['filesize_approx'])
return res
+ def _format_note_table(self, f):
+ # Build the text for the last ('NOTE') column of the table layout in list_formats:
+ # the pieces below joined with ', ', with empty-string pieces left out.
+ def join_fields(*vargs):
+ return ', '.join((val for val in vargs if val != ''))
+
+ # Pieces, in order: an 'UNSUPPORTED' tag for the f4f/f4m extensions, then the
+ # language, format_note, container and asr fields rendered through format_field.
+ # NOTE(review): ignore=(None, f.get('ext')) presumably blanks the container when it
+ # merely repeats the extension - confirm against format_field.
+ return join_fields(
+ 'UNSUPPORTED' if f.get('ext') in ('f4f', 'f4m') else '',
+ format_field(f, 'language', '[%s]'),
+ format_field(f, 'format_note'),
+ format_field(f, 'container', ignore=(None, f.get('ext'))),
+ format_field(f, 'asr', '%5dHz'))
+
def list_formats(self, info_dict):
+ # Print a table of the available formats for info_dict['id'] through self.to_screen.
+ # When info_dict has no 'formats' key, info_dict itself is listed as the only format.
+ # Formats whose 'preference' is set and below -1000 are left out of the table.
formats = info_dict.get('formats', [info_dict])
- table = [
- [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
- for f in formats
- if f.get('preference') is None or f['preference'] >= -1000]
- if len(formats) > 1:
- table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'
-
- header_line = ['format code', 'extension', 'resolution', 'note']
+ # params['listformats_table'] selects the wide layout with one column per attribute;
+ # otherwise the four-column layout is produced.
+ new_format = self.params.get('listformats_table', False)
+ if new_format:
+ # NOTE(review): f.get('protocol') is dereferenced without a default below; a format
+ # lacking 'protocol' would raise AttributeError - confirm every format carries it.
+ table = [
+ [
+ format_field(f, 'format_id'),
+ format_field(f, 'ext'),
+ self.format_resolution(f),
+ format_field(f, 'fps', '%d'),
+ '|',
+ format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
+ format_field(f, 'tbr', '%4dk'),
+ f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
+ '|',
+ format_field(f, 'vcodec', default='unknown').replace('none', ''),
+ format_field(f, 'vbr', '%4dk'),
+ format_field(f, 'acodec', default='unknown').replace('none', ''),
+ format_field(f, 'abr', '%3dk'),
+ format_field(f, 'asr', '%5dHz'),
+ self._format_note_table(f)]
+ for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
+ header_line = ['ID', 'EXT', 'RESOLUTION', 'FPS', '|', ' FILESIZE', ' TBR', 'PROTO',
+ '|', 'VCODEC', ' VBR', 'ACODEC', ' ABR', ' ASR', 'NOTE']
+ else:
+ table = [
+ [
+ format_field(f, 'format_id'),
+ format_field(f, 'ext'),
+ self.format_resolution(f),
+ self._format_note(f)]
+ for f in formats
+ if f.get('preference') is None or f['preference'] >= -1000]
+ header_line = ['format code', 'extension', 'resolution', 'note']
+
+ # delim, extraGap and hideEmpty are switched together with the layout choice.
self.to_screen(
- '[info] Available formats for %s:\n%s' %
- (info_dict['id'], render_table(header_line, table)))
+ '[info] Available formats for %s:\n%s' % (info_dict['id'], render_table(
+ header_line,
+ table,
+ delim=new_format,
+ extraGap=(0 if new_format else 1),
+ hideEmpty=new_format)))
def list_thumbnails(self, info_dict):
thumbnails = info_dict.get('thumbnails')
self.get_encoding()))
write_string(encoding_str, encoding=None)
- self._write_string('[debug] youtube-dlc version ' + __version__ + '\n')
+ source = (
+ '(exe)' if hasattr(sys, 'frozen')
+ else '(zip)' if isinstance(globals().get('__loader__'), zipimporter)
+ else '(source)' if os.path.basename(sys.argv[0]) == '__main__.py'
+ else '')
+ self._write_string('[debug] yt-dlp version %s %s\n' % (__version__, source))
if _LAZY_LOADER:
- self._write_string('[debug] Lazy loading extractors enabled' + '\n')
+ self._write_string('[debug] Lazy loading extractors enabled\n')
+ if _PLUGIN_CLASSES:
+ self._write_string(
+ '[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
stdout=subprocess.PIPE, stderr=subprocess.PIPE,
cwd=os.path.dirname(os.path.abspath(__file__)))
- out, err = sp.communicate()
+ out, err = process_communicate_or_kill(sp)
out = out.decode().strip()
if re.match('[0-9a-f]+', out):
- self._write_string('[debug] Git HEAD: ' + out + '\n')
+ self._write_string('[debug] Git HEAD: %s\n' % out)
except Exception:
try:
sys.exc_clear()
return impl_name + ' version %d.%d.%d' % sys.pypy_version_info[:3]
return impl_name
- self._write_string('[debug] Python version %s (%s) - %s\n' % (
- platform.python_version(), python_implementation(),
+ self._write_string('[debug] Python version %s (%s %s) - %s\n' % (
+ platform.python_version(),
+ python_implementation(),
+ platform.architecture()[0],
platform_name()))
exe_versions = FFmpegPostProcessor.get_versions(self)
if self.params.get('call_home', False):
ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
self._write_string('[debug] Public IP address: %s\n' % ipaddr)
+ return
latest_version = self.urlopen(
'https://yt-dl.org/latest/version').read().decode('utf-8')
if version_tuple(latest_version) > version_tuple(__version__):
encoding = preferredencoding()
return encoding
# Diff hunk for _write_thumbnails: lines starting with '-' are the removed
# implementation, lines starting with '+' are its replacement, and unmarked
# lines are unchanged context.
# The replacement returns `ret`, a list holding `suffix + thumb_ext` for every
# thumbnail that was either already on disk or written successfully.
- def _write_thumbnails(self, info_dict, filename):
- if self.params.get('writethumbnail', False):
- thumbnails = info_dict.get('thumbnails')
- if thumbnails:
- thumbnails = [thumbnails[-1]]
- elif self.params.get('write_all_thumbnails', False):
- thumbnails = info_dict.get('thumbnails')
- else:
- return
+ def _write_thumbnails(self, info_dict, filename): # return the extensions
# Thumbnails are only looked up when 'write_all_thumbnails' or
# 'writethumbnail' is set; otherwise the list stays empty, the loop below
# does nothing and an empty list is returned.
+ write_all = self.params.get('write_all_thumbnails', False)
+ thumbnails = []
+ if write_all or self.params.get('writethumbnail', False):
+ thumbnails = info_dict.get('thumbnails') or []
# Per-thumbnail id suffixes are used only when all thumbnails are being
# written and there is more than one of them.
+ multiple = write_all and len(thumbnails) > 1
- if not thumbnails:
- # No thumbnails present, so return immediately
- return
-
- for t in thumbnails:
+ ret = []
# With write_all the list is walked in its original order; otherwise it is
# walked in reverse, so the last entry is tried first.
+ for t in thumbnails[::1 if write_all else -1]:
thumb_ext = determine_ext(t['url'], 'jpg')
- suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
- thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
- t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
+ suffix = '%s.' % t['id'] if multiple else ''
+ thumb_display_id = '%s ' % t['id'] if multiple else ''
# The computed path is also stored on the thumbnail dict under 'filename'.
# NOTE(review): presumably the third argument tells replace_extension which
# extension to expect on `filename` -- confirm against its definition.
+ t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
- if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
# 'overwrites' defaults to True here, so an existing file is only kept when
# the option is explicitly falsy; a kept file is still reported in `ret`.
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
+ ret.append(suffix + thumb_ext)
self.to_screen('[%s] %s: Thumbnail %sis already present' %
(info_dict['extractor'], info_dict['id'], thumb_display_id))
else:
# NOTE(review): the `try:` that pairs with the `except` below is not visible
# in this excerpt; some lines of this branch appear to be elided.
# NOTE(review): `uf` is not explicitly closed in the visible lines.
uf = self.urlopen(t['url'])
with open(encodeFilename(thumb_filename), 'wb') as thumbf:
shutil.copyfileobj(uf, thumbf)
+ ret.append(suffix + thumb_ext)
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
# Download failures are reported as warnings instead of being raised.
except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download thumbnail "%s": %s' %
(t['url'], error_to_compat_str(err)))
# When only a single thumbnail was requested, stop as soon as one entry has
# been recorded in `ret` (already present or freshly written).
+ if ret and not write_all:
+ break
+ return ret