encode_compat_str,
encodeFilename,
error_to_compat_str,
+ EntryNotInPlaylist,
ExistingVideoReached,
expand_path,
ExtractorError,
float_or_none,
format_bytes,
format_field,
+ FORMAT_RE,
formatSeconds,
GeoRestrictedError,
int_or_none,
process_communicate_or_kill,
)
from .cache import Cache
-from .extractor import get_info_extractor, gen_extractor_classes, _LAZY_LOADER, _PLUGIN_CLASSES
+from .extractor import (
+ gen_extractor_classes,
+ get_info_extractor,
+ _LAZY_LOADER,
+ _PLUGIN_CLASSES
+)
from .extractor.openload import PhantomJSwrapper
-from .downloader import get_suitable_downloader
+from .downloader import (
+ get_suitable_downloader,
+ shorten_protocol_name
+)
from .downloader.rtmp import rtmpdump_version
from .postprocessor import (
FFmpegFixupM3u8PP,
logtostderr: Log messages to stderr instead of stdout.
writedescription: Write the video description to a .description file
writeinfojson: Write the video metadata to a .info.json file
+ clean_infojson: Remove private fields from the infojson
writecomments: Extract video comments. This will not be written to disk
unless writeinfojson is also given
writeannotations: Write the video annotations to a .annotations.xml file
source_address: Client-side IP address to bind to.
call_home: Boolean, true iff we are allowed to contact the
yt-dlp servers for debugging. (BROKEN)
+ sleep_interval_requests: Number of seconds to sleep between requests
+ during extraction
sleep_interval: Number of seconds to sleep before each download when
used alone or a lower bound of a range for randomized
sleep before each download (minimum possible number
Must only be used along with sleep_interval.
Actual sleep time will be a random float from range
[sleep_interval; max_sleep_interval].
+ sleep_interval_subtitles: Number of seconds to sleep before each subtitle download
listformats: Print an overview of available video formats and exit.
list_thumbnails: Print a table of all thumbnails and exit.
match_filter: A function that gets called with the info_dict of
geo_bypass_country
The following options determine which downloader is picked:
- external_downloader: Executable of the external downloader to call.
- None or unset for standard (built-in) downloader.
- hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv
+ external_downloader: A dictionary of protocol keys and the executable of the
+ external downloader to use for it. The allowed protocols
+ are default|http|ftp|m3u8|dash|rtsp|rtmp|mms.
+ Set the value to 'native' to use the native downloader
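+ E.g. {'m3u8': 'native', 'default': 'aria2c'} uses the built-in HLS downloader but aria2c for everything else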
+ hls_prefer_native: Deprecated - Use external_downloader = {'m3u8': 'native'}
+ or {'m3u8': 'ffmpeg'} instead.
+ Use the native HLS downloader instead of ffmpeg/avconv
if True, otherwise use ffmpeg/avconv if False, otherwise
use downloader suggested by extractor if None.
Use 'default' as the name for arguments to be passed to all PP
The following options are used by the extractors:
- dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
+ extractor_retries: Number of times to retry for known errors
+ dynamic_mpd: Whether to process dynamic DASH manifests (default: True)
hls_split_discontinuity: Split HLS playlists to different formats at
- discontinuities such as ad breaks (default: False)
+ discontinuities such as ad breaks (default: False)
youtube_include_dash_manifest: If True (default), DASH manifests and related
- data will be downloaded and processed by extractor.
- You can reduce network I/O by disabling it if you don't
- care about DASH. (only for youtube)
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about DASH. (only for youtube)
youtube_include_hls_manifest: If True (default), HLS manifests and related
- data will be downloaded and processed by extractor.
- You can reduce network I/O by disabling it if you don't
- care about HLS. (only for youtube)
+ data will be downloaded and processed by extractor.
+ You can reduce network I/O by disabling it if you don't
+ care about HLS. (only for youtube)
"""
_NUMERIC_FIELDS = set((
_ies = []
_pps = {'beforedl': [], 'aftermove': [], 'normal': []}
__prepare_filename_warned = False
+ _first_webpage_request = True
_download_retcode = None
_num_downloads = None
_playlist_level = 0
self._ies_instances = {}
self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
self.__prepare_filename_warned = False
+ self._first_webpage_request = True
self._post_hooks = []
self._progress_hooks = []
self._download_retcode = 0
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
return outtmpl_dict
+ def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
+ """ Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
+ template_dict = dict(info_dict)
+
+ # duration_string
+ template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
+ formatSeconds(info_dict['duration'], '-')
+ if info_dict.get('duration', None) is not None
+ else None)
+
+ # epoch
+ template_dict['epoch'] = int(time.time())
+
+ # autonumber
+ autonumber_size = self.params.get('autonumber_size')
+ if autonumber_size is None:
+ autonumber_size = 5
+ template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+
+ # resolution if not defined
+ if template_dict.get('resolution') is None:
+ if template_dict.get('width') and template_dict.get('height'):
+ template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
+ elif template_dict.get('height'):
+ template_dict['resolution'] = '%sp' % template_dict['height']
+ elif template_dict.get('width'):
+ template_dict['resolution'] = '%dx?' % template_dict['width']
+
+ if sanitize is None:
+ sanitize = lambda k, v: v
+ template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
+ for k, v in template_dict.items()
+ if v is not None and not isinstance(v, (list, tuple, dict)))
+ na = self.params.get('outtmpl_na_placeholder', 'NA')
+ template_dict = collections.defaultdict(lambda: na, template_dict)
+
+ # For fields playlist_index and autonumber convert all occurrences
+ # of %(field)s to %(field)0Nd for backward compatibility
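+ # e.g. in a 120-entry playlist, '%(playlist_index)s' is rewritten to '%(playlist_index)03d'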
+ field_size_compat_map = {
+ 'playlist_index': len(str(template_dict['n_entries'])),
+ 'autonumber': autonumber_size,
+ }
+ FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
+ mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
+ if mobj:
+ outtmpl = re.sub(
+ FIELD_SIZE_COMPAT_RE,
+ r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
+ outtmpl)
+
+ numeric_fields = list(self._NUMERIC_FIELDS)
+
+ # Format date
+ FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
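+ # e.g. '%(upload_date>%Y-%m-%d)s' adds an 'upload_date>%Y-%m-%d' key whose value is the strftime-formatted upload_date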
+ for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
+ conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
+ if key in template_dict:
+ continue
+ value = strftime_or_none(template_dict.get(field), frmt, na)
+ if conv_type in 'crs': # string
+ value = sanitize(field, value)
+ else: # number
+ numeric_fields.append(key)
+ value = float_or_none(value, default=None)
+ if value is not None:
+ template_dict[key] = value
+
+ # Missing numeric fields used together with integer presentation types
+ # in format specification will break the argument substitution since
+ # string NA placeholder is returned for missing fields. We will patch
+ # output template for missing fields to meet string presentation type.
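+ # e.g. a missing 'view_count' used as '%(view_count)05d' would be rewritten to '%(view_count)s' so the 'NA' placeholder can be substituted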
+ for numeric_field in numeric_fields:
+ if numeric_field not in template_dict:
+ outtmpl = re.sub(
+ FORMAT_RE.format(re.escape(numeric_field)),
+ r'%({0})s'.format(numeric_field), outtmpl)
+
+ return outtmpl, template_dict
+
def _prepare_filename(self, info_dict, tmpl_type='default'):
try:
- template_dict = dict(info_dict)
-
- template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
- formatSeconds(info_dict['duration'], '-')
- if info_dict.get('duration', None) is not None
- else None)
-
- template_dict['epoch'] = int(time.time())
- autonumber_size = self.params.get('autonumber_size')
- if autonumber_size is None:
- autonumber_size = 5
- template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
- if template_dict.get('resolution') is None:
- if template_dict.get('width') and template_dict.get('height'):
- template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
- elif template_dict.get('height'):
- template_dict['resolution'] = '%sp' % template_dict['height']
- elif template_dict.get('width'):
- template_dict['resolution'] = '%dx?' % template_dict['width']
-
sanitize = lambda k, v: sanitize_filename(
compat_str(v),
restricted=self.params.get('restrictfilenames'),
is_id=(k == 'id' or k.endswith('_id')))
- template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
- for k, v in template_dict.items()
- if v is not None and not isinstance(v, (list, tuple, dict)))
- na = self.params.get('outtmpl_na_placeholder', 'NA')
- template_dict = collections.defaultdict(lambda: na, template_dict)
-
outtmpl = self.outtmpl_dict.get(tmpl_type, self.outtmpl_dict['default'])
- force_ext = OUTTMPL_TYPES.get(tmpl_type)
-
- # For fields playlist_index and autonumber convert all occurrences
- # of %(field)s to %(field)0Nd for backward compatibility
- field_size_compat_map = {
- 'playlist_index': len(str(template_dict['n_entries'])),
- 'autonumber': autonumber_size,
- }
- FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
- mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
- if mobj:
- outtmpl = re.sub(
- FIELD_SIZE_COMPAT_RE,
- r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
- outtmpl)
-
- # As of [1] format syntax is:
- # %[mapping_key][conversion_flags][minimum_width][.precision][length_modifier]type
- # 1. https://docs.python.org/2/library/stdtypes.html#string-formatting
- FORMAT_RE = r'''(?x)
- (?<!%)
- %
- \({0}\) # mapping key
- (?:[#0\-+ ]+)? # conversion flags (optional)
- (?:\d+)? # minimum field width (optional)
- (?:\.\d+)? # precision (optional)
- [hlL]? # length modifier (optional)
- (?P<type>[diouxXeEfFgGcrs%]) # conversion type
- '''
-
- numeric_fields = list(self._NUMERIC_FIELDS)
-
- # Format date
- FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
- for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
- conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
- if key in template_dict:
- continue
- value = strftime_or_none(template_dict.get(field), frmt, na)
- if conv_type in 'crs': # string
- value = sanitize(field, value)
- else: # number
- numeric_fields.append(key)
- value = float_or_none(value, default=None)
- if value is not None:
- template_dict[key] = value
-
- # Missing numeric fields used together with integer presentation types
- # in format specification will break the argument substitution since
- # string NA placeholder is returned for missing fields. We will patch
- # output template for missing fields to meet string presentation type.
- for numeric_field in numeric_fields:
- if numeric_field not in template_dict:
- outtmpl = re.sub(
- FORMAT_RE.format(re.escape(numeric_field)),
- r'%({0})s'.format(numeric_field), outtmpl)
+ outtmpl, template_dict = self.prepare_outtmpl(outtmpl, info_dict, sanitize)
# expand_path translates '%%' into '%' and '$$' into '$'
# correspondingly that is not what we want since we need to keep
# title "Hello $PATH", we don't want `$PATH` to be expanded.
filename = expand_path(outtmpl).replace(sep, '') % template_dict
+ force_ext = OUTTMPL_TYPES.get(tmpl_type)
if force_ext is not None:
filename = replace_extension(filename, force_ext, template_dict.get('ext'))
else:
raise Exception('Invalid result type: %s' % result_type)
+ def _ensure_dir_exists(self, path):
+ return make_dir(path, self.report_error)
+
def __process_playlist(self, ie_result, download):
# We process each entry in the playlist
playlist = ie_result.get('title') or ie_result.get('id')
self.to_screen('[download] Downloading playlist: %s' % playlist)
- if self.params.get('allow_playlist_files', True):
- ie_copy = {
- 'playlist': playlist,
- 'playlist_id': ie_result.get('id'),
- 'playlist_title': ie_result.get('title'),
- 'playlist_uploader': ie_result.get('uploader'),
- 'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': 0
- }
- ie_copy.update(dict(ie_result))
-
- def ensure_dir_exists(path):
- return make_dir(path, self.report_error)
-
- if self.params.get('writeinfojson', False):
- infofn = self.prepare_filename(ie_copy, 'pl_infojson')
- if not ensure_dir_exists(encodeFilename(infofn)):
- return
- if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
- self.to_screen('[info] Playlist metadata is already present')
- else:
- playlist_info = dict(ie_result)
- # playlist_info['entries'] = list(playlist_info['entries']) # Entries is a generator which shouldnot be resolved here
- del playlist_info['entries']
- self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
- try:
- write_json_file(self.filter_requested_info(playlist_info), infofn)
- except (OSError, IOError):
- self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
-
- if self.params.get('writedescription', False):
- descfn = self.prepare_filename(ie_copy, 'pl_description')
- if not ensure_dir_exists(encodeFilename(descfn)):
- return
- if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
- self.to_screen('[info] Playlist description is already present')
- elif ie_result.get('description') is None:
- self.report_warning('There\'s no playlist description to write.')
- else:
- try:
- self.to_screen('[info] Writing playlist description to: ' + descfn)
- with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
- descfile.write(ie_result['description'])
- except (OSError, IOError):
- self.report_error('Cannot write playlist description file ' + descfn)
- return
+ if 'entries' not in ie_result:
+ raise EntryNotInPlaylist()
+ incomplete_entries = bool(ie_result.get('requested_entries'))
+ if incomplete_entries:
+ def fill_missing_entries(entries, indexes):
+ ret = [None] * max(indexes)
+ for i, entry in zip(indexes, entries):
+ ret[i - 1] = entry
+ return ret
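+ # e.g. entries [A, B] with requested_entries [1, 3] become [A, None, B]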
+ ie_result['entries'] = fill_missing_entries(ie_result['entries'], ie_result['requested_entries'])
playlist_results = []
def make_playlistitems_entries(list_ie_entries):
num_entries = len(list_ie_entries)
- return [
- list_ie_entries[i - 1] for i in playlistitems
- if -num_entries <= i - 1 < num_entries]
-
- def report_download(num_entries):
- self.to_screen(
- '[%s] playlist %s: Downloading %d videos' %
- (ie_result['extractor'], playlist, num_entries))
+ for i in playlistitems:
+ if -num_entries < i <= num_entries:
+ yield list_ie_entries[i - 1]
+ elif incomplete_entries:
+ raise EntryNotInPlaylist()
if isinstance(ie_entries, list):
n_all_entries = len(ie_entries)
if playlistitems:
- entries = make_playlistitems_entries(ie_entries)
+ entries = list(make_playlistitems_entries(ie_entries))
else:
entries = ie_entries[playliststart:playlistend]
n_entries = len(entries)
- self.to_screen(
- '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
- (ie_result['extractor'], playlist, n_all_entries, n_entries))
+ msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
elif isinstance(ie_entries, PagedList):
if playlistitems:
entries = []
entries = ie_entries.getslice(
playliststart, playlistend)
n_entries = len(entries)
- report_download(n_entries)
+ msg = 'Downloading %d videos' % n_entries
else: # iterable
if playlistitems:
- entries = make_playlistitems_entries(list(itertools.islice(
- ie_entries, 0, max(playlistitems))))
+ entries = list(make_playlistitems_entries(list(itertools.islice(
+ ie_entries, 0, max(playlistitems)))))
else:
entries = list(itertools.islice(
ie_entries, playliststart, playlistend))
n_entries = len(entries)
- report_download(n_entries)
+ msg = 'Downloading %d videos' % n_entries
+
+ if any(entry is None for entry in entries):
+ raise EntryNotInPlaylist()
+ if not playlistitems and (playliststart or playlistend):
+ playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
+ ie_result['entries'] = entries
+ ie_result['requested_entries'] = playlistitems
+
+ if self.params.get('allow_playlist_files', True):
+ ie_copy = {
+ 'playlist': playlist,
+ 'playlist_id': ie_result.get('id'),
+ 'playlist_title': ie_result.get('title'),
+ 'playlist_uploader': ie_result.get('uploader'),
+ 'playlist_uploader_id': ie_result.get('uploader_id'),
+ 'playlist_index': 0
+ }
+ ie_copy.update(dict(ie_result))
+
+ if self.params.get('writeinfojson', False):
+ infofn = self.prepare_filename(ie_copy, 'pl_infojson')
+ if not self._ensure_dir_exists(encodeFilename(infofn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
+ self.to_screen('[info] Playlist metadata is already present')
+ else:
+ self.to_screen('[info] Writing playlist metadata as JSON to: ' + infofn)
+ try:
+ write_json_file(self.filter_requested_info(ie_result, self.params.get('clean_infojson', True)), infofn)
+ except (OSError, IOError):
+ self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
+
+ if self.params.get('writedescription', False):
+ descfn = self.prepare_filename(ie_copy, 'pl_description')
+ if not self._ensure_dir_exists(encodeFilename(descfn)):
+ return
+ if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
+ self.to_screen('[info] Playlist description is already present')
+ elif ie_result.get('description') is None:
+ self.report_warning('There\'s no playlist description to write.')
+ else:
+ try:
+ self.to_screen('[info] Writing playlist description to: ' + descfn)
+ with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
+ descfile.write(ie_result['description'])
+ except (OSError, IOError):
+ self.report_error('Cannot write playlist description file ' + descfn)
+ return
if self.params.get('playlistreverse', False):
entries = entries[::-1]
-
if self.params.get('playlistrandom', False):
random.shuffle(entries)
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
+ self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
for i, entry in enumerate(entries, 1):
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
# This __x_forwarded_for_ip thing is a bit ugly but requires
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': playlistitems[i - 1] if playlistitems else i + playliststart,
+ 'playlist_index': playlistitems[i - 1] if playlistitems else i,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
selectors.append(current_selector)
return selectors
+ def _merge(formats_pair):
+ format_1, format_2 = formats_pair
+
+ formats_info = []
+ formats_info.extend(format_1.get('requested_formats', (format_1,)))
+ formats_info.extend(format_2.get('requested_formats', (format_2,)))
+
+ if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
+ get_no_more = {"video": False, "audio": False}
+ for (i, fmt_info) in enumerate(formats_info):
+ for aud_vid in ["audio", "video"]:
+ if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
+ if get_no_more[aud_vid]:
+ formats_info.pop(i)
+ get_no_more[aud_vid] = True
+
+ if len(formats_info) == 1:
+ return formats_info[0]
+
+ video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
+ audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
+
+ the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
+ the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
+
+ output_ext = self.params.get('merge_output_format')
+ if not output_ext:
+ if the_only_video:
+ output_ext = the_only_video['ext']
+ elif the_only_audio and not video_fmts:
+ output_ext = the_only_audio['ext']
+ else:
+ output_ext = 'mkv'
+
+ new_dict = {
+ 'requested_formats': formats_info,
+ 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
+ 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
+ 'ext': output_ext,
+ }
+
+ if the_only_video:
+ new_dict.update({
+ 'width': the_only_video.get('width'),
+ 'height': the_only_video.get('height'),
+ 'resolution': the_only_video.get('resolution') or self.format_resolution(the_only_video),
+ 'fps': the_only_video.get('fps'),
+ 'vcodec': the_only_video.get('vcodec'),
+ 'vbr': the_only_video.get('vbr'),
+ 'stretched_ratio': the_only_video.get('stretched_ratio'),
+ })
+
+ if the_only_audio:
+ new_dict.update({
+ 'acodec': the_only_audio.get('acodec'),
+ 'abr': the_only_audio.get('abr'),
+ })
+
+ return new_dict
+
def _build_selector_function(selector):
if isinstance(selector, list): # ,
fs = [_build_selector_function(s) for s in selector]
return []
elif selector.type == SINGLE: # atom
- format_spec = selector.selector if selector.selector is not None else 'best'
+ format_spec = (selector.selector if selector.selector is not None else 'best').lower()
+ # TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
if format_spec == 'all':
def selector_function(ctx):
formats = list(ctx['formats'])
if formats:
for f in formats:
yield f
+ elif format_spec == 'mergeall':
+ def selector_function(ctx):
+ formats = list(ctx['formats'])
+ merged_format = formats[0]
+ for f in formats[1:]:
+ merged_format = _merge((merged_format, f))
+ yield merged_format
else:
format_fallback = False
- format_spec_obj = re.match(r'(best|worst|b|w)(video|audio|v|a)?(\*)?$', format_spec)
- if format_spec_obj is not None:
- format_idx = 0 if format_spec_obj.group(1)[0] == 'w' else -1
- format_type = format_spec_obj.group(2)[0] if format_spec_obj.group(2) else False
- not_format_type = 'v' if format_type == 'a' else 'a'
- format_modified = format_spec_obj.group(3) is not None
+ mobj = re.match(
+ r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
+ format_spec)
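+ # e.g. 'ba.2' selects the 2nd best audio-only format, 'wv*' the worst format containing a video stream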
+ if mobj is not None:
+ format_idx = int_or_none(mobj.group('n'), default=1)
+ format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
+ format_type = (mobj.group('type') or [None])[0]
+ not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
+ format_modified = mobj.group('mod') is not None
format_fallback = not format_type and not format_modified # for b, w
- filter_f = ((lambda f: f.get(format_type + 'codec') != 'none')
- if format_type and format_modified # bv*, ba*, wv*, wa*
- else (lambda f: f.get(not_format_type + 'codec') == 'none')
- if format_type # bv, ba, wv, wa
- else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
- if not format_modified # b, w
- else None) # b*, w*
+ filter_f = (
+ (lambda f: f.get('%scodec' % format_type) != 'none')
+ if format_type and format_modified # bv*, ba*, wv*, wa*
+ else (lambda f: f.get('%scodec' % not_format_type) == 'none')
+ if format_type # bv, ba, wv, wa
+ else (lambda f: f.get('vcodec') != 'none' and f.get('acodec') != 'none')
+ if not format_modified # b, w
+ else None) # b*, w*
else:
format_idx = -1
filter_f = ((lambda f: f.get('ext') == format_spec)
if not formats:
return
matches = list(filter(filter_f, formats)) if filter_f is not None else formats
- if matches:
+ n = len(matches)
+ if -n <= format_idx < n:
yield matches[format_idx]
- elif format_fallback == 'force' or (format_fallback and ctx['incomplete_formats']):
+ elif format_fallback and ctx['incomplete_formats']:
# for extractors with incomplete formats (audio only (soundcloud)
# or video only (imgur)) best/worst will fallback to
# best/worst {video,audio}-only format
- yield formats[format_idx]
+ n = len(formats)
+ if -n <= format_idx < n:
+ yield formats[format_idx]
elif selector.type == MERGE: # +
- def _merge(formats_pair):
- format_1, format_2 = formats_pair
-
- formats_info = []
- formats_info.extend(format_1.get('requested_formats', (format_1,)))
- formats_info.extend(format_2.get('requested_formats', (format_2,)))
-
- if not allow_multiple_streams['video'] or not allow_multiple_streams['audio']:
- get_no_more = {"video": False, "audio": False}
- for (i, fmt_info) in enumerate(formats_info):
- for aud_vid in ["audio", "video"]:
- if not allow_multiple_streams[aud_vid] and fmt_info.get(aud_vid[0] + 'codec') != 'none':
- if get_no_more[aud_vid]:
- formats_info.pop(i)
- get_no_more[aud_vid] = True
-
- if len(formats_info) == 1:
- return formats_info[0]
-
- video_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('vcodec') != 'none']
- audio_fmts = [fmt_info for fmt_info in formats_info if fmt_info.get('acodec') != 'none']
-
- the_only_video = video_fmts[0] if len(video_fmts) == 1 else None
- the_only_audio = audio_fmts[0] if len(audio_fmts) == 1 else None
-
- output_ext = self.params.get('merge_output_format')
- if not output_ext:
- if the_only_video:
- output_ext = the_only_video['ext']
- elif the_only_audio and not video_fmts:
- output_ext = the_only_audio['ext']
- else:
- output_ext = 'mkv'
-
- new_dict = {
- 'requested_formats': formats_info,
- 'format': '+'.join(fmt_info.get('format') for fmt_info in formats_info),
- 'format_id': '+'.join(fmt_info.get('format_id') for fmt_info in formats_info),
- 'ext': output_ext,
- }
-
- if the_only_video:
- new_dict.update({
- 'width': the_only_video.get('width'),
- 'height': the_only_video.get('height'),
- 'resolution': the_only_video.get('resolution'),
- 'fps': the_only_video.get('fps'),
- 'vcodec': the_only_video.get('vcodec'),
- 'vbr': the_only_video.get('vbr'),
- 'stretched_ratio': the_only_video.get('stretched_ratio'),
- })
-
- if the_only_audio:
- new_dict.update({
- 'acodec': the_only_audio.get('acodec'),
- 'abr': the_only_audio.get('abr'),
- })
-
- return new_dict
-
selector_1, selector_2 = map(_build_selector_function, selector.selector)
def selector_function(ctx):
if 'display_id' not in info_dict and 'id' in info_dict:
info_dict['display_id'] = info_dict['id']
- if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
- # Working around out-of-range timestamp values (e.g. negative ones on Windows,
- # see http://bugs.python.org/issue1646728)
- try:
- upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
- info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
- except (ValueError, OverflowError, OSError):
- pass
+ for ts_key, date_key in (
+ ('timestamp', 'upload_date'),
+ ('release_timestamp', 'release_date'),
+ ):
+ if info_dict.get(date_key) is None and info_dict.get(ts_key) is not None:
+ # Working around out-of-range timestamp values (e.g. negative ones on Windows,
+ # see http://bugs.python.org/issue1646728)
+ try:
+ upload_date = datetime.datetime.utcfromtimestamp(info_dict[ts_key])
+ info_dict[date_key] = upload_date.strftime('%Y%m%d')
+ except (ValueError, OverflowError, OSError):
+ pass
# Auto generate title fields corresponding to the *_number fields when missing
# in order to always have clean titles. This is very common for TV series.
self.to_stdout(formatSeconds(info_dict['duration']))
print_mandatory('format')
if self.params.get('forcejson', False):
- self.to_stdout(json.dumps(info_dict))
+ self.post_extract(info_dict)
+ self.to_stdout(json.dumps(info_dict, default=repr))
def process_info(self, info_dict):
"""Process a single resolved IE result."""
if self._match_entry(info_dict, incomplete=False) is not None:
return
+ self.post_extract(info_dict)
self._num_downloads += 1
info_dict = self.pre_process(info_dict)
+ # info_dict['_filename'] needs to be set for backward compatibility
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
temp_filename = self.prepare_filename(info_dict, 'temp')
files_to_move = {}
if full_filename is None:
return
- def ensure_dir_exists(path):
- return make_dir(path, self.report_error)
-
- if not ensure_dir_exists(encodeFilename(full_filename)):
+ if not self._ensure_dir_exists(encodeFilename(full_filename)):
return
- if not ensure_dir_exists(encodeFilename(temp_filename)):
+ if not self._ensure_dir_exists(encodeFilename(temp_filename)):
return
if self.params.get('writedescription', False):
descfn = self.prepare_filename(info_dict, 'description')
- if not ensure_dir_exists(encodeFilename(descfn)):
+ if not self._ensure_dir_exists(encodeFilename(descfn)):
return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(descfn)):
self.to_screen('[info] Video description is already present')
if self.params.get('writeannotations', False):
annofn = self.prepare_filename(info_dict, 'annotation')
- if not ensure_dir_exists(encodeFilename(annofn)):
+ if not self._ensure_dir_exists(encodeFilename(annofn)):
return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(annofn)):
self.to_screen('[info] Video annotations are already present')
fd.add_progress_hook(ph)
if self.params.get('verbose'):
self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
- return fd.download(name, info, subtitle)
+ new_info = dict(info)
+ if new_info.get('http_headers') is None:
+ new_info['http_headers'] = self._calc_headers(new_info)
+ return fd.download(name, new_info, subtitle)
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
+ sub_info['filepath'] = sub_filename
files_to_move[sub_filename] = sub_filename_final
else:
self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
# See https://github.com/ytdl-org/youtube-dl/issues/10268
with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8', newline='') as subfile:
subfile.write(sub_info['data'])
+ sub_info['filepath'] = sub_filename
files_to_move[sub_filename] = sub_filename_final
except (OSError, IOError):
self.report_error('Cannot write subtitles file ' + sub_filename)
return
else:
try:
- dl(sub_filename, sub_info, subtitle=True)
- '''
- if self.params.get('sleep_interval_subtitles', False):
- dl(sub_filename, sub_info)
- else:
- sub_data = ie._request_webpage(
- sub_info['url'], info_dict['id'], note=False).read()
- with io.open(encodeFilename(sub_filename), 'wb') as subfile:
- subfile.write(sub_data)
- '''
+ dl(sub_filename, sub_info.copy(), subtitle=True)
+ sub_info['filepath'] = sub_filename
files_to_move[sub_filename] = sub_filename_final
except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
if self.params.get('writeinfojson', False):
infofn = self.prepare_filename(info_dict, 'infojson')
- if not ensure_dir_exists(encodeFilename(infofn)):
+ if not self._ensure_dir_exists(encodeFilename(infofn)):
return
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(infofn)):
self.to_screen('[info] Video metadata is already present')
else:
self.to_screen('[info] Writing video metadata as JSON to: ' + infofn)
try:
- write_json_file(self.filter_requested_info(info_dict), infofn)
+ write_json_file(self.filter_requested_info(info_dict, self.params.get('clean_infojson', True)), infofn)
except (OSError, IOError):
self.report_error('Cannot write video metadata to JSON file ' + infofn)
return
for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
- files_to_move[thumb_filename_temp] = info_dict['__thumbnail_filename'] = thumb_filename
+ files_to_move[thumb_filename_temp] = thumb_filename
# Write internet shortcut files
url_link = webloc_link = desktop_link = False
requested_formats = info_dict['requested_formats']
old_ext = info_dict['ext']
- if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
- info_dict['ext'] = 'mkv'
- self.report_warning(
- 'Requested formats are incompatible for merge and will be merged into mkv.')
+ if self.params.get('merge_output_format') is None:
+ if not compatible_formats(requested_formats):
+ info_dict['ext'] = 'mkv'
+ self.report_warning(
+ 'Requested formats are incompatible for merge and will be merged into mkv.')
+ if (info_dict['ext'] == 'webm'
+ and self.params.get('writethumbnail', False)
+ and info_dict.get('thumbnails')):
+ info_dict['ext'] = 'mkv'
+ self.report_warning(
+ 'webm doesn\'t support embedding a thumbnail, mkv will be used.')
def correct_ext(filename):
filename_real_ext = os.path.splitext(filename)[1][1:]
fname = prepend_extension(
self.prepare_filename(new_info, 'temp'),
'f%s' % f['format_id'], new_info['ext'])
- if not ensure_dir_exists(fname):
+ if not self._ensure_dir_exists(fname):
return
downloaded.append(fname)
partial_success, real_download = dl(fname, new_info)
else:
assert fixup_policy in ('ignore', 'never')
- if (info_dict.get('protocol') == 'm3u8_native'
- or info_dict.get('protocol') == 'm3u8'
- and self.params.get('hls_prefer_native')):
+ if ('protocol' in info_dict
+ and get_suitable_downloader(info_dict, self.params).__name__ == 'HlsFD'):
if fixup_policy == 'warn':
self.report_warning('%s: malformed AAC bitstream detected.' % (
info_dict['id']))
assert fixup_policy in ('ignore', 'never')
try:
- self.post_process(dl_filename, info_dict, files_to_move)
+ info_dict = self.post_process(dl_filename, info_dict, files_to_move)
except PostProcessingError as err:
self.report_error('Postprocessing: %s' % str(err))
return
try:
for ph in self._post_hooks:
- ph(full_filename)
+ ph(info_dict['filepath'])
except Exception as err:
self.report_error('post hooks: %s' % str(err))
return
raise
else:
if self.params.get('dump_single_json', False):
- self.to_stdout(json.dumps(res))
+ self.post_extract(res)
+ self.to_stdout(json.dumps(res, default=repr))
return self._download_retcode
[info_filename], mode='r',
openhook=fileinput.hook_encoded('utf-8'))) as f:
# FileInput doesn't have a read method, we can't call json.load
- info = self.filter_requested_info(json.loads('\n'.join(f)))
+ info = self.filter_requested_info(json.loads('\n'.join(f)), self.params.get('clean_infojson', True))
try:
self.process_ie_result(info, download=True)
- except DownloadError:
+ except (DownloadError, EntryNotInPlaylist):
webpage_url = info.get('webpage_url')
if webpage_url is not None:
self.report_warning('The info failed to download, trying with "%s"' % webpage_url)
return self._download_retcode
@staticmethod
- def filter_requested_info(info_dict):
- fields_to_remove = ('requested_formats', 'requested_subtitles')
- return dict(
- (k, v) for k, v in info_dict.items()
- if (k[0] != '_' or k == '_type') and k not in fields_to_remove)
-
- def run_pp(self, pp, infodict, files_to_move={}):
+ def filter_requested_info(info_dict, actually_filter=True):
+ if not actually_filter:
+ info_dict['epoch'] = int(time.time())
+ return info_dict
+ exceptions = {
+ 'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
+ 'keep': ['_type'],
+ }
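+ # i.e. drop the 'remove' keys and every '_'-prefixed key except '_type', recursing into nested dicts and lists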
+ keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
+ filter_fn = lambda obj: (
+ list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
+ else obj if not isinstance(obj, dict)
+ else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
+ return filter_fn(info_dict)
+
+ def run_pp(self, pp, infodict):
files_to_delete = []
+ if '__files_to_move' not in infodict:
+ infodict['__files_to_move'] = {}
files_to_delete, infodict = pp.run(infodict)
if not files_to_delete:
- return files_to_move, infodict
+ return infodict
if self.params.get('keepvideo', False):
for f in files_to_delete:
- files_to_move.setdefault(f, '')
+ infodict['__files_to_move'].setdefault(f, '')
else:
for old_filename in set(files_to_delete):
self.to_screen('Deleting original file %s (pass -k to keep)' % old_filename)
os.remove(encodeFilename(old_filename))
except (IOError, OSError):
self.report_warning('Unable to remove downloaded original file')
- if old_filename in files_to_move:
- del files_to_move[old_filename]
- return files_to_move, infodict
+ if old_filename in infodict['__files_to_move']:
+ del infodict['__files_to_move'][old_filename]
+ return infodict
+
+ @staticmethod
+ def post_extract(info_dict):
+ def actual_post_extract(info_dict):
+ if info_dict.get('_type') in ('playlist', 'multi_video'):
+ for video_dict in info_dict.get('entries', {}):
+ actual_post_extract(video_dict or {})
+ return
+
+ if '__post_extractor' not in info_dict:
+ return
+ post_extractor = info_dict['__post_extractor']
+ if post_extractor:
+ info_dict.update(post_extractor().items())
+ del info_dict['__post_extractor']
+ return
+
+ actual_post_extract(info_dict or {})
def pre_process(self, ie_info):
info = dict(ie_info)
for pp in self._pps['beforedl']:
- info = self.run_pp(pp, info)[1]
+ info = self.run_pp(pp, info)
return info
- def post_process(self, filename, ie_info, files_to_move={}):
+ def post_process(self, filename, ie_info, files_to_move=None):
"""Run all the postprocessors on the given file."""
info = dict(ie_info)
info['filepath'] = filename
- info['__files_to_move'] = {}
+ info['__files_to_move'] = files_to_move or {}
for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
- files_to_move, info = self.run_pp(pp, info, files_to_move)
- info = self.run_pp(MoveFilesAfterDownloadPP(self, files_to_move), info)[1]
+ info = self.run_pp(pp, info)
+ info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
+ del info['__files_to_move']
for pp in self._pps['aftermove']:
- info = self.run_pp(pp, info, {})[1]
+ info = self.run_pp(pp, info)
+ return info
def _make_archive_id(self, info_dict):
video_id = info_dict.get('id')
return 'audio only'
if format.get('resolution') is not None:
return format['resolution']
- if format.get('height') is not None:
- if format.get('width') is not None:
- res = '%sx%s' % (format['width'], format['height'])
- else:
- res = '%sp' % format['height']
- elif format.get('width') is not None:
+ if format.get('width') and format.get('height'):
+ res = '%dx%d' % (format['width'], format['height'])
+ elif format.get('height'):
+ res = '%sp' % format['height']
+ elif format.get('width'):
res = '%dx?' % format['width']
else:
res = default
'|',
format_field(f, 'filesize', ' %s', func=format_bytes) + format_field(f, 'filesize_approx', '~%s', func=format_bytes),
format_field(f, 'tbr', '%4dk'),
- f.get('protocol').replace('http_dash_segments', 'dash').replace("native", "n").replace('niconico_', ''),
+ shorten_protocol_name(f.get('protocol', '').replace("native", "n")),
'|',
format_field(f, 'vcodec', default='unknown').replace('none', ''),
format_field(f, 'vbr', '%4dk'),
thumb_ext = determine_ext(t['url'], 'jpg')
suffix = '%s.' % t['id'] if multiple else ''
thumb_display_id = '%s ' % t['id'] if multiple else ''
- t['filename'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
+ t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
ret.append(suffix + thumb_ext)
self.to_screen('[%s] %s: Thumbnail %sis already present' %
(info_dict['extractor'], info_dict['id'], thumb_display_id))
else:
- self.to_screen('[%s] %s: Downloading thumbnail %s...' %
+ self.to_screen('[%s] %s: Downloading thumbnail %s ...' %
(info_dict['extractor'], info_dict['id'], thumb_display_id))
try:
uf = self.urlopen(t['url'])