-#!/usr/bin/env python
+#!/usr/bin/env python3
# coding: utf-8
from __future__ import absolute_import, unicode_literals
import re
import shutil
import subprocess
-import socket
import sys
+import tempfile
import time
import tokenize
import traceback
compat_basestring,
compat_cookiejar,
compat_get_terminal_size,
- compat_http_client,
compat_kwargs,
compat_numeric_types,
compat_os_name,
date_from_str,
DateRange,
DEFAULT_OUTTMPL,
- OUTTMPL_TYPES,
determine_ext,
determine_protocol,
DOT_DESKTOP_LINK_TEMPLATE,
DownloadError,
encode_compat_str,
encodeFilename,
- error_to_compat_str,
EntryNotInPlaylist,
+ error_to_compat_str,
ExistingVideoReached,
expand_path,
ExtractorError,
float_or_none,
format_bytes,
format_field,
- FORMAT_RE,
+ STR_FORMAT_RE,
formatSeconds,
GeoRestrictedError,
+ HEADRequest,
int_or_none,
iri_to_uri,
ISO3166Utils,
+ LazyList,
locked_file,
make_dir,
make_HTTPS_handler,
MaxDownloadsReached,
+ network_exceptions,
orderedSet,
+ OUTTMPL_TYPES,
PagedList,
parse_filesize,
PerRequestProxyHandler,
PostProcessingError,
preferredencoding,
prepend_extension,
+ process_communicate_or_kill,
register_socks_protocols,
+ RejectedVideoReached,
render_table,
replace_extension,
- RejectedVideoReached,
SameFileError,
sanitize_filename,
sanitize_path,
strftime_or_none,
subtitles_filename,
to_high_limit_path,
+ traverse_obj,
UnavailableVideoError,
url_basename,
version_tuple,
YoutubeDLCookieProcessor,
YoutubeDLHandler,
YoutubeDLRedirectHandler,
- process_communicate_or_kill,
)
from .cache import Cache
from .extractor import (
verbose: Print additional info to stdout.
quiet: Do not print messages to stdout.
no_warnings: Do not print out anything for warnings.
- forceurl: Force printing final URL.
- forcetitle: Force printing title.
- forceid: Force printing ID.
- forcethumbnail: Force printing thumbnail URL.
- forcedescription: Force printing description.
- forcefilename: Force printing final filename.
- forceduration: Force printing duration.
+ forceprint: A list of templates to force print
+ forceurl: Force printing final URL. (Deprecated)
+ forcetitle: Force printing title. (Deprecated)
+ forceid: Force printing ID. (Deprecated)
+ forcethumbnail: Force printing thumbnail URL. (Deprecated)
+ forcedescription: Force printing description. (Deprecated)
+ forcefilename: Force printing final filename. (Deprecated)
+ forceduration: Force printing duration. (Deprecated)
forcejson: Force printing info_dict as JSON.
dump_single_json: Force printing the info_dict of the whole playlist
(or video) as a single JSON line.
simulate: Do not download the video files.
format: Video format code. see "FORMAT SELECTION" for more details.
allow_unplayable_formats: Allow unplayable formats to be extracted and downloaded.
+ ignore_no_formats_error: Ignore "No video formats" error. Usefull for
+ extracting metadata even if the video is not actually
+ available for download (experimental)
format_sort: How to sort the video formats. see "Sorting Formats"
for more details.
format_sort_force: Force the given format_sort. see "Sorting Formats"
ignoreerrors: Do not stop on download errors
(Default True when running yt-dlp,
but False when directly accessing YoutubeDL class)
+ skip_playlist_after_errors: Number of allowed failures until the rest of
+ the playlist is skipped
force_generic_extractor: Force downloader to use the generic extractor
overwrites: Overwrite all video and metadata files if True,
overwrite only non-video files if None
writedesktoplink: Write a Linux internet shortcut file (.desktop)
writesubtitles: Write the video subtitles to a file
writeautomaticsub: Write the automatically generated subtitles to a file
- allsubtitles: Downloads all the subtitles of the video
+ allsubtitles: Deprecated - Use subtitlelangs = ['all']
+ Downloads all the subtitles of the video
(requires writesubtitles or writeautomaticsub)
listsubtitles: Lists all available subtitles for the video
subtitlesformat: The format code for subtitles
- subtitleslangs: List of languages of the subtitles to download
+ subtitleslangs: List of languages of the subtitles to download (can be regex).
+ The list may contain "all" to refer to all the available
+ subtitles. The language can be prefixed with a "-" to
+ exclude it from the requested languages. Eg: ['all', '-live_chat']
keepvideo: Keep the video file after post-processing
daterange: A DateRange object, download only if the upload_date is in the range.
skip_download: Skip the actual download of the video file
postprocessors: A list of dictionaries, each with an entry
* key: The name of the postprocessor. See
yt_dlp/postprocessor/__init__.py for a list.
- * _after_move: Optional. If True, run this post_processor
- after 'MoveFilesAfterDownload'
- as well as any further keyword arguments for the
- postprocessor.
+ * when: When to run the postprocessor. Can be one of
+ pre_process|before_dl|post_process|after_move.
+ Assumed to be 'post_process' if not given
post_hooks: A list of functions that get called as the final step
for each video file, after all postprocessors have been
called. The filename will be passed as the only argument.
Use the native HLS downloader instead of ffmpeg/avconv
if True, otherwise use ffmpeg/avconv if False, otherwise
use downloader suggested by extractor if None.
+ compat_opts: Compatibility options. See "Differences in default behavior".
+ Note that only format-sort, format-spec, no-live-chat,
+ no-attach-info-json, playlist-index, list-formats,
+ no-direct-merge, no-youtube-channel-redirect,
+ and no-youtube-unavailable-videos works when used via the API
The following parameters are not used by YoutubeDL itself, they are used by
the downloader (see yt_dlp/downloader/common.py):
params = None
_ies = []
- _pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+ _pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
__prepare_filename_warned = False
_first_webpage_request = True
_download_retcode = None
params = {}
self._ies = []
self._ies_instances = {}
- self._pps = {'beforedl': [], 'aftermove': [], 'normal': []}
+ self._pps = {'pre_process': [], 'before_dl': [], 'after_move': [], 'post_process': []}
self.__prepare_filename_warned = False
self._first_webpage_request = True
self._post_hooks = []
}
self.params.update(params)
self.cache = Cache(self)
- self.archive = set()
- """Preload the archive, if any is specified"""
- def preload_download_archive(self):
- fn = self.params.get('download_archive')
- if fn is None:
- return False
- try:
- with locked_file(fn, 'r', encoding='utf-8') as archive_file:
- for line in archive_file:
- self.archive.add(line.strip())
- except IOError as ioe:
- if ioe.errno != errno.ENOENT:
- raise
- return False
- return True
+ if sys.version_info < (3, 6):
+ self.report_warning(
+ 'Python version %d.%d is not supported! Please update to Python 3.6 or above' % sys.version_info[:2])
def check_deprecated(param, option, suggestion):
if self.params.get(param) is not None:
- self.report_warning(
- '%s is deprecated. Use %s instead.' % (option, suggestion))
+ self.report_warning('%s is deprecated. Use %s instead' % (option, suggestion))
return True
return False
- if self.params.get('verbose'):
- self.to_stdout('[debug] Loading archive file %r' % self.params.get('download_archive'))
-
- preload_download_archive(self)
-
if check_deprecated('cn_verification_proxy', '--cn-verification-proxy', '--geo-verification-proxy'):
if self.params.get('geo_verification_proxy') is None:
self.params['geo_verification_proxy'] = self.params['cn_verification_proxy']
+ check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
+ check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
+ check_deprecated('useid', '--id', '-o "%(id)s.%(ext)s"')
+
+ for msg in self.params.get('warnings', []):
+ self.report_warning(msg)
+
if self.params.get('final_ext'):
if self.params.get('merge_output_format'):
self.report_warning('--merge-output-format will be ignored since --remux-video or --recode-video is given')
if 'overwrites' in self.params and self.params['overwrites'] is None:
del self.params['overwrites']
- check_deprecated('autonumber_size', '--autonumber-size', 'output template with %(autonumber)0Nd, where N in the number of digits')
- check_deprecated('autonumber', '--auto-number', '-o "%(autonumber)s-%(title)s.%(ext)s"')
- check_deprecated('usetitle', '--title', '-o "%(title)s-%(id)s.%(ext)s"')
-
if params.get('bidi_workaround', False):
try:
import pty
self.outtmpl_dict = self.parse_outtmpl()
+ # Creating format selector here allows us to catch syntax errors before the extraction
+ self.format_selector = (
+ None if self.params.get('format') is None
+ else self.build_format_selector(self.params['format']))
+
self._setup_opener()
+ """Preload the archive, if any is specified"""
+ def preload_download_archive(fn):
+ if fn is None:
+ return False
+ self.write_debug('Loading archive file %r\n' % fn)
+ try:
+ with locked_file(fn, 'r', encoding='utf-8') as archive_file:
+ for line in archive_file:
+ self.archive.add(line.strip())
+ except IOError as ioe:
+ if ioe.errno != errno.ENOENT:
+ raise
+ return False
+ return True
+
+ self.archive = set()
+ preload_download_archive(self.params.get('download_archive'))
+
if auto_init:
self.print_debug_header()
self.add_default_info_extractors()
when = pp_def['when']
del pp_def['when']
else:
- when = 'normal'
+ when = 'post_process'
pp = pp_class(self, **compat_kwargs(pp_def))
self.add_post_processor(pp, when=when)
for ie in gen_extractor_classes():
self.add_info_extractor(ie)
- def add_post_processor(self, pp, when='normal'):
+ def add_post_processor(self, pp, when='post_process'):
"""Add a PostProcessor object to the end of the chain."""
self._pps[when].append(pp)
pp.set_downloader(self)
for _ in range(line_count))
return res[:-len('\n')]
- def to_screen(self, message, skip_eol=False):
- """Print message to stdout if not in quiet mode."""
- return self.to_stdout(message, skip_eol, check_quiet=True)
-
def _write_string(self, s, out=None):
write_string(s, out=out, encoding=self.params.get('encoding'))
- def to_stdout(self, message, skip_eol=False, check_quiet=False):
- """Print message to stdout if not in quiet mode."""
+ def to_stdout(self, message, skip_eol=False, quiet=False):
+ """Print message to stdout"""
if self.params.get('logger'):
self.params['logger'].debug(message)
- elif not check_quiet or not self.params.get('quiet', False):
- message = self._bidi_workaround(message)
- terminator = ['\n', ''][skip_eol]
- output = message + terminator
-
- self._write_string(output, self._screen_file)
+ elif not quiet or self.params.get('verbose'):
+ self._write_string(
+ '%s%s' % (self._bidi_workaround(message), ('' if skip_eol else '\n')),
+ self._err_file if quiet else self._screen_file)
def to_stderr(self, message):
- """Print message to stderr."""
+ """Print message to stderr"""
assert isinstance(message, compat_str)
if self.params.get('logger'):
self.params['logger'].error(message)
else:
- message = self._bidi_workaround(message)
- output = message + '\n'
- self._write_string(output, self._err_file)
+ self._write_string('%s\n' % self._bidi_workaround(message), self._err_file)
def to_console_title(self, message):
if not self.params.get('consoletitle', False):
else:
tb_data = traceback.format_list(traceback.extract_stack())
tb = ''.join(tb_data)
- self.to_stderr(tb)
+ if tb:
+ self.to_stderr(tb)
if not self.params.get('ignoreerrors', False):
if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
exc_info = sys.exc_info()[1].exc_info
raise DownloadError(message, exc_info)
self._download_retcode = 1
+ def to_screen(self, message, skip_eol=False):
+ """Print message to stdout if not in quiet mode"""
+ self.to_stdout(
+ message, skip_eol, quiet=self.params.get('quiet', False))
+
def report_warning(self, message):
'''
Print the message to stderr, it will be prefixed with 'WARNING:'
error_message = '%s %s' % (_msg_header, message)
self.trouble(error_message, tb)
+ def write_debug(self, message):
+ '''Log debug message or Print message to stderr'''
+ if not self.params.get('verbose', False):
+ return
+ message = '[debug] %s' % message
+ if self.params.get('logger'):
+ self.params['logger'].debug(message)
+ else:
+ self._write_string('%s\n' % message)
+
def report_file_already_downloaded(self, file_name):
"""Report file has already been fully downloaded."""
try:
'Put from __future__ import unicode_literals at the top of your code file or consider switching to Python 3.x.')
return outtmpl_dict
+ def get_output_path(self, dir_type='', filename=None):
+ paths = self.params.get('paths', {})
+ assert isinstance(paths, dict)
+ path = os.path.join(
+ expand_path(paths.get('home', '').strip()),
+ expand_path(paths.get(dir_type, '').strip()) if dir_type else '',
+ filename or '')
+
+ # Temporary fix for #4787
+ # 'Treat' all problem characters by passing filename through preferredencoding
+ # to workaround encoding issues with subprocess on python2 @ Windows
+ if sys.version_info < (3, 0) and sys.platform == 'win32':
+ path = encodeFilename(path, True).decode(preferredencoding())
+ return sanitize_path(path, force=self.params.get('windowsfilenames'))
+
+ @staticmethod
+ def validate_outtmpl(tmpl):
+ ''' @return None or Exception object '''
+ try:
+ re.sub(
+ STR_FORMAT_RE.format(''),
+ lambda mobj: ('%' if not mobj.group('has_key') else '') + mobj.group(0),
+ tmpl
+ ) % collections.defaultdict(int)
+ return None
+ except ValueError as err:
+ return err
+
def prepare_outtmpl(self, outtmpl, info_dict, sanitize=None):
""" Make the template and info_dict suitable for substitution (outtmpl % info_dict)"""
- template_dict = dict(info_dict)
+ info_dict = dict(info_dict)
+ na = self.params.get('outtmpl_na_placeholder', 'NA')
- # duration_string
- template_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
- formatSeconds(info_dict['duration'], '-')
+ info_dict['duration_string'] = ( # %(duration>%H-%M-%S)s is wrong if duration > 24hrs
+ formatSeconds(info_dict['duration'], '-' if sanitize else ':')
if info_dict.get('duration', None) is not None
else None)
-
- # epoch
- template_dict['epoch'] = int(time.time())
-
- # autonumber
- autonumber_size = self.params.get('autonumber_size')
- if autonumber_size is None:
- autonumber_size = 5
- template_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
-
- # resolution if not defined
- if template_dict.get('resolution') is None:
- if template_dict.get('width') and template_dict.get('height'):
- template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
- elif template_dict.get('height'):
- template_dict['resolution'] = '%sp' % template_dict['height']
- elif template_dict.get('width'):
- template_dict['resolution'] = '%dx?' % template_dict['width']
-
- if sanitize is None:
- sanitize = lambda k, v: v
- template_dict = dict((k, v if isinstance(v, compat_numeric_types) else sanitize(k, v))
- for k, v in template_dict.items()
- if v is not None and not isinstance(v, (list, tuple, dict)))
- na = self.params.get('outtmpl_na_placeholder', 'NA')
- template_dict = collections.defaultdict(lambda: na, template_dict)
+ info_dict['epoch'] = int(time.time())
+ info_dict['autonumber'] = self.params.get('autonumber_start', 1) - 1 + self._num_downloads
+ if info_dict.get('resolution') is None:
+ info_dict['resolution'] = self.format_resolution(info_dict, default=None)
# For fields playlist_index and autonumber convert all occurrences
# of %(field)s to %(field)0Nd for backward compatibility
field_size_compat_map = {
- 'playlist_index': len(str(template_dict['n_entries'])),
- 'autonumber': autonumber_size,
+ 'playlist_index': len(str(info_dict.get('_last_playlist_index') or '')),
+ 'autonumber': self.params.get('autonumber_size') or 5,
}
- FIELD_SIZE_COMPAT_RE = r'(?<!%)%\((?P<field>autonumber|playlist_index)\)s'
- mobj = re.search(FIELD_SIZE_COMPAT_RE, outtmpl)
- if mobj:
- outtmpl = re.sub(
- FIELD_SIZE_COMPAT_RE,
- r'%%(\1)0%dd' % field_size_compat_map[mobj.group('field')],
- outtmpl)
-
- numeric_fields = list(self._NUMERIC_FIELDS)
-
- # Format date
- FORMAT_DATE_RE = FORMAT_RE.format(r'(?P<key>(?P<field>\w+)>(?P<format>.+?))')
- for mobj in re.finditer(FORMAT_DATE_RE, outtmpl):
- conv_type, field, frmt, key = mobj.group('type', 'field', 'format', 'key')
- if key in template_dict:
- continue
- value = strftime_or_none(template_dict.get(field), frmt, na)
- if conv_type in 'crs': # string
- value = sanitize(field, value)
- else: # number
- numeric_fields.append(key)
- value = float_or_none(value, default=None)
- if value is not None:
- template_dict[key] = value
-
- # Missing numeric fields used together with integer presentation types
- # in format specification will break the argument substitution since
- # string NA placeholder is returned for missing fields. We will patch
- # output template for missing fields to meet string presentation type.
- for numeric_field in numeric_fields:
- if numeric_field not in template_dict:
- outtmpl = re.sub(
- FORMAT_RE.format(re.escape(numeric_field)),
- r'%({0})s'.format(numeric_field), outtmpl)
-
- return outtmpl, template_dict
+
+ TMPL_DICT = {}
+ EXTERNAL_FORMAT_RE = re.compile(STR_FORMAT_RE.format('[^)]*'))
+ MATH_FUNCTIONS = {
+ '+': float.__add__,
+ '-': float.__sub__,
+ }
+ # Field is of the form key1.key2...
+ # where keys (except first) can be string, int or slice
+ FIELD_RE = r'\w+(?:\.(?:\w+|{num}|{num}?(?::{num}?){{1,2}}))*'.format(num=r'(?:-?\d+)')
+ MATH_FIELD_RE = r'''{field}|{num}'''.format(field=FIELD_RE, num=r'-?\d+(?:.\d+)?')
+ MATH_OPERATORS_RE = r'(?:%s)' % '|'.join(map(re.escape, MATH_FUNCTIONS.keys()))
+ INTERNAL_FORMAT_RE = re.compile(r'''(?x)
+ (?P<negate>-)?
+ (?P<fields>{field})
+ (?P<maths>(?:{math_op}{math_field})*)
+ (?:>(?P<strf_format>.+?))?
+ (?:\|(?P<default>.*?))?
+ $'''.format(field=FIELD_RE, math_op=MATH_OPERATORS_RE, math_field=MATH_FIELD_RE))
+
+ get_key = lambda k: traverse_obj(
+ info_dict, k.split('.'), is_user_input=True, traverse_string=True)
+
+ def get_value(mdict):
+ # Object traversal
+ value = get_key(mdict['fields'])
+ # Negative
+ if mdict['negate']:
+ value = float_or_none(value)
+ if value is not None:
+ value *= -1
+ # Do maths
+ offset_key = mdict['maths']
+ if offset_key:
+ value = float_or_none(value)
+ operator = None
+ while offset_key:
+ item = re.match(
+ MATH_FIELD_RE if operator else MATH_OPERATORS_RE,
+ offset_key).group(0)
+ offset_key = offset_key[len(item):]
+ if operator is None:
+ operator = MATH_FUNCTIONS[item]
+ continue
+ item, multiplier = (item[1:], -1) if item[0] == '-' else (item, 1)
+ offset = float_or_none(item)
+ if offset is None:
+ offset = float_or_none(get_key(item))
+ try:
+ value = operator(value, multiplier * offset)
+ except (TypeError, ZeroDivisionError):
+ return None
+ operator = None
+ # Datetime formatting
+ if mdict['strf_format']:
+ value = strftime_or_none(value, mdict['strf_format'])
+
+ return value
+
+ def create_key(outer_mobj):
+ if not outer_mobj.group('has_key'):
+ return '%{}'.format(outer_mobj.group(0))
+
+ key = outer_mobj.group('key')
+ fmt = outer_mobj.group('format')
+ mobj = re.match(INTERNAL_FORMAT_RE, key)
+ if mobj is None:
+ value, default = None, na
+ else:
+ mobj = mobj.groupdict()
+ default = mobj['default'] if mobj['default'] is not None else na
+ value = get_value(mobj)
+
+ if fmt == 's' and value is not None and key in field_size_compat_map.keys():
+ fmt = '0{:d}d'.format(field_size_compat_map[key])
+
+ value = default if value is None else value
+ key += '\0%s' % fmt
+
+ if fmt == 'c':
+ value = compat_str(value)
+ if value is None:
+ value, fmt = default, 's'
+ else:
+ value = value[0]
+ elif fmt[-1] not in 'rs': # numeric
+ value = float_or_none(value)
+ if value is None:
+ value, fmt = default, 's'
+ if sanitize:
+ if fmt[-1] == 'r':
+ # If value is an object, sanitize might convert it to a string
+ # So we convert it to repr first
+ value, fmt = repr(value), '%ss' % fmt[:-1]
+ if fmt[-1] in 'csr':
+ value = sanitize(key, value)
+ TMPL_DICT[key] = value
+ return '%({key}){fmt}'.format(key=key, fmt=fmt)
+
+ return EXTERNAL_FORMAT_RE.sub(create_key, outtmpl), TMPL_DICT
def _prepare_filename(self, info_dict, tmpl_type='default'):
try:
force_ext = OUTTMPL_TYPES.get(tmpl_type)
if force_ext is not None:
- filename = replace_extension(filename, force_ext, template_dict.get('ext'))
+ filename = replace_extension(filename, force_ext, info_dict.get('ext'))
# https://github.com/blackjack4494/youtube-dlc/issues/85
trim_file_name = self.params.get('trim_file_name', False)
def prepare_filename(self, info_dict, dir_type='', warn=False):
"""Generate the output filename."""
- paths = self.params.get('paths', {})
- assert isinstance(paths, dict)
+
filename = self._prepare_filename(info_dict, dir_type or 'default')
if warn and not self.__prepare_filename_warned:
- if not paths:
+ if not self.params.get('paths'):
pass
elif filename == '-':
self.report_warning('--paths is ignored when an outputting to stdout')
if filename == '-' or not filename:
return filename
- homepath = expand_path(paths.get('home', '').strip())
- assert isinstance(homepath, compat_str)
- subdir = expand_path(paths.get(dir_type, '').strip()) if dir_type else ''
- assert isinstance(subdir, compat_str)
- path = os.path.join(homepath, subdir, filename)
+ return self.get_output_path(dir_type, filename)
- # Temporary fix for #4787
- # 'Treat' all problem characters by passing filename through preferredencoding
- # to workaround encoding issues with subprocess on python2 @ Windows
- if sys.version_info < (3, 0) and sys.platform == 'win32':
- path = encodeFilename(path, True).decode(preferredencoding())
- return sanitize_path(path, force=self.params.get('windowsfilenames'))
-
- def _match_entry(self, info_dict, incomplete):
+ def _match_entry(self, info_dict, incomplete=False, silent=False):
""" Returns None if the file should be downloaded """
+ video_title = info_dict.get('title', info_dict.get('id', 'video'))
+
def check_filter():
- video_title = info_dict.get('title', info_dict.get('id', 'video'))
if 'title' in info_dict:
# This can happen when we're just evaluating the playlist
title = info_dict['title']
return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
return 'Skipping "%s" because it is age restricted' % video_title
- if self.in_download_archive(info_dict):
- return '%s has already been recorded in archive' % video_title
if not incomplete:
match_filter = self.params.get('match_filter')
return ret
return None
- reason = check_filter()
+ if self.in_download_archive(info_dict):
+ reason = '%s has already been recorded in the archive' % video_title
+ break_opt, break_err = 'break_on_existing', ExistingVideoReached
+ else:
+ reason = check_filter()
+ break_opt, break_err = 'break_on_reject', RejectedVideoReached
if reason is not None:
- self.to_screen('[download] ' + reason)
- if reason.endswith('has already been recorded in the archive') and self.params.get('break_on_existing', False):
- raise ExistingVideoReached()
- elif self.params.get('break_on_reject', False):
- raise RejectedVideoReached()
+ if not silent:
+ self.to_screen('[download] ' + reason)
+ if self.params.get(break_opt, False):
+ raise break_err()
return reason
@staticmethod
for key, value in extra_info.items():
info_dict.setdefault(key, value)
- def extract_info(self, url, download=True, ie_key=None, info_dict=None, extra_info={},
+ def extract_info(self, url, download=True, ie_key=None, extra_info={},
process=True, force_generic_extractor=False):
- '''
- Returns a list with a dictionary for each video we find.
- If 'download', also downloads the videos.
- extra_info is a dict containing the extra values to add to each result
- '''
+ """
+ Return a list with a dictionary for each video extracted.
+
+ Arguments:
+ url -- URL to extract
+
+ Keyword arguments:
+ download -- whether to download videos during extraction
+ ie_key -- extractor key hint
+ extra_info -- dictionary containing the extra values to add to each result
+ process -- whether to resolve all unresolved references (URLs, playlist items),
+ must be True for download to work.
+ force_generic_extractor -- force using the generic extractor
+ """
if not ie_key and force_generic_extractor:
ie_key = 'Generic'
self.to_screen("[%s] %s: has already been recorded in archive" % (
ie_key, temp_id))
break
- return self.__extract_info(url, ie, download, extra_info, process, info_dict)
+ return self.__extract_info(url, ie, download, extra_info, process)
else:
self.report_error('no suitable InfoExtractor for URL %s' % url)
return wrapper
@__handle_extraction_exceptions
- def __extract_info(self, url, ie, download, extra_info, process, info_dict):
+ def __extract_info(self, url, ie, download, extra_info, process):
ie_result = ie.extract(url)
if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
return
'_type': 'compat_list',
'entries': ie_result,
}
- if info_dict:
- if info_dict.get('id'):
- ie_result['id'] = info_dict['id']
- if info_dict.get('title'):
- ie_result['title'] = info_dict['title']
self.add_default_extra_info(ie_result, ie, url)
if process:
return self.process_ie_result(ie_result, download, extra_info)
self.add_extra_info(ie_result, {
'extractor': ie.IE_NAME,
'webpage_url': url,
+ 'original_url': url,
'webpage_url_basename': url_basename(url),
'extractor_key': ie.ie_key(),
})
extract_flat = self.params.get('extract_flat', False)
if ((extract_flat == 'in_playlist' and 'playlist' in extra_info)
or extract_flat is True):
- self.__forced_printings(ie_result, self.prepare_filename(ie_result), incomplete=True)
+ info_copy = ie_result.copy()
+ self.add_extra_info(info_copy, extra_info)
+ self.add_default_extra_info(
+ info_copy, self.get_info_extractor(ie_result.get('ie_key')), ie_result['url'])
+ self.__forced_printings(info_copy, self.prepare_filename(info_copy), incomplete=True)
return ie_result
if result_type == 'video':
self.add_extra_info(ie_result, extra_info)
- return self.process_video_result(ie_result, download=download)
+ ie_result = self.process_video_result(ie_result, download=download)
+ additional_urls = (ie_result or {}).get('additional_urls')
+ if additional_urls:
+ # TODO: Improve MetadataFromFieldPP to allow setting a list
+ if isinstance(additional_urls, compat_str):
+ additional_urls = [additional_urls]
+ self.to_screen(
+ '[info] %s: %d additional URL(s) requested' % (ie_result['id'], len(additional_urls)))
+ self.write_debug('Additional URLs: "%s"' % '", "'.join(additional_urls))
+ ie_result['additional_entries'] = [
+ self.extract_info(
+ url, download, extra_info,
+ force_generic_extractor=self.params.get('force_generic_extractor'))
+ for url in additional_urls
+ ]
+ return ie_result
elif result_type == 'url':
# We have to add extra_info to the results because it may be
# contained in a playlist
- return self.extract_info(ie_result['url'],
- download, info_dict=ie_result,
- ie_key=ie_result.get('ie_key'),
- extra_info=extra_info)
+ return self.extract_info(
+ ie_result['url'], download,
+ ie_key=ie_result.get('ie_key'),
+ extra_info=extra_info)
elif result_type == 'url_transparent':
# Use the information from the embedding page
info = self.extract_info(
self._playlist_level += 1
self._playlist_urls.add(webpage_url)
+ self._sanitize_thumbnails(ie_result)
try:
return self.__process_playlist(ie_result, download)
finally:
playlist_results = []
- playliststart = self.params.get('playliststart', 1) - 1
+ playliststart = self.params.get('playliststart', 1)
playlistend = self.params.get('playlistend')
# For backwards compatibility, interpret -1 as whole list
if playlistend == -1:
playlistitems = orderedSet(iter_playlistitems(playlistitems_str))
ie_entries = ie_result['entries']
-
- def make_playlistitems_entries(list_ie_entries):
- num_entries = len(list_ie_entries)
- for i in playlistitems:
- if -num_entries < i <= num_entries:
- yield list_ie_entries[i - 1]
- elif incomplete_entries:
+ msg = (
+ 'Downloading %d videos' if not isinstance(ie_entries, list)
+ else 'Collected %d videos; downloading %%d of them' % len(ie_entries))
+ if not isinstance(ie_entries, (list, PagedList)):
+ ie_entries = LazyList(ie_entries)
+
+ entries = []
+ for i in playlistitems or itertools.count(playliststart):
+ if playlistitems is None and playlistend is not None and playlistend < i:
+ break
+ entry = None
+ try:
+ entry = ie_entries[i - 1]
+ if entry is None:
+ raise EntryNotInPlaylist()
+ except (IndexError, EntryNotInPlaylist):
+ if incomplete_entries:
raise EntryNotInPlaylist()
+ elif not playlistitems:
+ break
+ entries.append(entry)
+ try:
+ if entry is not None:
+ self._match_entry(entry, incomplete=True, silent=True)
+ except (ExistingVideoReached, RejectedVideoReached):
+ break
+ ie_result['entries'] = entries
- if isinstance(ie_entries, list):
- n_all_entries = len(ie_entries)
- if playlistitems:
- entries = list(make_playlistitems_entries(ie_entries))
- else:
- entries = ie_entries[playliststart:playlistend]
- n_entries = len(entries)
- msg = 'Collected %d videos; downloading %d of them' % (n_all_entries, n_entries)
- elif isinstance(ie_entries, PagedList):
- if playlistitems:
- entries = []
- for item in playlistitems:
- entries.extend(ie_entries.getslice(
- item - 1, item
- ))
- else:
- entries = ie_entries.getslice(
- playliststart, playlistend)
- n_entries = len(entries)
- msg = 'Downloading %d videos' % n_entries
- else: # iterable
- if playlistitems:
- entries = list(make_playlistitems_entries(list(itertools.islice(
- ie_entries, 0, max(playlistitems)))))
- else:
- entries = list(itertools.islice(
- ie_entries, playliststart, playlistend))
- n_entries = len(entries)
- msg = 'Downloading %d videos' % n_entries
+ # Save playlist_index before re-ordering
+ entries = [
+ ((playlistitems[i - 1] if playlistitems else i), entry)
+ for i, entry in enumerate(entries, 1)
+ if entry is not None]
+ n_entries = len(entries)
- if any((entry is None for entry in entries)):
- raise EntryNotInPlaylist()
if not playlistitems and (playliststart or playlistend):
- playlistitems = list(range(1 + playliststart, 1 + playliststart + len(entries)))
- ie_result['entries'] = entries
+ playlistitems = list(range(playliststart, playliststart + n_entries))
ie_result['requested_entries'] = playlistitems
if self.params.get('allow_playlist_files', True):
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': 0
+ 'playlist_index': 0,
}
ie_copy.update(dict(ie_result))
except (OSError, IOError):
self.report_error('Cannot write playlist metadata to JSON file ' + infofn)
+ # TODO: This should be passed to ThumbnailsConvertor if necessary
+ self._write_thumbnails(ie_copy, self.prepare_filename(ie_copy, 'pl_thumbnail'))
+
if self.params.get('writedescription', False):
descfn = self.prepare_filename(ie_copy, 'pl_description')
if not self._ensure_dir_exists(encodeFilename(descfn)):
x_forwarded_for = ie_result.get('__x_forwarded_for_ip')
- self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg))
- for i, entry in enumerate(entries, 1):
+ self.to_screen('[%s] playlist %s: %s' % (ie_result['extractor'], playlist, msg % n_entries))
+ failures = 0
+ max_failures = self.params.get('skip_playlist_after_errors') or float('inf')
+ for i, entry_tuple in enumerate(entries, 1):
+ playlist_index, entry = entry_tuple
+ if 'playlist_index' in self.params.get('compat_options', []):
+ playlist_index = playlistitems[i - 1] if playlistitems else i
self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
# This __x_forwarded_for_ip thing is a bit ugly but requires
# minimal changes
entry['__x_forwarded_for_ip'] = x_forwarded_for
extra = {
'n_entries': n_entries,
+ '_last_playlist_index': max(playlistitems) if playlistitems else (playlistend or n_entries),
+ 'playlist_index': playlist_index,
+ 'playlist_autonumber': i,
'playlist': playlist,
'playlist_id': ie_result.get('id'),
'playlist_title': ie_result.get('title'),
'playlist_uploader': ie_result.get('uploader'),
'playlist_uploader_id': ie_result.get('uploader_id'),
- 'playlist_index': playlistitems[i - 1] if playlistitems else i,
'extractor': ie_result['extractor'],
'webpage_url': ie_result['webpage_url'],
'webpage_url_basename': url_basename(ie_result['webpage_url']),
continue
entry_result = self.__process_iterable_entry(entry, download, extra)
+ if not entry_result:
+ failures += 1
+ if failures >= max_failures:
+ self.report_error(
+ 'Skipping the remaining entries in playlist "%s" since %d items failed extraction' % (playlist, failures))
+ break
# TODO: skip failed (empty) entries?
playlist_results.append(entry_result)
ie_result['entries'] = playlist_results
'!=': operator.ne,
}
operator_rex = re.compile(r'''(?x)\s*
- (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)
- \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
- (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
- $
+ (?P<key>width|height|tbr|abr|vbr|asr|filesize|filesize_approx|fps)\s*
+ (?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)\s*
''' % '|'.join(map(re.escape, OPERATORS.keys())))
- m = operator_rex.search(filter_spec)
+ m = operator_rex.fullmatch(filter_spec)
if m:
try:
comparison_value = int(m.group('value'))
'$=': lambda attr, value: attr.endswith(value),
'*=': lambda attr, value: value in attr,
}
- str_operator_rex = re.compile(r'''(?x)
- \s*(?P<key>[a-zA-Z0-9._-]+)
- \s*(?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?
- \s*(?P<value>[a-zA-Z0-9._-]+)
- \s*$
+ str_operator_rex = re.compile(r'''(?x)\s*
+ (?P<key>[a-zA-Z0-9._-]+)\s*
+ (?P<negation>!\s*)?(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
+ (?P<value>[a-zA-Z0-9._-]+)\s*
''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
- m = str_operator_rex.search(filter_spec)
+ m = str_operator_rex.fullmatch(filter_spec)
if m:
comparison_value = m.group('value')
str_op = STR_OPERATORS[m.group('op')]
op = str_op
if not m:
- raise ValueError('Invalid filter specification %r' % filter_spec)
+ raise SyntaxError('Invalid filter specification %r' % filter_spec)
def _filter(f):
actual_value = f.get(m.group('key'))
not can_merge()
or info_dict.get('is_live', False)
or self.outtmpl_dict['default'] == '-'))
+ compat = (
+ prefer_best
+ or self.params.get('allow_multiple_audio_streams', False)
+ or 'format-spec' in self.params.get('compat_opts', []))
return (
- 'best/bestvideo+bestaudio'
- if prefer_best
- else 'bestvideo*+bestaudio/best'
- if not self.params.get('allow_multiple_audio_streams', False)
+ 'best/bestvideo+bestaudio' if prefer_best
+ else 'bestvideo*+bestaudio/best' if not compat
else 'bestvideo+bestaudio/best')
def build_format_selector(self, format_spec):
allow_multiple_streams = {'audio': self.params.get('allow_multiple_audio_streams', False),
'video': self.params.get('allow_multiple_video_streams', False)}
+ check_formats = self.params.get('check_formats')
+
def _parse_filter(tokens):
filter_parts = []
for type, string, start, _, _ in tokens:
return new_dict
+ def _check_formats(formats):
+ for f in formats:
+ self.to_screen('[info] Testing format %s' % f['format_id'])
+ temp_file = tempfile.NamedTemporaryFile(
+ suffix='.tmp', delete=False,
+ dir=self.get_output_path('temp') or None)
+ temp_file.close()
+ try:
+ dl, _ = self.dl(temp_file.name, f, test=True)
+ except (ExtractorError, IOError, OSError, ValueError) + network_exceptions:
+ dl = False
+ finally:
+ if os.path.exists(temp_file.name):
+ try:
+ os.remove(temp_file.name)
+ except OSError:
+ self.report_warning('Unable to delete temporary file "%s"' % temp_file.name)
+ if dl:
+ yield f
+ else:
+ self.to_screen('[info] Unable to download format %s. Skipping...' % f['format_id'])
+
def _build_selector_function(selector):
if isinstance(selector, list): # ,
fs = [_build_selector_function(s) for s in selector]
return []
elif selector.type == SINGLE: # atom
- format_spec = (selector.selector or 'best').lower()
+ format_spec = selector.selector or 'best'
# TODO: Add allvideo, allaudio etc by generalizing the code with best/worst selector
if format_spec == 'all':
def selector_function(ctx):
formats = list(ctx['formats'])
- if formats:
- for f in formats:
- yield f
+ if check_formats:
+ formats = _check_formats(formats)
+ for f in formats:
+ yield f
elif format_spec == 'mergeall':
def selector_function(ctx):
- formats = list(ctx['formats'])
+ formats = list(_check_formats(ctx['formats']))
if not formats:
return
- merged_format = formats[0]
- for f in formats[1:]:
+ merged_format = formats[-1]
+ for f in formats[-2::-1]:
merged_format = _merge((merged_format, f))
yield merged_format
else:
- format_fallback = False
+ format_fallback, format_reverse, format_idx = False, True, 1
mobj = re.match(
r'(?P<bw>best|worst|b|w)(?P<type>video|audio|v|a)?(?P<mod>\*)?(?:\.(?P<n>[1-9]\d*))?$',
format_spec)
if mobj is not None:
format_idx = int_or_none(mobj.group('n'), default=1)
- format_idx = format_idx - 1 if mobj.group('bw')[0] == 'w' else -format_idx
+ format_reverse = mobj.group('bw')[0] == 'b'
format_type = (mobj.group('type') or [None])[0]
not_format_type = {'v': 'a', 'a': 'v'}.get(format_type)
format_modified = mobj.group('mod') is not None
if not format_modified # b, w
else None) # b*, w*
else:
- format_idx = -1
filter_f = ((lambda f: f.get('ext') == format_spec)
if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav'] # extension
else (lambda f: f.get('format_id') == format_spec)) # id
if not formats:
return
matches = list(filter(filter_f, formats)) if filter_f is not None else formats
- n = len(matches)
- if -n <= format_idx < n:
- yield matches[format_idx]
- elif format_fallback and ctx['incomplete_formats']:
+ if format_fallback and ctx['incomplete_formats'] and not matches:
# for extractors with incomplete formats (audio only (soundcloud)
# or video only (imgur)) best/worst will fallback to
# best/worst {video,audio}-only format
- n = len(formats)
- if -n <= format_idx < n:
- yield formats[format_idx]
+ matches = formats
+ if format_reverse:
+ matches = matches[::-1]
+ if check_formats:
+ matches = list(itertools.islice(_check_formats(matches), format_idx))
+ n = len(matches)
+ if -n <= format_idx - 1 < n:
+ yield matches[format_idx - 1]
elif selector.type == MERGE: # +
selector_1, selector_2 = map(_build_selector_function, selector.selector)
self.cookiejar.add_cookie_header(pr)
return pr.get_header('Cookie')
+ def _sanitize_thumbnails(self, info_dict):
+ thumbnails = info_dict.get('thumbnails')
+ if thumbnails is None:
+ thumbnail = info_dict.get('thumbnail')
+ if thumbnail:
+ info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
+ if thumbnails:
+ thumbnails.sort(key=lambda t: (
+ t.get('preference') if t.get('preference') is not None else -1,
+ t.get('width') if t.get('width') is not None else -1,
+ t.get('height') if t.get('height') is not None else -1,
+ t.get('id') if t.get('id') is not None else '',
+ t.get('url')))
+
+ def test_thumbnail(t):
+ self.to_screen('[info] Testing thumbnail %s' % t['id'])
+ try:
+ self.urlopen(HEADRequest(t['url']))
+ except network_exceptions as err:
+ self.to_screen('[info] Unable to connect to thumbnail %s URL "%s" - %s. Skipping...' % (
+ t['id'], t['url'], error_to_compat_str(err)))
+ return False
+ return True
+
+ for i, t in enumerate(thumbnails):
+ if t.get('id') is None:
+ t['id'] = '%d' % i
+ if t.get('width') and t.get('height'):
+ t['resolution'] = '%dx%d' % (t['width'], t['height'])
+ t['url'] = sanitize_url(t['url'])
+ if self.params.get('check_formats'):
+ info_dict['thumbnails'] = reversed(LazyList(filter(test_thumbnail, thumbnails[::-1])))
+
def process_video_result(self, info_dict, download=True):
assert info_dict.get('_type', 'video') == 'video'
info_dict['playlist'] = None
info_dict['playlist_index'] = None
- thumbnails = info_dict.get('thumbnails')
- if thumbnails is None:
- thumbnail = info_dict.get('thumbnail')
- if thumbnail:
- info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
- if thumbnails:
- thumbnails.sort(key=lambda t: (
- t.get('preference') if t.get('preference') is not None else -1,
- t.get('width') if t.get('width') is not None else -1,
- t.get('height') if t.get('height') is not None else -1,
- t.get('id') if t.get('id') is not None else '', t.get('url')))
- for i, t in enumerate(thumbnails):
- t['url'] = sanitize_url(t['url'])
- if t.get('width') and t.get('height'):
- t['resolution'] = '%dx%d' % (t['width'], t['height'])
- if t.get('id') is None:
- t['id'] = '%d' % i
+ self._sanitize_thumbnails(info_dict)
if self.params.get('list_thumbnails'):
self.list_thumbnails(info_dict)
return
thumbnail = info_dict.get('thumbnail')
+ thumbnails = info_dict.get('thumbnails')
if thumbnail:
info_dict['thumbnail'] = sanitize_url(thumbnail)
elif thumbnails:
formats = info_dict['formats']
if not formats:
- raise ExtractorError('No video formats found!')
+ if not self.params.get('ignore_no_formats_error'):
+ raise ExtractorError('No video formats found!')
+ else:
+ self.report_warning('No video formats found!')
def is_wellformed(f):
url = f.get('url')
# TODO Central sorting goes here
- if formats[0] is not info_dict:
+ if formats and formats[0] is not info_dict:
# only set the 'formats' fields if the original info_dict list them
# otherwise we end up with a circular reference, the first (and unique)
# element in the 'formats' field in info_dict is info_dict itself,
# which can't be exported to json
info_dict['formats'] = formats
+
+ info_dict, _ = self.pre_process(info_dict)
+
if self.params.get('listformats'):
+ if not info_dict.get('formats'):
+ raise ExtractorError('No video formats found', expected=True)
self.list_formats(info_dict)
return
- req_format = self.params.get('format')
- if req_format is None:
+ format_selector = self.format_selector
+ if format_selector is None:
req_format = self._default_format_spec(info_dict, download=download)
- if self.params.get('verbose'):
- self.to_screen('[debug] Default format spec: %s' % req_format)
-
- format_selector = self.build_format_selector(req_format)
+ self.write_debug('Default format spec: %s' % req_format)
+ format_selector = self.build_format_selector(req_format)
# While in format selection we may need to have an access to the original
# format set in order to calculate some metrics or do some processing.
formats_to_download = list(format_selector(ctx))
if not formats_to_download:
- raise ExtractorError('requested format not available',
- expected=True)
-
- if download:
- self.to_screen('[info] Downloading format(s) %s' % ", ".join([f['format_id'] for f in formats_to_download]))
- if len(formats_to_download) > 1:
- self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
- for format in formats_to_download:
+ if not self.params.get('ignore_no_formats_error'):
+ raise ExtractorError('Requested format is not available', expected=True)
+ else:
+ self.report_warning('Requested format is not available')
+ elif download:
+ self.to_screen(
+ '[info] %s: Downloading %d format(s): %s' % (
+ info_dict['id'], len(formats_to_download),
+ ", ".join([f['format_id'] for f in formats_to_download])))
+ for fmt in formats_to_download:
new_info = dict(info_dict)
- new_info.update(format)
+ # Save a reference to the original info_dict so that it can be modified in process_info if needed
+ new_info['__original_infodict'] = info_dict
+ new_info.update(fmt)
self.process_info(new_info)
# We update the info dict with the best quality format (backwards compatibility)
- info_dict.update(formats_to_download[-1])
+ if formats_to_download:
+ info_dict.update(formats_to_download[-1])
return info_dict
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
available_subs):
return None
+ all_sub_langs = available_subs.keys()
if self.params.get('allsubtitles', False):
- requested_langs = available_subs.keys()
+ requested_langs = all_sub_langs
+ elif self.params.get('subtitleslangs', False):
+ requested_langs = set()
+ for lang in self.params.get('subtitleslangs'):
+ if lang == 'all':
+ requested_langs.update(all_sub_langs)
+ continue
+ discard = lang[0] == '-'
+ if discard:
+ lang = lang[1:]
+ current_langs = filter(re.compile(lang + '$').match, all_sub_langs)
+ if discard:
+ for lang in current_langs:
+ requested_langs.discard(lang)
+ else:
+ requested_langs.update(current_langs)
+ elif 'en' in available_subs:
+ requested_langs = ['en']
else:
- if self.params.get('subtitleslangs', False):
- requested_langs = self.params.get('subtitleslangs')
- elif 'en' in available_subs:
- requested_langs = ['en']
- else:
- requested_langs = [list(available_subs.keys())[0]]
+ requested_langs = [list(all_sub_langs)[0]]
+ self.write_debug('Downloading subtitles: %s' % ', '.join(requested_langs))
formats_query = self.params.get('subtitlesformat', 'best')
formats_preference = formats_query.split('/') if formats_query else []
return subs
def __forced_printings(self, info_dict, filename, incomplete):
- def print_mandatory(field):
+ def print_mandatory(field, actual_field=None):
+ if actual_field is None:
+ actual_field = field
if (self.params.get('force%s' % field, False)
- and (not incomplete or info_dict.get(field) is not None)):
- self.to_stdout(info_dict[field])
+ and (not incomplete or info_dict.get(actual_field) is not None)):
+ self.to_stdout(info_dict[actual_field])
def print_optional(field):
if (self.params.get('force%s' % field, False)
and info_dict.get(field) is not None):
self.to_stdout(info_dict[field])
+ info_dict = info_dict.copy()
+ if filename is not None:
+ info_dict['filename'] = filename
+ if info_dict.get('requested_formats') is not None:
+ # For RTMP URLs, also include the playpath
+ info_dict['urls'] = '\n'.join(f['url'] + f.get('play_path', '') for f in info_dict['requested_formats'])
+ elif 'url' in info_dict:
+ info_dict['urls'] = info_dict['url'] + info_dict.get('play_path', '')
+
+ for tmpl in self.params.get('forceprint', []):
+ if re.match(r'\w+$', tmpl):
+ tmpl = '%({})s'.format(tmpl)
+ tmpl, info_copy = self.prepare_outtmpl(tmpl, info_dict)
+ self.to_stdout(tmpl % info_copy)
+
print_mandatory('title')
print_mandatory('id')
- if self.params.get('forceurl', False) and not incomplete:
- if info_dict.get('requested_formats') is not None:
- for f in info_dict['requested_formats']:
- self.to_stdout(f['url'] + f.get('play_path', ''))
- else:
- # For RTMP URLs, also include the playpath
- self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
+ print_mandatory('url', 'urls')
print_optional('thumbnail')
print_optional('description')
- if self.params.get('forcefilename', False) and filename is not None:
- self.to_stdout(filename)
+ print_optional('filename')
if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
self.to_stdout(formatSeconds(info_dict['duration']))
print_mandatory('format')
+
if self.params.get('forcejson', False):
self.post_extract(info_dict)
self.to_stdout(json.dumps(info_dict, default=repr))
+ def dl(self, name, info, subtitle=False, test=False):
+
+ if test:
+ verbose = self.params.get('verbose')
+ params = {
+ 'test': True,
+ 'quiet': not verbose,
+ 'verbose': verbose,
+ 'noprogress': not verbose,
+ 'nopart': True,
+ 'skip_unavailable_fragments': False,
+ 'keep_fragments': False,
+ 'overwrites': True,
+ '_no_ytdl_file': True,
+ }
+ else:
+ params = self.params
+ fd = get_suitable_downloader(info, params)(self, params)
+ if not test:
+ for ph in self._progress_hooks:
+ fd.add_progress_hook(ph)
+ urls = '", "'.join([f['url'] for f in info.get('requested_formats', [])] or [info['url']])
+ self.write_debug('Invoking downloader on "%s"' % urls)
+ new_info = dict(info)
+ if new_info.get('http_headers') is None:
+ new_info['http_headers'] = self._calc_headers(new_info)
+ return fd.download(name, new_info, subtitle)
+
def process_info(self, info_dict):
"""Process a single resolved IE result."""
if 'format' not in info_dict:
info_dict['format'] = info_dict['ext']
- if self._match_entry(info_dict, incomplete=False) is not None:
+ if self._match_entry(info_dict) is not None:
return
self.post_extract(info_dict)
self._num_downloads += 1
- info_dict = self.pre_process(info_dict)
-
# info_dict['_filename'] needs to be set for backward compatibility
info_dict['_filename'] = full_filename = self.prepare_filename(info_dict, warn=True)
temp_filename = self.prepare_filename(info_dict, 'temp')
files_to_move = {}
- skip_dl = self.params.get('skip_download', False)
# Forced printings
self.__forced_printings(info_dict, full_filename, incomplete=False)
self.report_error('Cannot write annotations file: ' + annofn)
return
- def dl(name, info, subtitle=False):
- fd = get_suitable_downloader(info, self.params)(self, self.params)
- for ph in self._progress_hooks:
- fd.add_progress_hook(ph)
- if self.params.get('verbose'):
- self.to_screen('[debug] Invoking downloader on %r' % info.get('url'))
- new_info = dict(info)
- if new_info.get('http_headers') is None:
- new_info['http_headers'] = self._calc_headers(new_info)
- return fd.download(name, new_info, subtitle)
-
subtitles_are_requested = any([self.params.get('writesubtitles', False),
self.params.get('writeautomaticsub')])
# ie = self.get_info_extractor(info_dict['extractor_key'])
for sub_lang, sub_info in subtitles.items():
sub_format = sub_info['ext']
- sub_fn = self.prepare_filename(info_dict, 'subtitle')
- sub_filename = subtitles_filename(
- temp_filename if not skip_dl else sub_fn,
- sub_lang, sub_format, info_dict.get('ext'))
- sub_filename_final = subtitles_filename(sub_fn, sub_lang, sub_format, info_dict.get('ext'))
+ sub_filename = subtitles_filename(temp_filename, sub_lang, sub_format, info_dict.get('ext'))
+ sub_filename_final = subtitles_filename(
+ self.prepare_filename(info_dict, 'subtitle'), sub_lang, sub_format, info_dict.get('ext'))
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(sub_filename)):
self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
sub_info['filepath'] = sub_filename
return
else:
try:
- dl(sub_filename, sub_info.copy(), subtitle=True)
+ self.dl(sub_filename, sub_info.copy(), subtitle=True)
sub_info['filepath'] = sub_filename
files_to_move[sub_filename] = sub_filename_final
- except (ExtractorError, IOError, OSError, ValueError, compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ except (ExtractorError, IOError, OSError, ValueError) + network_exceptions as err:
self.report_warning('Unable to download subtitle for "%s": %s' %
(sub_lang, error_to_compat_str(err)))
continue
- if skip_dl:
- if self.params.get('convertsubtitles', False):
- # subconv = FFmpegSubtitlesConvertorPP(self, format=self.params.get('convertsubtitles'))
- filename_real_ext = os.path.splitext(full_filename)[1][1:]
- filename_wo_ext = (
- os.path.splitext(full_filename)[0]
- if filename_real_ext == info_dict['ext']
- else full_filename)
- afilename = '%s.%s' % (filename_wo_ext, self.params.get('convertsubtitles'))
- # if subconv.available:
- # info_dict['__postprocessors'].append(subconv)
- if os.path.exists(encodeFilename(afilename)):
- self.to_screen(
- '[download] %s has already been downloaded and '
- 'converted' % afilename)
- else:
- try:
- self.post_process(full_filename, info_dict, files_to_move)
- except PostProcessingError as err:
- self.report_error('Postprocessing: %s' % str(err))
- return
-
if self.params.get('writeinfojson', False):
infofn = self.prepare_filename(info_dict, 'infojson')
if not self._ensure_dir_exists(encodeFilename(infofn)):
return
info_dict['__infojson_filename'] = infofn
- thumbfn = self.prepare_filename(info_dict, 'thumbnail')
- thumb_fn_temp = temp_filename if not skip_dl else thumbfn
- for thumb_ext in self._write_thumbnails(info_dict, thumb_fn_temp):
- thumb_filename_temp = replace_extension(thumb_fn_temp, thumb_ext, info_dict.get('ext'))
- thumb_filename = replace_extension(thumbfn, thumb_ext, info_dict.get('ext'))
+ for thumb_ext in self._write_thumbnails(info_dict, temp_filename):
+ thumb_filename_temp = replace_extension(temp_filename, thumb_ext, info_dict.get('ext'))
+ thumb_filename = replace_extension(
+ self.prepare_filename(info_dict, 'thumbnail'), thumb_ext, info_dict.get('ext'))
files_to_move[thumb_filename_temp] = thumb_filename
# Write internet shortcut files
if not _write_link_file('desktop', DOT_DESKTOP_LINK_TEMPLATE, '\n', embed_filename=True):
return
- # Download
+ try:
+ info_dict, files_to_move = self.pre_process(info_dict, 'before_dl', files_to_move)
+ except PostProcessingError as err:
+ self.report_error('Preprocessing: %s' % str(err))
+ return
+
must_record_download_archive = False
- if not skip_dl:
+ if self.params.get('skip_download', False):
+ info_dict['filepath'] = temp_filename
+ info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
+ info_dict['__files_to_move'] = files_to_move
+ info_dict = self.run_pp(MoveFilesAfterDownloadPP(self, False), info_dict)
+ else:
+ # Download
try:
def existing_file(*filepaths):
success = True
if info_dict.get('requested_formats') is not None:
- downloaded = []
- merger = FFmpegMergerPP(self)
- if self.params.get('allow_unplayable_formats'):
- self.report_warning(
- 'You have requested merging of multiple formats '
- 'while also allowing unplayable formats to be downloaded. '
- 'The formats won\'t be merged to prevent data corruption.')
- elif not merger.available:
- self.report_warning(
- 'You have requested merging of multiple formats but ffmpeg is not installed. '
- 'The formats won\'t be merged.')
def compatible_formats(formats):
# TODO: some formats actually allow this (mkv, webm, ogg, mp4), but not all of them.
temp_filename = correct_ext(temp_filename)
dl_filename = existing_file(full_filename, temp_filename)
info_dict['__real_download'] = False
- if dl_filename is None:
- for f in requested_formats:
- new_info = dict(info_dict)
- new_info.update(f)
- fname = prepend_extension(
- self.prepare_filename(new_info, 'temp'),
- 'f%s' % f['format_id'], new_info['ext'])
- if not self._ensure_dir_exists(fname):
- return
- downloaded.append(fname)
- partial_success, real_download = dl(fname, new_info)
- info_dict['__real_download'] = info_dict['__real_download'] or real_download
- success = success and partial_success
- if merger.available and not self.params.get('allow_unplayable_formats'):
- info_dict['__postprocessors'].append(merger)
- info_dict['__files_to_merge'] = downloaded
- # Even if there were no downloads, it is being merged only now
- info_dict['__real_download'] = True
- else:
- for file in downloaded:
- files_to_move[file] = None
+
+ _protocols = set(determine_protocol(f) for f in requested_formats)
+ if len(_protocols) == 1:
+ info_dict['protocol'] = _protocols.pop()
+ directly_mergable = (
+ 'no-direct-merge' not in self.params.get('compat_opts', [])
+ and info_dict.get('protocol') is not None # All requested formats have same protocol
+ and not self.params.get('allow_unplayable_formats')
+ and get_suitable_downloader(info_dict, self.params).__name__ == 'FFmpegFD')
+ if directly_mergable:
+ info_dict['url'] = requested_formats[0]['url']
+ # Treat it as a single download
+ dl_filename = existing_file(full_filename, temp_filename)
+ if dl_filename is None:
+ success, real_download = self.dl(temp_filename, info_dict)
+ info_dict['__real_download'] = real_download
+ else:
+ downloaded = []
+ merger = FFmpegMergerPP(self)
+ if self.params.get('allow_unplayable_formats'):
+ self.report_warning(
+ 'You have requested merging of multiple formats '
+ 'while also allowing unplayable formats to be downloaded. '
+ 'The formats won\'t be merged to prevent data corruption.')
+ elif not merger.available:
+ self.report_warning(
+ 'You have requested merging of multiple formats but ffmpeg is not installed. '
+ 'The formats won\'t be merged.')
+
+ if dl_filename is None:
+ for f in requested_formats:
+ new_info = dict(info_dict)
+ del new_info['requested_formats']
+ new_info.update(f)
+ fname = prepend_extension(
+ self.prepare_filename(new_info, 'temp'),
+ 'f%s' % f['format_id'], new_info['ext'])
+ if not self._ensure_dir_exists(fname):
+ return
+ downloaded.append(fname)
+ partial_success, real_download = self.dl(fname, new_info)
+ info_dict['__real_download'] = info_dict['__real_download'] or real_download
+ success = success and partial_success
+ if merger.available and not self.params.get('allow_unplayable_formats'):
+ info_dict['__postprocessors'].append(merger)
+ info_dict['__files_to_merge'] = downloaded
+ # Even if there were no downloads, it is being merged only now
+ info_dict['__real_download'] = True
+ else:
+ for file in downloaded:
+ files_to_move[file] = None
else:
# Just a single file
dl_filename = existing_file(full_filename, temp_filename)
if dl_filename is None:
- success, real_download = dl(temp_filename, info_dict)
+ success, real_download = self.dl(temp_filename, info_dict)
info_dict['__real_download'] = real_download
dl_filename = dl_filename or temp_filename
info_dict['__finaldir'] = os.path.dirname(os.path.abspath(encodeFilename(full_filename)))
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ except network_exceptions as err:
self.report_error('unable to download video data: %s' % error_to_compat_str(err))
return
except (OSError, IOError) as err:
@staticmethod
def filter_requested_info(info_dict, actually_filter=True):
- if not actually_filter:
+ remove_keys = ['__original_infodict'] # Always remove this since this may contain a copy of the entire dict
+ keep_keys = ['_type'], # Always keep this to facilitate load-info-json
+ if actually_filter:
+ remove_keys += ('requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries', 'original_url')
+ empty_values = (None, {}, [], set(), tuple())
+ reject = lambda k, v: k not in keep_keys and (
+ k.startswith('_') or k in remove_keys or v in empty_values)
+ else:
info_dict['epoch'] = int(time.time())
- return info_dict
- exceptions = {
- 'remove': ['requested_formats', 'requested_subtitles', 'requested_entries', 'filepath', 'entries'],
- 'keep': ['_type'],
- }
- keep_key = lambda k: k in exceptions['keep'] or not (k.startswith('_') or k in exceptions['remove'])
+ reject = lambda k, v: k in remove_keys
filter_fn = lambda obj: (
- list(map(filter_fn, obj)) if isinstance(obj, (list, tuple))
+ list(map(filter_fn, obj)) if isinstance(obj, (LazyList, list, tuple, set))
else obj if not isinstance(obj, dict)
- else dict((k, filter_fn(v)) for k, v in obj.items() if keep_key(k)))
+ else dict((k, filter_fn(v)) for k, v in obj.items() if not reject(k, v)))
return filter_fn(info_dict)
def run_pp(self, pp, infodict):
actual_post_extract(video_dict or {})
return
- if '__post_extractor' not in info_dict:
- return
- post_extractor = info_dict['__post_extractor']
- if post_extractor:
- info_dict.update(post_extractor().items())
- del info_dict['__post_extractor']
- return
+ post_extractor = info_dict.get('__post_extractor') or (lambda: {})
+ extra = post_extractor().items()
+ info_dict.update(extra)
+ info_dict.pop('__post_extractor', None)
+
+ original_infodict = info_dict.get('__original_infodict') or {}
+ original_infodict.update(extra)
+ original_infodict.pop('__post_extractor', None)
actual_post_extract(info_dict or {})
- def pre_process(self, ie_info):
+ def pre_process(self, ie_info, key='pre_process', files_to_move=None):
info = dict(ie_info)
- for pp in self._pps['beforedl']:
+ info['__files_to_move'] = files_to_move or {}
+ for pp in self._pps[key]:
info = self.run_pp(pp, info)
- return info
+ return info, info.pop('__files_to_move', None)
def post_process(self, filename, ie_info, files_to_move=None):
"""Run all the postprocessors on the given file."""
info['filepath'] = filename
info['__files_to_move'] = files_to_move or {}
- for pp in ie_info.get('__postprocessors', []) + self._pps['normal']:
+ for pp in ie_info.get('__postprocessors', []) + self._pps['post_process']:
info = self.run_pp(pp, info)
info = self.run_pp(MoveFilesAfterDownloadPP(self), info)
del info['__files_to_move']
- for pp in self._pps['aftermove']:
+ for pp in self._pps['after_move']:
info = self.run_pp(pp, info)
return info
def list_formats(self, info_dict):
formats = info_dict.get('formats', [info_dict])
- new_format = self.params.get('listformats_table', False)
+ new_format = (
+ 'list-formats' not in self.params.get('compat_opts', [])
+ and self.params.get('list_formats_as_table', True) is not False)
if new_format:
table = [
[
hideEmpty=new_format)))
def list_thumbnails(self, info_dict):
- thumbnails = info_dict.get('thumbnails')
+ thumbnails = list(info_dict.get('thumbnails'))
if not thumbnails:
self.to_screen('[info] No thumbnails present for %s' % info_dict['id'])
return
return
self.to_screen(
'Available %s for %s:' % (name, video_id))
+
+ def _row(lang, formats):
+ exts, names = zip(*((f['ext'], f.get('name', 'unknown')) for f in reversed(formats)))
+ if len(set(names)) == 1:
+ names = [] if names[0] == 'unknown' else names[:1]
+ return [lang, ', '.join(names), ', '.join(exts)]
+
self.to_screen(render_table(
- ['Language', 'formats'],
- [[lang, ', '.join(f['ext'] for f in reversed(formats))]
- for lang, formats in subtitles.items()]))
+ ['Language', 'Name', 'Formats'],
+ [_row(lang, formats) for lang, formats in subtitles.items()],
+ hideEmpty=True))
def urlopen(self, req):
""" Start an HTTP download """
if _PLUGIN_CLASSES:
self._write_string(
'[debug] Plugin Extractors: %s\n' % [ie.ie_key() for ie in _PLUGIN_CLASSES])
+ if self.params.get('compat_opts'):
+ self._write_string(
+ '[debug] Compatibility options: %s\n' % ', '.join(self.params.get('compat_opts')))
try:
sp = subprocess.Popen(
['git', 'rev-parse', '--short', 'HEAD'],
thumb_ext = determine_ext(t['url'], 'jpg')
suffix = '%s.' % t['id'] if multiple else ''
thumb_display_id = '%s ' % t['id'] if multiple else ''
- t['filepath'] = thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
+ thumb_filename = replace_extension(filename, suffix + thumb_ext, info_dict.get('ext'))
if not self.params.get('overwrites', True) and os.path.exists(encodeFilename(thumb_filename)):
ret.append(suffix + thumb_ext)
+ t['filepath'] = thumb_filename
self.to_screen('[%s] %s: Thumbnail %sis already present' %
(info_dict['extractor'], info_dict['id'], thumb_display_id))
else:
ret.append(suffix + thumb_ext)
self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
(info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
- except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
+ t['filepath'] = thumb_filename
+ except network_exceptions as err:
self.report_warning('Unable to download thumbnail "%s": %s' %
(t['url'], error_to_compat_str(err)))
if ret and not write_all: