X-Git-Url: https://jfr.im/git/yt-dlp.git/blobdiff_plain/5ef7d9bdd8c4d480612d88bd0bd2a4196f2e2562..46f1370e9af6f8af8762f67e27e5acb8f0c48a47:/yt_dlp/downloader/common.py diff --git a/yt_dlp/downloader/common.py b/yt_dlp/downloader/common.py index 2a9a62df4..8f9bc05d6 100644 --- a/yt_dlp/downloader/common.py +++ b/yt_dlp/downloader/common.py @@ -1,23 +1,40 @@ -from __future__ import division, unicode_literals - +import contextlib +import errno +import functools import os +import random import re -import sys import time -import random -from ..compat import compat_os_name +from ..minicurses import ( + BreaklineStatusPrinter, + MultilineLogger, + MultilinePrinter, + QuietMultilinePrinter, +) from ..utils import ( + IDENTITY, + NO_DEFAULT, + LockingUnsupportedError, + Namespace, + RetryManager, + classproperty, decodeArgument, + deprecation_warning, encodeFilename, - error_to_compat_str, format_bytes, + join_nonempty, + parse_bytes, + remove_start, + sanitize_open, shell_quote, timeconvert, + timetuple_from_msec, + try_call, ) -class FileDownloader(object): +class FileDownloader: """File Downloader class. File downloader objects are the ones responsible of downloading the @@ -32,25 +49,32 @@ class FileDownloader(object): verbose: Print additional info to stdout. quiet: Do not print messages to stdout. ratelimit: Download speed limit, in bytes/sec. - retries: Number of times to retry for HTTP error 5xx + continuedl: Attempt to continue downloads if possible + throttledratelimit: Assume the download is being throttled below this speed (bytes/sec) + retries: Number of times to retry for expected network errors. + Default is 0 for API, but 10 for CLI + file_access_retries: Number of times to retry on file access error (default: 3) buffersize: Size of download buffer in bytes. noresizebuffer: Do not automatically resize the download buffer. continuedl: Try to continue downloads if possible. noprogress: Do not print the progress bar. - logtostderr: Log messages to stderr instead of stdout. - consoletitle: Display progress in console window's titlebar. nopart: Do not use temporary .part files. updatetime: Use the Last-modified header to set output file timestamps. test: Download only first bytes to test the downloader. min_filesize: Skip files smaller than this size max_filesize: Skip files larger than this size xattr_set_filesize: Set ytdl.filesize user xattribute with expected size. - external_downloader_args: A list of additional command-line arguments for the - external downloader. + external_downloader_args: A dictionary of downloader keys (in lower case) + and a list of additional command-line arguments for the + executable. Use 'default' as the name for arguments to be + passed to all downloaders. For compatibility with youtube-dl, + a single list of args can also be used hls_use_mpegts: Use the mpegts container for HLS videos. http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be useful for bypassing bandwidth throttling imposed by a webserver (experimental) + progress_template: See YoutubeDL.py + retry_sleep_functions: See YoutubeDL.py Subclasses of this one must re-define the real_download method. """ @@ -60,21 +84,50 @@ class FileDownloader(object): def __init__(self, ydl, params): """Create a FileDownloader object with the given options.""" - self.ydl = ydl + self._set_ydl(ydl) self._progress_hooks = [] self.params = params + self._prepare_multiline_status() self.add_progress_hook(self.report_progress) + def _set_ydl(self, ydl): + self.ydl = ydl + + for func in ( + 'deprecation_warning', + 'deprecated_feature', + 'report_error', + 'report_file_already_downloaded', + 'report_warning', + 'to_console_title', + 'to_stderr', + 'trouble', + 'write_debug', + ): + if not hasattr(self, func): + setattr(self, func, getattr(ydl, func)) + + def to_screen(self, *args, **kargs): + self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs) + + __to_screen = to_screen + + @classproperty + def FD_NAME(cls): + return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower() + @staticmethod def format_seconds(seconds): - (mins, secs) = divmod(seconds, 60) - (hours, mins) = divmod(mins, 60) - if hours > 99: + if seconds is None: + return ' Unknown' + time = timetuple_from_msec(seconds * 1000) + if time.hours > 99: return '--:--:--' - if hours == 0: - return '%02d:%02d' % (mins, secs) - else: - return '%02d:%02d:%02d' % (hours, mins, secs) + return '%02d:%02d:%02d' % time[:-1] + + @classmethod + def format_eta(cls, seconds): + return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}' @staticmethod def calc_percent(byte_counter, data_len): @@ -84,9 +137,7 @@ def calc_percent(byte_counter, data_len): @staticmethod def format_percent(percent): - if percent is None: - return '---.-%' - return '%6s' % ('%3.1f%%' % percent) + return ' N/A%' if percent is None else f'{percent:>5.1f}%' @staticmethod def calc_eta(start, now, total, current): @@ -100,12 +151,6 @@ def calc_eta(start, now, total, current): rate = float(current) / dif return int((float(total) - float(current)) / rate) - @staticmethod - def format_eta(eta): - if eta is None: - return '--:--' - return FileDownloader.format_seconds(eta) - @staticmethod def calc_speed(start, now, bytes): dif = now - start @@ -115,13 +160,11 @@ def calc_speed(start, now, bytes): @staticmethod def format_speed(speed): - if speed is None: - return '%10s' % '---b/s' - return '%10s' % ('%s/s' % format_bytes(speed)) + return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s' @staticmethod def format_retries(retries): - return 'inf' if retries == float('inf') else '%.0f' % retries + return 'inf' if retries == float('inf') else int(retries) @staticmethod def best_block_size(elapsed_time, bytes): @@ -139,30 +182,9 @@ def best_block_size(elapsed_time, bytes): @staticmethod def parse_bytes(bytestr): """Parse a string indicating a byte quantity into an integer.""" - matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) - if matchobj is None: - return None - number = float(matchobj.group(1)) - multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) - return int(round(number * multiplier)) - - def to_screen(self, *args, **kargs): - self.ydl.to_screen(*args, **kargs) - - def to_stderr(self, message): - self.ydl.to_screen(message) - - def to_console_title(self, message): - self.ydl.to_console_title(message) - - def trouble(self, *args, **kargs): - self.ydl.trouble(*args, **kargs) - - def report_warning(self, *args, **kargs): - self.ydl.report_warning(*args, **kargs) - - def report_error(self, *args, **kargs): - self.ydl.report_error(*args, **kargs) + deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and ' + 'may be removed in the future. Use yt_dlp.utils.parse_bytes instead') + return parse_bytes(bytestr) def slow_down(self, start_time, now, byte_counter): """Sleep if the download speed is over the rate limit.""" @@ -195,13 +217,42 @@ def undo_temp_name(self, filename): def ytdl_filename(self, filename): return filename + '.ytdl' + def wrap_file_access(action, *, fatal=False): + def error_callback(err, count, retries, *, fd): + return RetryManager.report_retry( + err, count, retries, info=fd.__to_screen, + warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')), + error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'), + sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access')) + + def wrapper(self, func, *args, **kwargs): + for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self): + try: + return func(self, *args, **kwargs) + except OSError as err: + if err.errno in (errno.EACCES, errno.EINVAL): + retry.error = err + continue + retry.error_callback(err, 1, 0) + + return functools.partial(functools.partialmethod, wrapper) + + @wrap_file_access('open', fatal=True) + def sanitize_open(self, filename, open_mode): + f, filename = sanitize_open(filename, open_mode) + if not getattr(f, 'locked', None): + self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True) + return f, filename + + @wrap_file_access('remove') + def try_remove(self, filename): + os.remove(filename) + + @wrap_file_access('rename') def try_rename(self, old_filename, new_filename): - try: - if old_filename == new_filename: - return - os.rename(encodeFilename(old_filename), encodeFilename(new_filename)) - except (IOError, OSError) as err: - self.report_error('unable to rename file: %s' % error_to_compat_str(err)) + if old_filename == new_filename: + return + os.replace(old_filename, new_filename) def try_utime(self, filename, last_modified_hdr): """Try to set the last-modified time of the given file.""" @@ -218,121 +269,148 @@ def try_utime(self, filename, last_modified_hdr): # Ignore obviously invalid dates if filetime == 0: return - try: + with contextlib.suppress(Exception): os.utime(filename, (time.time(), filetime)) - except Exception: - pass return filetime def report_destination(self, filename): """Report destination filename.""" self.to_screen('[download] Destination: ' + filename) - def _report_progress_status(self, msg, is_last_line=False): - fullmsg = '[download] ' + msg - if self.params.get('progress_with_newline', False): - self.to_screen(fullmsg) + def _prepare_multiline_status(self, lines=1): + if self.params.get('noprogress'): + self._multiline = QuietMultilinePrinter() + elif self.ydl.params.get('logger'): + self._multiline = MultilineLogger(self.ydl.params['logger'], lines) + elif self.params.get('progress_with_newline'): + self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines) else: - if compat_os_name == 'nt': - prev_len = getattr(self, '_report_progress_prev_line_length', - 0) - if prev_len > len(fullmsg): - fullmsg += ' ' * (prev_len - len(fullmsg)) - self._report_progress_prev_line_length = len(fullmsg) - clear_line = '\r' - else: - clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r') - self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line) - self.to_console_title('yt-dlp ' + msg) + self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet')) + self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color') + + def _finish_multiline_status(self): + self._multiline.end() + + ProgressStyles = Namespace( + downloaded_bytes='light blue', + percent='light blue', + eta='yellow', + speed='green', + elapsed='bold white', + total_bytes='', + total_bytes_estimate='', + ) + + def _report_progress_status(self, s, default_template): + for name, style in self.ProgressStyles.items_: + name = f'_{name}_str' + if name not in s: + continue + s[name] = self._format_progress(s[name], style) + s['_default_template'] = default_template % s + + progress_dict = s.copy() + progress_dict.pop('info_dict') + progress_dict = {'info': s['info_dict'], 'progress': progress_dict} + + progress_template = self.params.get('progress_template', {}) + self._multiline.print_at_line(self.ydl.evaluate_outtmpl( + progress_template.get('download') or '[download] %(progress._default_template)s', + progress_dict), s.get('progress_idx') or 0) + self.to_console_title(self.ydl.evaluate_outtmpl( + progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s', + progress_dict)) + + def _format_progress(self, *args, **kwargs): + return self.ydl._format_text( + self._multiline.stream, self._multiline.allow_colors, *args, **kwargs) def report_progress(self, s): + def with_fields(*tups, default=''): + for *fields, tmpl in tups: + if all(s.get(f) is not None for f in fields): + return tmpl + return default + + _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}' + if s['status'] == 'finished': - if self.params.get('noprogress', False): + if self.params.get('noprogress'): self.to_screen('[download] Download completed') - else: - msg_template = '100%%' - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template += ' of %(_total_bytes_str)s' - if s.get('elapsed') is not None: - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template += ' in %(_elapsed_str)s' - self._report_progress_status( - msg_template % s, is_last_line=True) - - if self.params.get('noprogress'): - return + speed = try_call(lambda: s['total_bytes'] / s['elapsed']) + s.update({ + 'speed': speed, + '_speed_str': self.format_speed(speed).strip(), + '_total_bytes_str': _format_bytes('total_bytes'), + '_elapsed_str': self.format_seconds(s.get('elapsed')), + '_percent_str': self.format_percent(100), + }) + self._report_progress_status(s, join_nonempty( + '100%%', + with_fields(('total_bytes', 'of %(_total_bytes_str)s')), + with_fields(('elapsed', 'in %(_elapsed_str)s')), + with_fields(('speed', 'at %(_speed_str)s')), + delim=' ')) if s['status'] != 'downloading': return - if s.get('eta') is not None: - s['_eta_str'] = self.format_eta(s['eta']) - else: - s['_eta_str'] = 'Unknown ETA' - - if s.get('total_bytes') and s.get('downloaded_bytes') is not None: - s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes']) - elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None: - s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate']) - else: - if s.get('downloaded_bytes') == 0: - s['_percent_str'] = self.format_percent(0) - else: - s['_percent_str'] = 'Unknown %' - - if s.get('speed') is not None: - s['_speed_str'] = self.format_speed(s['speed']) - else: - s['_speed_str'] = 'Unknown speed' - - if s.get('total_bytes') is not None: - s['_total_bytes_str'] = format_bytes(s['total_bytes']) - msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s' - elif s.get('total_bytes_estimate') is not None: - s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate']) - msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s' - else: - if s.get('downloaded_bytes') is not None: - s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes']) - if s.get('elapsed'): - s['_elapsed_str'] = self.format_seconds(s['elapsed']) - msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)' - else: - msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s' - else: - msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s' - - self._report_progress_status(msg_template % s) + s.update({ + '_eta_str': self.format_eta(s.get('eta')).strip(), + '_speed_str': self.format_speed(s.get('speed')), + '_percent_str': self.format_percent(try_call( + lambda: 100 * s['downloaded_bytes'] / s['total_bytes'], + lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'], + lambda: s['downloaded_bytes'] == 0 and 0)), + '_total_bytes_str': _format_bytes('total_bytes'), + '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'), + '_downloaded_bytes_str': _format_bytes('downloaded_bytes'), + '_elapsed_str': self.format_seconds(s.get('elapsed')), + }) + + msg_template = with_fields( + ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'), + ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'), + ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'), + ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'), + default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s') + + msg_template += with_fields( + ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'), + ('fragment_index', ' (frag %(fragment_index)s)')) + self._report_progress_status(s, msg_template) def report_resuming_byte(self, resume_len): """Report attempt to resume at given byte.""" self.to_screen('[download] Resuming download at byte %s' % resume_len) - def report_retry(self, err, count, retries): - """Report retry in case of HTTP error 5xx""" - self.to_screen( - '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...' - % (error_to_compat_str(err), count, self.format_retries(retries))) - - def report_file_already_downloaded(self, file_name): - """Report file has already been fully downloaded.""" - try: - self.to_screen('[download] %s has already been downloaded' % file_name) - except UnicodeEncodeError: - self.to_screen('[download] The file has already been downloaded') + def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True): + """Report retry""" + is_frag = False if frag_index is NO_DEFAULT else 'fragment' + RetryManager.report_retry( + err, count, retries, info=self.__to_screen, + warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'), + error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'), + sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'), + suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None) def report_unable_to_resume(self): """Report it was impossible to resume download.""" self.to_screen('[download] Unable to resume') + @staticmethod + def supports_manifest(manifest): + """ Whether the downloader can download the fragments from the manifest. + Redefine in subclasses if needed. """ + pass + def download(self, filename, info_dict, subtitle=False): """Download to a filename using the info from info_dict Return True on success and False otherwise """ nooverwrites_and_exists = ( - not self.params.get('overwrites', subtitle) + not self.params.get('overwrites', True) and os.path.exists(encodeFilename(filename)) ) @@ -350,35 +428,34 @@ def download(self, filename, info_dict, subtitle=False): 'filename': filename, 'status': 'finished', 'total_bytes': os.path.getsize(encodeFilename(filename)), - }) + }, info_dict) + self._finish_multiline_status() return True, False - if subtitle is False: - min_sleep_interval = self.params.get('sleep_interval') - if min_sleep_interval: - max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval) - sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval) - self.to_screen( - '[download] Sleeping %s seconds ...' % ( - int(sleep_interval) if sleep_interval.is_integer() - else '%.2f' % sleep_interval)) - time.sleep(sleep_interval) + if subtitle: + sleep_interval = self.params.get('sleep_interval_subtitles') or 0 else: - sleep_interval_sub = 0 - if type(self.params.get('sleep_interval_subtitles')) is int: - sleep_interval_sub = self.params.get('sleep_interval_subtitles') - if sleep_interval_sub > 0: - self.to_screen( - '[download] Sleeping %s seconds ...' % ( - sleep_interval_sub)) - time.sleep(sleep_interval_sub) - return self.real_download(filename, info_dict), True + min_sleep_interval = self.params.get('sleep_interval') or 0 + sleep_interval = random.uniform( + min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval) + if sleep_interval > 0: + self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...') + time.sleep(sleep_interval) + + ret = self.real_download(filename, info_dict) + self._finish_multiline_status() + return ret, True def real_download(self, filename, info_dict): """Real download process. Redefine in subclasses.""" raise NotImplementedError('This method must be implemented by subclasses') - def _hook_progress(self, status): + def _hook_progress(self, status, info_dict): + # Ideally we want to make a copy of the dict, but that is too slow + status['info_dict'] = info_dict + # youtube-dl passes the same status object to all the hooks. + # Some third party scripts seems to be relying on this. + # So keep this behavior if possible for ph in self._progress_hooks: ph(status) @@ -396,5 +473,4 @@ def _debug_cmd(self, args, exe=None): if exe is None: exe = os.path.basename(str_args[0]) - self.to_screen('[debug] %s command line: %s' % ( - exe, shell_quote(str_args))) + self.write_debug(f'{exe} command line: {shell_quote(str_args)}')