]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[downloader/aria2c] Native progress for aria2c via RPC (#3724)
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
be5c1ae8 3import functools
3bc2ddcc 4import os
f8271158 5import random
3bc2ddcc 6import re
3bc2ddcc
JMF
7import time
8
f8271158 9from ..minicurses import (
10 BreaklineStatusPrinter,
11 MultilineLogger,
12 MultilinePrinter,
13 QuietMultilinePrinter,
14)
3bc2ddcc 15from ..utils import (
be5c1ae8 16 IDENTITY,
17 NO_DEFAULT,
f8271158 18 LockingUnsupportedError,
19a03940 19 Namespace,
be5c1ae8 20 RetryManager,
1a8cc837 21 classproperty,
1433734c 22 decodeArgument,
71df9b7f 23 deprecation_warning,
3bc2ddcc 24 encodeFilename,
3bc2ddcc 25 format_bytes,
11233f2a 26 join_nonempty,
64c464a1 27 parse_bytes,
a057779d 28 remove_start,
205a0654 29 sanitize_open,
1433734c 30 shell_quote,
e3ced9ed 31 timeconvert,
aa7785f8 32 timetuple_from_msec,
11233f2a 33 try_call,
3bc2ddcc
JMF
34)
35
36
class FileDownloader:
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    continuedl:         Attempt to continue downloads if possible
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for HTTP error 5xx
    file_access_retries:   Number of times to retry on file access error
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py
    retry_sleep_functions: See YoutubeDL.py

    Options that are also read by this class:

    overwrites:         If false, skip the download when the target already exists.
    progress_with_newline: Print each progress update on its own line.
    no_color:           Do not colorize the progress output.
    sleep_interval:     Lower bound (seconds) of the random sleep before a download.
    max_sleep_interval: Upper bound (seconds) of the random sleep before a download.
    sleep_interval_subtitles: Seconds to sleep before a subtitle download.

    Subclasses of this one must re-define the real_download method.
    """

    _TEST_FILE_SIZE = 10241
    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self._set_ydl(ydl)
        self._progress_hooks = []
        self.params = params
        self._prepare_multiline_status()
        self.add_progress_hook(self.report_progress)

    def _set_ydl(self, ydl):
        """Store `ydl` and borrow its reporting helpers as instance attributes."""
        self.ydl = ydl

        for func in (
            'deprecation_warning',
            'deprecated_feature',
            'report_error',
            'report_file_already_downloaded',
            'report_warning',
            'to_console_title',
            'to_stderr',
            'trouble',
            'write_debug',
        ):
            # Do not clobber anything the (sub)class already provides
            if not hasattr(self, func):
                setattr(self, func, getattr(ydl, func))

    def to_screen(self, *args, **kargs):
        """Forward to ydl.to_screen, honouring this downloader's 'quiet' option."""
        self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)

    # Class-private alias (name-mangled to _FileDownloader__to_screen);
    # presumably kept so that internal callers are unaffected by subclasses
    # overriding to_screen
    __to_screen = to_screen

    @classproperty
    def FD_NAME(cls):
        """Snake-case name derived from the class name minus its last two characters."""
        return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()

    @staticmethod
    def format_seconds(seconds):
        """Format a duration in seconds as HH:MM:SS ('--:--:--' above 99 hours)."""
        if seconds is None:
            return ' Unknown'
        # Named `timetuple` so that the `time` module is not shadowed
        timetuple = timetuple_from_msec(seconds * 1000)
        if timetuple.hours > 99:
            return '--:--:--'
        # The last field of the tuple is dropped; only three values are printed
        return '%02d:%02d:%02d' % timetuple[:-1]

    @classmethod
    def format_eta(cls, seconds):
        """Like format_seconds, without a leading "00:" and right-aligned to 8 columns."""
        return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}'

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage, or None if the total is unknown or zero."""
        if data_len is None:
            return None
        total = float(data_len)
        if total == 0:
            # A zero total has no meaningful percentage; avoid ZeroDivisionError
            return None
        return float(byte_counter) / total * 100.0

    @staticmethod
    def format_percent(percent):
        """Format a percentage with one decimal, or a placeholder if it is None."""
        return ' N/A%' if percent is None else f'{percent:>5.1f}%'

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate the remaining whole seconds; None if it cannot be estimated.

        `now` defaults to the current time when None.
        """
        if total is None:
            return None
        if now is None:
            now = time.time()
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed in bytes/sec, or None if it cannot be computed.

        `now` defaults to the current time when None, like in calc_eta.
        """
        if now is None:
            now = time.time()
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Format a speed as '<size>/s', or a placeholder if it is None."""
        return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'

    @staticmethod
    def format_retries(retries):
        """Return 'inf' for an infinite retry count, else the count as an int."""
        return 'inf' if retries == float('inf') else int(retries)

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next block size from the rate observed for the last block.

        The result is kept between half and double the last block size
        (`bytes`), and never exceeds 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer."""
        deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and '
                            'may be removed in the future. Use yt_dlp.utils.parse_bytes instead')
        return parse_bytes(bytestr)

    def slow_down(self, start_time, now, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit')
        # A non-positive limit cannot be honoured (and zero would divide by
        # zero below), so it is treated as "no limit"
        if rate_limit is None or rate_limit <= 0 or byte_counter == 0:
            return
        if now is None:
            now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep until the average speed falls back to the limit
            sleep_time = float(byte_counter) / rate_limit - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        # No .part file for 'nopart', for '-', or for existing non-regular files
        if self.params.get('nopart', False) or filename == '-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + '.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from the filename, if present."""
        if filename.endswith('.part'):
            return filename[:-len('.part')]
        return filename

    def ytdl_filename(self, filename):
        """Return the name of the '.ytdl' file that accompanies `filename`."""
        return filename + '.ytdl'

    def wrap_file_access(action, *, fatal=False):
        """Decorator factory (used at class-definition time, not a method).

        The decorated function is run inside a RetryManager loop governed by
        the 'file_access_retries' option. OSErrors with errno EACCES or EINVAL
        are handed to the retry machinery; any other OSError is passed straight
        to the error callback. `action` is the verb used in the messages.
        With fatal=True no `error` reporter is given to RetryManager.report_retry.
        """
        def error_callback(err, count, retries, *, fd):
            return RetryManager.report_retry(
                err, count, retries, info=fd.__to_screen,
                warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
                error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
                sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

        def wrapper(self, func, *args, **kwargs):
            for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
                try:
                    return func(self, *args, **kwargs)
                except OSError as err:
                    if err.errno in (errno.EACCES, errno.EINVAL):
                        retry.error = err
                        continue
                    # Not retried: report with a count of 1 and 0 retries
                    retry.error_callback(err, 1, 0)

        # Decorating `func` yields partialmethod(wrapper, func), so that
        # `wrapper` receives the instance first and `func` second
        return functools.partial(functools.partialmethod, wrapper)

    @wrap_file_access('open', fatal=True)
    def sanitize_open(self, filename, open_mode):
        """Open the file via utils.sanitize_open; returns (file object, filename)."""
        f, filename = sanitize_open(filename, open_mode)
        if not getattr(f, 'locked', None):
            self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
        return f, filename

    @wrap_file_access('remove')
    def try_remove(self, filename):
        """Remove the file, with file-access retries."""
        os.remove(filename)

    @wrap_file_access('rename')
    def try_rename(self, old_filename, new_filename):
        """Move old_filename over new_filename, with file-access retries."""
        if old_filename == new_filename:
            return
        os.replace(old_filename, new_filename)

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the converted timestamp, or None if the header is missing
        or unusable or if `filename` is not a regular file.
        """
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        # Ignore obviously invalid dates
        if filetime == 0:
            return
        # Best effort: failing to set the timestamp must not fail the download
        with contextlib.suppress(Exception):
            os.utime(filename, (time.time(), filetime))
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen('[download] Destination: ' + filename)

    def _prepare_multiline_status(self, lines=1):
        """Choose the progress printer according to the options."""
        if self.params.get('noprogress'):
            self._multiline = QuietMultilinePrinter()
        elif self.ydl.params.get('logger'):
            self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
        elif self.params.get('progress_with_newline'):
            self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
        else:
            self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
        self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')

    def _finish_multiline_status(self):
        """Signal the progress printer that the download is over."""
        self._multiline.end()

    # Text style applied to each '_<name>_str' field of the progress line
    ProgressStyles = Namespace(
        downloaded_bytes='light blue',
        percent='light blue',
        eta='yellow',
        speed='green',
        elapsed='bold white',
        total_bytes='',
        total_bytes_estimate='',
    )

    def _report_progress_status(self, s, default_template):
        """Style the status fields, then print the progress line and console title.

        NOTE: `s` is modified in place (styled fields, '_default_template').
        """
        for name, style in self.ProgressStyles.items_:
            name = f'_{name}_str'
            if name not in s:
                continue
            s[name] = self._format_progress(s[name], style)
        s['_default_template'] = default_template % s

        progress_dict = s.copy()
        progress_dict.pop('info_dict')
        progress_dict = {'info': s['info_dict'], 'progress': progress_dict}

        progress_template = self.params.get('progress_template', {})
        self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
            progress_template.get('download') or '[download] %(progress._default_template)s',
            progress_dict), s.get('progress_idx') or 0)
        self.to_console_title(self.ydl.evaluate_outtmpl(
            progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
            progress_dict))

    def _format_progress(self, *args, **kwargs):
        """Format text for the progress stream via ydl._format_text."""
        return self.ydl._format_text(
            self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)

    def report_progress(self, s):
        """Default progress hook: render the status dict `s` as a progress line.

        Only the 'finished' and 'downloading' statuses produce output.
        Formatted '_*_str' fields are added to `s` in place.
        """
        def with_fields(*tups, default=''):
            # Return the template of the first tuple whose fields are all set in `s`
            for *fields, tmpl in tups:
                if all(s.get(f) is not None for f in fields):
                    return tmpl
            return default

        def _format_bytes(k):
            return f'{format_bytes(s.get(k)):>10s}'

        if s['status'] == 'finished':
            if self.params.get('noprogress'):
                self.to_screen('[download] Download completed')
            speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
            s.update({
                'speed': speed,
                '_speed_str': self.format_speed(speed).strip(),
                '_total_bytes_str': _format_bytes('total_bytes'),
                '_elapsed_str': self.format_seconds(s.get('elapsed')),
                '_percent_str': self.format_percent(100),
            })
            self._report_progress_status(s, join_nonempty(
                '100%%',
                with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
                with_fields(('elapsed', 'in %(_elapsed_str)s')),
                with_fields(('speed', 'at %(_speed_str)s')),
                delim=' '))

        if s['status'] != 'downloading':
            return

        s.update({
            '_eta_str': self.format_eta(s.get('eta')).strip(),
            '_speed_str': self.format_speed(s.get('speed')),
            '_percent_str': self.format_percent(try_call(
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
                lambda: s['downloaded_bytes'] == 0 and 0)),
            '_total_bytes_str': _format_bytes('total_bytes'),
            '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
            '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
        })

        msg_template = with_fields(
            ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
            ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
            ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
            ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
            default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')

        msg_template += with_fields(
            ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
            ('fragment_index', ' (frag %(fragment_index)s)'))
        self._report_progress_status(s, msg_template)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen('[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
        """Report retry"""
        # When a frag_index is given (even None), the 'fragment' sleep function
        # and a fragment suffix are used; otherwise the 'http' sleep function
        is_frag = False if frag_index is NO_DEFAULT else 'fragment'
        RetryManager.report_retry(
            err, count, retries, info=self.__to_screen,
            warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
            error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
            sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
            suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen('[download] Unable to resume')

    @staticmethod
    def supports_manifest(manifest):
        """ Whether the downloader can download the fragments from the manifest.
        Redefine in subclasses if needed. """
        pass

    def download(self, filename, info_dict, subtitle=False):
        """Download to a filename using the info from info_dict

        Returns a tuple (success, real_download_called): (True, False) when
        the file is already present and the download is skipped, otherwise
        (<result of real_download>, True).
        """

        if not hasattr(filename, 'write'):
            # The checks below only make sense for path-like filenames, so
            # file-like objects never reach the filesystem helpers
            nooverwrites_and_exists = (
                not self.params.get('overwrites', True)
                and os.path.exists(encodeFilename(filename))
            )
            continuedl_and_exists = (
                self.params.get('continuedl', True)
                and os.path.isfile(encodeFilename(filename))
                and not self.params.get('nopart', False)
            )

            # Check file already present
            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
                self.report_file_already_downloaded(filename)
                self._hook_progress({
                    'filename': filename,
                    'status': 'finished',
                    'total_bytes': os.path.getsize(encodeFilename(filename)),
                }, info_dict)
                self._finish_multiline_status()
                return True, False

        if subtitle:
            sleep_interval = self.params.get('sleep_interval_subtitles') or 0
        else:
            min_sleep_interval = self.params.get('sleep_interval') or 0
            sleep_interval = random.uniform(
                min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
        if sleep_interval > 0:
            self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
            time.sleep(sleep_interval)

        ret = self.real_download(filename, info_dict)
        self._finish_multiline_status()
        return ret, True

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _hook_progress(self, status, info_dict):
        """Attach info_dict to `status` and pass it to every progress hook."""
        # Ideally we want to make a copy of the dict, but that is too slow
        status['info_dict'] = info_dict
        # youtube-dl passes the same status object to all the hooks.
        # Some third party scripts seems to be relying on this.
        # So keep this behavior if possible
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """Register a callable that is invoked with each status dict."""
        # See YoutubeDl.py (search for progress_hooks) for a description of
        # this interface
        self._progress_hooks.append(ph)

    def _debug_cmd(self, args, exe=None):
        """In verbose mode, write the shell-quoted command line to the debug log.

        `exe` defaults to the basename of the first argument.
        """
        if not self.params.get('verbose', False):
            return

        str_args = [decodeArgument(a) for a in args]

        if exe is None:
            exe = os.path.basename(str_args[0])

        self.write_debug(f'{exe} command line: {shell_quote(str_args)}')