]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
Standardize retry mechanism (#1649)
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
be5c1ae8 3import functools
3bc2ddcc 4import os
f8271158 5import random
3bc2ddcc 6import re
3bc2ddcc
JMF
7import time
8
f8271158 9from ..minicurses import (
10 BreaklineStatusPrinter,
11 MultilineLogger,
12 MultilinePrinter,
13 QuietMultilinePrinter,
14)
3bc2ddcc 15from ..utils import (
be5c1ae8 16 IDENTITY,
17 NO_DEFAULT,
1d485a1a 18 NUMBER_RE,
f8271158 19 LockingUnsupportedError,
19a03940 20 Namespace,
be5c1ae8 21 RetryManager,
1a8cc837 22 classproperty,
1433734c 23 decodeArgument,
3bc2ddcc 24 encodeFilename,
3bc2ddcc 25 format_bytes,
11233f2a 26 join_nonempty,
205a0654 27 sanitize_open,
1433734c 28 shell_quote,
e3ced9ed 29 timeconvert,
aa7785f8 30 timetuple_from_msec,
11233f2a 31 try_call,
3bc2ddcc
JMF
32)
33
34
86e5f3ed 35class FileDownloader:
3bc2ddcc
JMF
36 """File Downloader class.
37
38 File downloader objects are the ones responsible for downloading the
39 actual video file and writing it to disk.
40
41 File downloaders accept a lot of parameters. In order not to saturate
42 the object constructor with arguments, it receives a dictionary of
43 options instead.
44
45 Available options:
46
881e6a1f
PH
47 verbose: Print additional info to stdout.
48 quiet: Do not print messages to stdout.
49 ratelimit: Download speed limit, in bytes/sec.
c487cf00 50 continuedl: Attempt to continue downloads if possible
51d9739f 51 throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
881e6a1f 52 retries: Number of times to retry for HTTP error 5xx
205a0654 53 file_access_retries: Number of times to retry on file access error
881e6a1f
PH
54 buffersize: Size of download buffer in bytes.
55 noresizebuffer: Do not automatically resize the download buffer.
56 continuedl: Try to continue downloads if possible.
57 noprogress: Do not print the progress bar.
881e6a1f
PH
58 nopart: Do not use temporary .part files.
59 updatetime: Use the Last-modified header to set output file timestamps.
60 test: Download only first bytes to test the downloader.
61 min_filesize: Skip files smaller than this size
62 max_filesize: Skip files larger than this size
63 xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
34488702 64 external_downloader_args: A dictionary of downloader keys (in lower case)
65 and a list of additional command-line arguments for the
66 executable. Use 'default' as the name for arguments to be
67 passed to all downloaders. For compatibility with youtube-dl,
68 a single list of args can also be used
7d106a65 69 hls_use_mpegts: Use the mpegts container for HLS videos.
073cca3d 70 http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
b54d4a5c
S
71 useful for bypassing bandwidth throttling imposed by
72 a webserver (experimental)
819e0531 73 progress_template: See YoutubeDL.py
23326151 74 retry_sleep_functions: See YoutubeDL.py
3bc2ddcc
JMF
75
76 Subclasses of this one must re-define the real_download method.
77 """
78
b686fc18 79 _TEST_FILE_SIZE = 10241
3bc2ddcc
JMF
80 params = None
81
82 def __init__(self, ydl, params):
83 """Create a FileDownloader object with the given options."""
19a03940 84 self._set_ydl(ydl)
3bc2ddcc
JMF
85 self._progress_hooks = []
86 self.params = params
819e0531 87 self._prepare_multiline_status()
5cda4eda 88 self.add_progress_hook(self.report_progress)
3bc2ddcc 89
19a03940 90 def _set_ydl(self, ydl):
91 self.ydl = ydl
92
93 for func in (
94 'deprecation_warning',
95 'report_error',
96 'report_file_already_downloaded',
97 'report_warning',
98 'to_console_title',
99 'to_stderr',
100 'trouble',
101 'write_debug',
102 ):
1d485a1a 103 if not hasattr(self, func):
104 setattr(self, func, getattr(ydl, func))
19a03940 105
106 def to_screen(self, *args, **kargs):
107 self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
108
23326151 109 __to_screen = to_screen
110
1a8cc837 111 @classproperty
112 def FD_NAME(cls):
998a3cae 113 return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()
3a408f9d 114
3bc2ddcc
JMF
115 @staticmethod
116 def format_seconds(seconds):
11233f2a 117 if seconds is None:
118 return ' Unknown'
aa7785f8 119 time = timetuple_from_msec(seconds * 1000)
120 if time.hours > 99:
3bc2ddcc 121 return '--:--:--'
aa7785f8 122 if not time.hours:
123 return '%02d:%02d' % time[1:-1]
124 return '%02d:%02d:%02d' % time[:-1]
3bc2ddcc 125
11233f2a 126 format_eta = format_seconds
127
3bc2ddcc
JMF
128 @staticmethod
129 def calc_percent(byte_counter, data_len):
130 if data_len is None:
131 return None
132 return float(byte_counter) / float(data_len) * 100.0
133
134 @staticmethod
135 def format_percent(percent):
11233f2a 136 return ' N/A%' if percent is None else f'{percent:>5.1f}%'
3bc2ddcc
JMF
137
138 @staticmethod
139 def calc_eta(start, now, total, current):
140 if total is None:
141 return None
c7667c2d
S
142 if now is None:
143 now = time.time()
3bc2ddcc 144 dif = now - start
5f6a1245 145 if current == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
146 return None
147 rate = float(current) / dif
148 return int((float(total) - float(current)) / rate)
149
3bc2ddcc
JMF
150 @staticmethod
151 def calc_speed(start, now, bytes):
152 dif = now - start
5f6a1245 153 if bytes == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
154 return None
155 return float(bytes) / dif
156
157 @staticmethod
158 def format_speed(speed):
11233f2a 159 return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
3bc2ddcc 160
617e58d8
S
161 @staticmethod
162 def format_retries(retries):
11233f2a 163 return 'inf' if retries == float('inf') else int(retries)
617e58d8 164
3bc2ddcc
JMF
165 @staticmethod
166 def best_block_size(elapsed_time, bytes):
167 new_min = max(bytes / 2.0, 1.0)
5f6a1245 168 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
3bc2ddcc
JMF
169 if elapsed_time < 0.001:
170 return int(new_max)
171 rate = bytes / elapsed_time
172 if rate > new_max:
173 return int(new_max)
174 if rate < new_min:
175 return int(new_min)
176 return int(rate)
177
178 @staticmethod
179 def parse_bytes(bytestr):
180 """Parse a string indicating a byte quantity into an integer."""
1d485a1a 181 matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr)
3bc2ddcc
JMF
182 if matchobj is None:
183 return None
184 number = float(matchobj.group(1))
185 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
186 return int(round(number * multiplier))
187
c7667c2d 188 def slow_down(self, start_time, now, byte_counter):
3bc2ddcc 189 """Sleep if the download speed is over the rate limit."""
d800609c 190 rate_limit = self.params.get('ratelimit')
8a77e5e6 191 if rate_limit is None or byte_counter == 0:
3bc2ddcc 192 return
c7667c2d
S
193 if now is None:
194 now = time.time()
3bc2ddcc
JMF
195 elapsed = now - start_time
196 if elapsed <= 0.0:
197 return
198 speed = float(byte_counter) / elapsed
8a77e5e6 199 if speed > rate_limit:
1a01639b
S
200 sleep_time = float(byte_counter) / rate_limit - elapsed
201 if sleep_time > 0:
202 time.sleep(sleep_time)
3bc2ddcc
JMF
203
204 def temp_name(self, filename):
205 """Returns a temporary filename for the given filename."""
b6b70730 206 if self.params.get('nopart', False) or filename == '-' or \
3bc2ddcc
JMF
207 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
208 return filename
b6b70730 209 return filename + '.part'
3bc2ddcc
JMF
210
211 def undo_temp_name(self, filename):
b6b70730
PH
212 if filename.endswith('.part'):
213 return filename[:-len('.part')]
3bc2ddcc
JMF
214 return filename
215
ea0c2f21
RA
216 def ytdl_filename(self, filename):
217 return filename + '.ytdl'
218
    # Decorator factory for the file-access methods of this class. It takes
    # no `self`: it is called as `@wrap_file_access(action)` on methods
    # defined in the class body.
    #   action: verb interpolated into the messages ("Unable to {action} file")
    #   fatal:  when true, no `error` reporter is passed to
    #           RetryManager.report_retry (presumably leaving the error to
    #           propagate; confirm against RetryManager)
    def wrap_file_access(action, *, fatal=False):
        def error_callback(err, count, retries, *, fd):
            # `fd` is the downloader instance, supplied by `wrapper` below
            return RetryManager.report_retry(
                err, count, retries, info=fd.__to_screen,
                # Sleeps 10 ms before printing the warning
                warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
                error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
                sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

        def wrapper(self, func, *args, **kwargs):
            # Attempts are driven by RetryManager, bounded by the
            # 'file_access_retries' param
            for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
                try:
                    return func(self, *args, **kwargs)
                except OSError as err:
                    # Only EACCES/EINVAL are recorded on `retry` for another attempt
                    if err.errno in (errno.EACCES, errno.EINVAL):
                        retry.error = err
                        continue
                    # Any other OSError is reported directly, with count=1 and retries=0
                    retry.error_callback(err, 1, 0)

        # Decorating `func` yields functools.partialmethod(wrapper, func)
        return functools.partial(functools.partialmethod, wrapper)
45806d44
EH
238
239 @wrap_file_access('open', fatal=True)
205a0654 240 def sanitize_open(self, filename, open_mode):
0edb3e33 241 f, filename = sanitize_open(filename, open_mode)
242 if not getattr(f, 'locked', None):
243 self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
244 return f, filename
205a0654 245
45806d44
EH
246 @wrap_file_access('remove')
247 def try_remove(self, filename):
248 os.remove(filename)
249
250 @wrap_file_access('rename')
3bc2ddcc 251 def try_rename(self, old_filename, new_filename):
f775c831 252 if old_filename == new_filename:
253 return
45806d44 254 os.replace(old_filename, new_filename)
3bc2ddcc
JMF
255
256 def try_utime(self, filename, last_modified_hdr):
257 """Try to set the last-modified time of the given file."""
258 if last_modified_hdr is None:
259 return
260 if not os.path.isfile(encodeFilename(filename)):
261 return
262 timestr = last_modified_hdr
263 if timestr is None:
264 return
265 filetime = timeconvert(timestr)
266 if filetime is None:
267 return filetime
268 # Ignore obviously invalid dates
269 if filetime == 0:
270 return
19a03940 271 with contextlib.suppress(Exception):
3bc2ddcc 272 os.utime(filename, (time.time(), filetime))
3bc2ddcc
JMF
273 return filetime
274
275 def report_destination(self, filename):
276 """Report destination filename."""
b6b70730 277 self.to_screen('[download] Destination: ' + filename)
3bc2ddcc 278
819e0531 279 def _prepare_multiline_status(self, lines=1):
280 if self.params.get('noprogress'):
bd50a52b 281 self._multiline = QuietMultilinePrinter()
819e0531 282 elif self.ydl.params.get('logger'):
283 self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
284 elif self.params.get('progress_with_newline'):
8a7f6d7a 285 self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
bd50a52b 286 else:
8a7f6d7a 287 self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
7578d77d 288 self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
bd50a52b
THD
289
290 def _finish_multiline_status(self):
819e0531 291 self._multiline.end()
292
    # Text style for each progress field. _report_progress_status applies the
    # style of field `<name>` to the formatted value stored under
    # '_<name>_str'; the two total-size fields use an empty style.
    ProgressStyles = Namespace(
        downloaded_bytes='light blue',
        percent='light blue',
        eta='yellow',
        speed='green',
        elapsed='bold white',
        total_bytes='',
        total_bytes_estimate='',
    )
7578d77d 302
303 def _report_progress_status(self, s, default_template):
64fa820c 304 for name, style in self.ProgressStyles.items_:
7578d77d 305 name = f'_{name}_str'
306 if name not in s:
307 continue
308 s[name] = self._format_progress(s[name], style)
309 s['_default_template'] = default_template % s
310
819e0531 311 progress_dict = s.copy()
312 progress_dict.pop('info_dict')
313 progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
314
315 progress_template = self.params.get('progress_template', {})
316 self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
317 progress_template.get('download') or '[download] %(progress._default_template)s',
318 progress_dict), s.get('progress_idx') or 0)
319 self.to_console_title(self.ydl.evaluate_outtmpl(
320 progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
321 progress_dict))
3bc2ddcc 322
7578d77d 323 def _format_progress(self, *args, **kwargs):
324 return self.ydl._format_text(
325 self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
326
5cda4eda 327 def report_progress(self, s):
11233f2a 328 def with_fields(*tups, default=''):
329 for *fields, tmpl in tups:
330 if all(s.get(f) is not None for f in fields):
331 return tmpl
332 return default
333
5cda4eda 334 if s['status'] == 'finished':
819e0531 335 if self.params.get('noprogress'):
5cda4eda 336 self.to_screen('[download] Download completed')
3df4f81d 337 speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
11233f2a 338 s.update({
3df4f81d 339 'speed': speed,
340 '_speed_str': self.format_speed(speed).strip(),
11233f2a 341 '_total_bytes_str': format_bytes(s.get('total_bytes')),
342 '_elapsed_str': self.format_seconds(s.get('elapsed')),
343 '_percent_str': self.format_percent(100),
344 })
345 self._report_progress_status(s, join_nonempty(
346 '100%%',
347 with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
348 with_fields(('elapsed', 'in %(_elapsed_str)s')),
3df4f81d 349 with_fields(('speed', 'at %(_speed_str)s')),
11233f2a 350 delim=' '))
5cda4eda
PH
351
352 if s['status'] != 'downloading':
353 return
354
11233f2a 355 s.update({
356 '_eta_str': self.format_eta(s.get('eta')),
357 '_speed_str': self.format_speed(s.get('speed')),
358 '_percent_str': self.format_percent(try_call(
359 lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
360 lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
361 lambda: s['downloaded_bytes'] == 0 and 0)),
362 '_total_bytes_str': format_bytes(s.get('total_bytes')),
363 '_total_bytes_estimate_str': format_bytes(s.get('total_bytes_estimate')),
364 '_downloaded_bytes_str': format_bytes(s.get('downloaded_bytes')),
365 '_elapsed_str': self.format_seconds(s.get('elapsed')),
366 })
367
368 msg_template = with_fields(
369 ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
370 ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
371 ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
372 ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
373 default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')
374
375 msg_template += with_fields(
376 ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
377 ('fragment_index', ' (frag %(fragment_index)s)'))
7578d77d 378 self._report_progress_status(s, msg_template)
3bc2ddcc
JMF
379
380 def report_resuming_byte(self, resume_len):
381 """Report attempt to resume at given byte."""
b6b70730 382 self.to_screen('[download] Resuming download at byte %s' % resume_len)
3bc2ddcc 383
be5c1ae8 384 def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
385 """Report retry"""
386 is_frag = False if frag_index is NO_DEFAULT else 'fragment'
387 RetryManager.report_retry(
388 err, count, retries, info=self.__to_screen,
389 warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
390 error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
391 sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
392 suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)
3bc2ddcc 393
3bc2ddcc
JMF
394 def report_unable_to_resume(self):
395 """Report it was impossible to resume download."""
b6b70730 396 self.to_screen('[download] Unable to resume')
3bc2ddcc 397
0a473f2f 398 @staticmethod
399 def supports_manifest(manifest):
400 """ Whether the downloader can download the fragments from the manifest.
401 Redefine in subclasses if needed. """
402 pass
403
9f448fcb 404 def download(self, filename, info_dict, subtitle=False):
3bc2ddcc
JMF
405 """Download to a filename using the info from info_dict
406 Return True on success and False otherwise
407 """
5f0d813d 408
4340deca 409 nooverwrites_and_exists = (
9cc1a313 410 not self.params.get('overwrites', True)
3089bc74 411 and os.path.exists(encodeFilename(filename))
4340deca
P
412 )
413
75a24854
RA
414 if not hasattr(filename, 'write'):
415 continuedl_and_exists = (
3089bc74
S
416 self.params.get('continuedl', True)
417 and os.path.isfile(encodeFilename(filename))
418 and not self.params.get('nopart', False)
75a24854
RA
419 )
420
421 # Check file already present
422 if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
423 self.report_file_already_downloaded(filename)
424 self._hook_progress({
425 'filename': filename,
426 'status': 'finished',
427 'total_bytes': os.path.getsize(encodeFilename(filename)),
3ba7740d 428 }, info_dict)
b69fd25c 429 self._finish_multiline_status()
a9e7f546 430 return True, False
dabc1273 431
19a03940 432 if subtitle:
433 sleep_interval = self.params.get('sleep_interval_subtitles') or 0
9f448fcb 434 else:
19a03940 435 min_sleep_interval = self.params.get('sleep_interval') or 0
436 sleep_interval = random.uniform(
43cc91ad 437 min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
19a03940 438 if sleep_interval > 0:
439 self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
440 time.sleep(sleep_interval)
441
819e0531 442 ret = self.real_download(filename, info_dict)
443 self._finish_multiline_status()
444 return ret, True
3bc2ddcc
JMF
445
446 def real_download(self, filename, info_dict):
447 """Real download process. Redefine in subclasses."""
b6b70730 448 raise NotImplementedError('This method must be implemented by subclasses')
3bc2ddcc 449
3ba7740d 450 def _hook_progress(self, status, info_dict):
f5ea4748 451 # Ideally we want to make a copy of the dict, but that is too slow
03b4de72 452 status['info_dict'] = info_dict
f45e6c11 453 # youtube-dl passes the same status object to all the hooks.
454 # Some third party scripts seems to be relying on this.
455 # So keep this behavior if possible
3bc2ddcc 456 for ph in self._progress_hooks:
f45e6c11 457 ph(status)
3bc2ddcc
JMF
458
459 def add_progress_hook(self, ph):
71b640cc
PH
460 # See YoutubeDl.py (search for progress_hooks) for a description of
461 # this interface
3bc2ddcc 462 self._progress_hooks.append(ph)
222516d9 463
cd8a07a7 464 def _debug_cmd(self, args, exe=None):
222516d9
PH
465 if not self.params.get('verbose', False):
466 return
467
cd8a07a7
S
468 str_args = [decodeArgument(a) for a in args]
469
222516d9 470 if exe is None:
cd8a07a7 471 exe = os.path.basename(str_args[0])
222516d9 472
86e5f3ed 473 self.write_debug(f'{exe} command line: {shell_quote(str_args)}')