]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[cleanup] Add more ruff rules (#10149)
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
be5c1ae8 3import functools
3bc2ddcc 4import os
f8271158 5import random
3bc2ddcc 6import re
9590cc6b 7import threading
3bc2ddcc
JMF
8import time
9
f8271158 10from ..minicurses import (
11 BreaklineStatusPrinter,
12 MultilineLogger,
13 MultilinePrinter,
14 QuietMultilinePrinter,
15)
3bc2ddcc 16from ..utils import (
be5c1ae8 17 IDENTITY,
18 NO_DEFAULT,
f8271158 19 LockingUnsupportedError,
19a03940 20 Namespace,
be5c1ae8 21 RetryManager,
1a8cc837 22 classproperty,
1433734c 23 decodeArgument,
71df9b7f 24 deprecation_warning,
3bc2ddcc 25 encodeFilename,
3bc2ddcc 26 format_bytes,
11233f2a 27 join_nonempty,
64c464a1 28 parse_bytes,
a057779d 29 remove_start,
205a0654 30 sanitize_open,
1433734c 31 shell_quote,
e3ced9ed 32 timeconvert,
aa7785f8 33 timetuple_from_msec,
11233f2a 34 try_call,
3bc2ddcc
JMF
35)
36
37
86e5f3ed 38class FileDownloader:
3bc2ddcc
JMF
39 """File Downloader class.
40
41 File downloader objects are the ones responsible for downloading the
42 actual video file and writing it to disk.
43
44 File downloaders accept a lot of parameters. In order not to saturate
45 the object constructor with arguments, it receives a dictionary of
46 options instead.
47
48 Available options:
49
881e6a1f
PH
50 verbose: Print additional info to stdout.
51 quiet: Do not print messages to stdout.
52 ratelimit: Download speed limit, in bytes/sec.
51d9739f 53 throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
46f1370e 54 retries: Number of times to retry for expected network errors.
55 Default is 0 for API, but 10 for CLI
56 file_access_retries: Number of times to retry on file access error (default: 3)
881e6a1f
PH
57 buffersize: Size of download buffer in bytes.
58 noresizebuffer: Do not automatically resize the download buffer.
59 continuedl: Try to continue downloads if possible.
60 noprogress: Do not print the progress bar.
881e6a1f
PH
61 nopart: Do not use temporary .part files.
62 updatetime: Use the Last-modified header to set output file timestamps.
63 test: Download only first bytes to test the downloader.
64 min_filesize: Skip files smaller than this size
65 max_filesize: Skip files larger than this size
66 xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
9590cc6b 67 progress_delta: The minimum time between progress output, in seconds
34488702 68 external_downloader_args: A dictionary of downloader keys (in lower case)
69 and a list of additional command-line arguments for the
70 executable. Use 'default' as the name for arguments to be
71 passed to all downloaders. For compatibility with youtube-dl,
72 a single list of args can also be used
7d106a65 73 hls_use_mpegts: Use the mpegts container for HLS videos.
073cca3d 74 http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
b54d4a5c
S
75 useful for bypassing bandwidth throttling imposed by
76 a webserver (experimental)
819e0531 77 progress_template: See YoutubeDL.py
23326151 78 retry_sleep_functions: See YoutubeDL.py
3bc2ddcc
JMF
79
80 Subclasses of this one must re-define the real_download method.
81 """
82
b686fc18 83 _TEST_FILE_SIZE = 10241
3bc2ddcc
JMF
84 params = None
85
86 def __init__(self, ydl, params):
87 """Create a FileDownloader object with the given options."""
19a03940 88 self._set_ydl(ydl)
3bc2ddcc
JMF
89 self._progress_hooks = []
90 self.params = params
819e0531 91 self._prepare_multiline_status()
5cda4eda 92 self.add_progress_hook(self.report_progress)
9590cc6b
SS
93 if self.params.get('progress_delta'):
94 self._progress_delta_lock = threading.Lock()
95 self._progress_delta_time = time.monotonic()
3bc2ddcc 96
19a03940 97 def _set_ydl(self, ydl):
98 self.ydl = ydl
99
100 for func in (
101 'deprecation_warning',
da4db748 102 'deprecated_feature',
19a03940 103 'report_error',
104 'report_file_already_downloaded',
105 'report_warning',
106 'to_console_title',
107 'to_stderr',
108 'trouble',
109 'write_debug',
110 ):
1d485a1a 111 if not hasattr(self, func):
112 setattr(self, func, getattr(ydl, func))
19a03940 113
114 def to_screen(self, *args, **kargs):
115 self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
116
23326151 117 __to_screen = to_screen
118
@classproperty
def FD_NAME(cls):
    """Lower-case, underscore-separated name built from the class name.

    The last two characters of the class name are dropped, then an
    underscore is inserted at every lower-case/upper-case boundary.
    """
    trimmed = cls.__name__[:-2]
    underscored = re.sub(r'(?<=[a-z])(?=[A-Z])', '_', trimmed)
    return underscored.lower()
3a408f9d 122
3bc2ddcc
JMF
123 @staticmethod
124 def format_seconds(seconds):
11233f2a 125 if seconds is None:
126 return ' Unknown'
aa7785f8 127 time = timetuple_from_msec(seconds * 1000)
128 if time.hours > 99:
3bc2ddcc 129 return '--:--:--'
aa7785f8 130 return '%02d:%02d:%02d' % time[:-1]
3bc2ddcc 131
@classmethod
def format_eta(cls, seconds):
    """Format an ETA: format_seconds() without a leading '00:', right-aligned to 8 columns."""
    text = remove_start(cls.format_seconds(seconds), '00:')
    return f'{text:>8s}'
11233f2a 135
3bc2ddcc
JMF
136 @staticmethod
137 def calc_percent(byte_counter, data_len):
138 if data_len is None:
139 return None
140 return float(byte_counter) / float(data_len) * 100.0
141
142 @staticmethod
143 def format_percent(percent):
11233f2a 144 return ' N/A%' if percent is None else f'{percent:>5.1f}%'
3bc2ddcc 145
4823ec9f 146 @classmethod
147 def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
148 if total is NO_DEFAULT:
149 rate, remaining = start_or_rate, now_or_remaining
150 if None in (rate, remaining):
151 return None
152 return int(float(remaining) / rate)
153
154 start, now = start_or_rate, now_or_remaining
3bc2ddcc
JMF
155 if total is None:
156 return None
c7667c2d
S
157 if now is None:
158 now = time.time()
4823ec9f 159 rate = cls.calc_speed(start, now, current)
160 return rate and int((float(total) - float(current)) / rate)
3bc2ddcc 161
3bc2ddcc
JMF
162 @staticmethod
163 def calc_speed(start, now, bytes):
164 dif = now - start
5f6a1245 165 if bytes == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
166 return None
167 return float(bytes) / dif
168
169 @staticmethod
170 def format_speed(speed):
11233f2a 171 return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
3bc2ddcc 172
617e58d8
S
173 @staticmethod
174 def format_retries(retries):
11233f2a 175 return 'inf' if retries == float('inf') else int(retries)
617e58d8 176
4823ec9f 177 @staticmethod
178 def filesize_or_none(unencoded_filename):
179 if os.path.isfile(unencoded_filename):
180 return os.path.getsize(unencoded_filename)
181 return 0
182
3bc2ddcc
JMF
183 @staticmethod
184 def best_block_size(elapsed_time, bytes):
185 new_min = max(bytes / 2.0, 1.0)
5f6a1245 186 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
3bc2ddcc
JMF
187 if elapsed_time < 0.001:
188 return int(new_max)
189 rate = bytes / elapsed_time
190 if rate > new_max:
191 return int(new_max)
192 if rate < new_min:
193 return int(new_min)
194 return int(rate)
195
@staticmethod
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into an integer.

    Deprecated: emits a deprecation warning, then delegates to the
    module-level parse_bytes imported from utils.
    """
    notice = (
        'yt_dlp.FileDownloader.parse_bytes is deprecated and '
        'may be removed in the future. Use yt_dlp.utils.parse_bytes instead')
    deprecation_warning(notice)
    return parse_bytes(bytestr)
3bc2ddcc 202
c7667c2d 203 def slow_down(self, start_time, now, byte_counter):
3bc2ddcc 204 """Sleep if the download speed is over the rate limit."""
d800609c 205 rate_limit = self.params.get('ratelimit')
8a77e5e6 206 if rate_limit is None or byte_counter == 0:
3bc2ddcc 207 return
c7667c2d
S
208 if now is None:
209 now = time.time()
3bc2ddcc
JMF
210 elapsed = now - start_time
211 if elapsed <= 0.0:
212 return
213 speed = float(byte_counter) / elapsed
8a77e5e6 214 if speed > rate_limit:
1a01639b
S
215 sleep_time = float(byte_counter) / rate_limit - elapsed
216 if sleep_time > 0:
217 time.sleep(sleep_time)
3bc2ddcc
JMF
218
219 def temp_name(self, filename):
220 """Returns a temporary filename for the given filename."""
b6b70730 221 if self.params.get('nopart', False) or filename == '-' or \
3bc2ddcc
JMF
222 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
223 return filename
b6b70730 224 return filename + '.part'
3bc2ddcc
JMF
225
def undo_temp_name(self, filename):
    """Strip a trailing '.part' from `filename`, if present."""
    suffix = '.part'
    return filename[:-len(suffix)] if filename.endswith(suffix) else filename
230
ea0c2f21
RA
def ytdl_filename(self, filename):
    """Return `filename` with '.ytdl' appended."""
    suffix = '.ytdl'
    return filename + suffix
233
def wrap_file_access(action, *, fatal=False):
    # Decorator factory used inside the class body (hence no `self`).
    # `action` is the verb inserted into the messages ("open", "remove", ...).
    # With fatal=True no `error` reporter is handed to
    # RetryManager.report_retry (error=None).
    def error_callback(err, count, retries, *, fd):
        # `fd` is the downloader instance, supplied by the RetryManager
        # created in wrapper() below
        return RetryManager.report_retry(
            err, count, retries, info=fd.__to_screen,
            # The warn callback pauses for 10ms before printing the message
            warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
            error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
            sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

    def wrapper(self, func, *args, **kwargs):
        # Retry `func` up to the 'file_access_retries' param (default: 3)
        for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
            try:
                return func(self, *args, **kwargs)
            except OSError as err:
                # Only EACCES/EINVAL are recorded on the retry object and retried
                if err.errno in (errno.EACCES, errno.EINVAL):
                    retry.error = err
                    continue
                # Any other OSError is passed straight to the error callback
                # with count=1, retries=0
                # NOTE(review): presumably this reports the failure as final - confirm against RetryManager
                retry.error_callback(err, 1, 0)

    # Decorating `f` yields functools.partialmethod(wrapper, f), so calling the
    # resulting method runs wrapper(self, f, *args, **kwargs)
    return functools.partial(functools.partialmethod, wrapper)
45806d44
EH
253
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
    """Open `filename` through the module-level sanitize_open helper.

    Returns the (file object, filename) pair produced by that helper. A
    debug message is written when the file object has no truthy `locked`
    attribute.
    """
    stream, filename = sanitize_open(filename, open_mode)
    is_locked = getattr(stream, 'locked', None)
    if not is_locked:
        self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
    return stream, filename
205a0654 260
45806d44
EH
@wrap_file_access('remove')
def try_remove(self, filename):
    """Delete `filename` if it is an existing regular file."""
    if not os.path.isfile(filename):
        return
    os.remove(filename)
45806d44
EH
265
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
    """Move `old_filename` onto `new_filename`; a no-op when both are equal."""
    if old_filename != new_filename:
        os.replace(old_filename, new_filename)
3bc2ddcc
JMF
271
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    `last_modified_hdr` is the header value handed to timeconvert().
    Returns the converted timestamp, or None when the header is missing,
    the file does not exist, or the timestamp is unparsable or zero.
    Failures while setting the time are ignored (best effort).
    """
    if last_modified_hdr is None:
        return None
    if not os.path.isfile(encodeFilename(filename)):
        return None
    filetime = timeconvert(last_modified_hdr)
    # Ignore unparsable (None) and obviously invalid (0) dates
    if filetime is None or filetime == 0:
        return None
    with contextlib.suppress(Exception):
        # Access time becomes "now"; modification time comes from the header
        os.utime(filename, (time.time(), filetime))
    return filetime
290
291 def report_destination(self, filename):
292 """Report destination filename."""
b6b70730 293 self.to_screen('[download] Destination: ' + filename)
3bc2ddcc 294
def _prepare_multiline_status(self, lines=1):
    """Create the status printer (`self._multiline`) matching the options.

    Selection order: 'noprogress' -> QuietMultilinePrinter; a logger in
    the ydl params -> MultilineLogger; 'progress_with_newline' ->
    BreaklineStatusPrinter; otherwise MultilinePrinter.
    """
    params = self.params
    if params.get('noprogress'):
        printer = QuietMultilinePrinter()
    elif self.ydl.params.get('logger'):
        printer = MultilineLogger(self.ydl.params['logger'], lines)
    elif params.get('progress_with_newline'):
        printer = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
    else:
        printer = MultilinePrinter(self.ydl._out_files.out, lines, not params.get('quiet'))
    self._multiline = printer

    color_mode = self.ydl._allow_colors.out
    # Colors are allowed for any truthy mode other than 'no_color'
    printer.allow_colors = color_mode and color_mode != 'no_color'
    printer._HAVE_FULLCAP = color_mode
bd50a52b
THD
306
307 def _finish_multiline_status(self):
819e0531 308 self._multiline.end()
309
# Text style for each progress field. _report_progress_status applies the
# style of key `name` to the status entry '_<name>_str'; an empty string
# means no specific style is given for that field.
ProgressStyles = Namespace(
    downloaded_bytes='light blue',
    percent='light blue',
    eta='yellow',
    speed='green',
    elapsed='bold white',
    total_bytes='',
    total_bytes_estimate='',
)
7578d77d 319
320 def _report_progress_status(self, s, default_template):
64fa820c 321 for name, style in self.ProgressStyles.items_:
7578d77d 322 name = f'_{name}_str'
323 if name not in s:
324 continue
325 s[name] = self._format_progress(s[name], style)
326 s['_default_template'] = default_template % s
327
819e0531 328 progress_dict = s.copy()
329 progress_dict.pop('info_dict')
330 progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
331
332 progress_template = self.params.get('progress_template', {})
333 self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
334 progress_template.get('download') or '[download] %(progress._default_template)s',
335 progress_dict), s.get('progress_idx') or 0)
336 self.to_console_title(self.ydl.evaluate_outtmpl(
337 progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
338 progress_dict))
3bc2ddcc 339
7578d77d 340 def _format_progress(self, *args, **kwargs):
341 return self.ydl._format_text(
342 self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
343
def report_progress(self, s):
    """Progress hook that renders the status dict `s` as a progress line.

    Only the 'finished' and 'downloading' statuses produce output; any
    other status is ignored. The display strings ('_speed_str',
    '_percent_str', ...) are written back into `s` before it is handed to
    _report_progress_status.
    """
    def with_fields(*tups, default=''):
        # Each tuple is (*field_names, template): return the template of the
        # first tuple whose fields are all present and not None in `s`
        for *fields, tmpl in tups:
            if all(s.get(f) is not None for f in fields):
                return tmpl
        return default

    _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'

    if s['status'] == 'finished':
        if self.params.get('noprogress'):
            self.to_screen('[download] Download completed')
        # Average speed over the whole download
        # NOTE(review): try_call presumably yields None when a key is missing or elapsed is 0 - confirm
        speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
        s.update({
            'speed': speed,
            '_speed_str': self.format_speed(speed).strip(),
            '_total_bytes_str': _format_bytes('total_bytes'),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
            '_percent_str': self.format_percent(100),
        })
        # '%%' is a literal percent sign: the joined text is used as a %-template
        self._report_progress_status(s, join_nonempty(
            '100%%',
            with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
            with_fields(('elapsed', 'in %(_elapsed_str)s')),
            with_fields(('speed', 'at %(_speed_str)s')),
            delim=' '))

    # The 'finished' case falls through to here and returns as well
    if s['status'] != 'downloading':
        return

    # Rate-limit output: skip this update until the next allowed time, then
    # push that time forward by 'progress_delta' seconds
    if update_delta := self.params.get('progress_delta'):
        with self._progress_delta_lock:
            if time.monotonic() < self._progress_delta_time:
                return
            self._progress_delta_time += update_delta

    s.update({
        '_eta_str': self.format_eta(s.get('eta')).strip(),
        '_speed_str': self.format_speed(s.get('speed')),
        # Prefer the exact total, then the estimate; the last alternative
        # yields 0 when nothing has been downloaded yet
        '_percent_str': self.format_percent(try_call(
            lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
            lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
            lambda: s['downloaded_bytes'] == 0 and 0)),
        '_total_bytes_str': _format_bytes('total_bytes'),
        '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
        '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
        '_elapsed_str': self.format_seconds(s.get('elapsed')),
    })

    # Pick the most informative template the available fields allow
    msg_template = with_fields(
        ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
        ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
        default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')

    # Append fragment progress when the status carries fragment information
    msg_template += with_fields(
        ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
        ('fragment_index', ' (frag %(fragment_index)s)'))
    self._report_progress_status(s, msg_template)
3bc2ddcc
JMF
404
405 def report_resuming_byte(self, resume_len):
406 """Report attempt to resume at given byte."""
add96eb9 407 self.to_screen(f'[download] Resuming download at byte {resume_len}')
3bc2ddcc 408
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
    """Report retry

    Delegates to RetryManager.report_retry. When `frag_index` is given the
    retry is reported for a fragment (all fragments if it is None) and the
    'fragment' sleep function is used instead of the 'http' one.
    """
    if frag_index is NO_DEFAULT:
        sleep_key, suffix = 'http', None
    else:
        sleep_key = 'fragment'
        suffix = 'fragments' if frag_index is None else f'fragment {frag_index}'

    def warn(msg):
        self.__to_screen(f'[download] Got error: {msg}')

    def report_fatal(e):
        return self.report_error(f'\r[download] Got error: {e}')

    sleep_functions = self.params.get('retry_sleep_functions', {})
    RetryManager.report_retry(
        err, count, retries, info=self.__to_screen,
        warn=warn,
        error=report_fatal if fatal else IDENTITY,
        sleep_func=sleep_functions.get(sleep_key),
        suffix=suffix)
3bc2ddcc 418
3bc2ddcc
JMF
419 def report_unable_to_resume(self):
420 """Report it was impossible to resume download."""
b6b70730 421 self.to_screen('[download] Unable to resume')
3bc2ddcc 422
0a473f2f 423 @staticmethod
424 def supports_manifest(manifest):
425 """ Whether the downloader can download the fragments from the manifest.
426 Redefine in subclasses if needed. """
427 pass
428
9f448fcb 429 def download(self, filename, info_dict, subtitle=False):
3bc2ddcc
JMF
430 """Download to a filename using the info from info_dict
431 Return True on success and False otherwise
432 """
4340deca 433 nooverwrites_and_exists = (
9cc1a313 434 not self.params.get('overwrites', True)
3089bc74 435 and os.path.exists(encodeFilename(filename))
4340deca
P
436 )
437
75a24854
RA
438 if not hasattr(filename, 'write'):
439 continuedl_and_exists = (
3089bc74
S
440 self.params.get('continuedl', True)
441 and os.path.isfile(encodeFilename(filename))
442 and not self.params.get('nopart', False)
75a24854
RA
443 )
444
445 # Check file already present
446 if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
447 self.report_file_already_downloaded(filename)
448 self._hook_progress({
449 'filename': filename,
450 'status': 'finished',
451 'total_bytes': os.path.getsize(encodeFilename(filename)),
3ba7740d 452 }, info_dict)
b69fd25c 453 self._finish_multiline_status()
a9e7f546 454 return True, False
dabc1273 455
19a03940 456 if subtitle:
457 sleep_interval = self.params.get('sleep_interval_subtitles') or 0
9f448fcb 458 else:
19a03940 459 min_sleep_interval = self.params.get('sleep_interval') or 0
460 sleep_interval = random.uniform(
43cc91ad 461 min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
19a03940 462 if sleep_interval > 0:
463 self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
464 time.sleep(sleep_interval)
465
819e0531 466 ret = self.real_download(filename, info_dict)
467 self._finish_multiline_status()
468 return ret, True
3bc2ddcc
JMF
469
def real_download(self, filename, info_dict):
    """Real download process. Redefine in subclasses."""
    message = 'This method must be implemented by subclasses'
    raise NotImplementedError(message)
3bc2ddcc 473
3ba7740d 474 def _hook_progress(self, status, info_dict):
f5ea4748 475 # Ideally we want to make a copy of the dict, but that is too slow
03b4de72 476 status['info_dict'] = info_dict
f45e6c11 477 # youtube-dl passes the same status object to all the hooks.
478 # Some third party scripts seems to be relying on this.
479 # So keep this behavior if possible
3bc2ddcc 480 for ph in self._progress_hooks:
f45e6c11 481 ph(status)
3bc2ddcc
JMF
482
483 def add_progress_hook(self, ph):
71b640cc
PH
484 # See YoutubeDl.py (search for progress_hooks) for a description of
485 # this interface
3bc2ddcc 486 self._progress_hooks.append(ph)
222516d9 487
cd8a07a7 488 def _debug_cmd(self, args, exe=None):
222516d9
PH
489 if not self.params.get('verbose', False):
490 return
491
cd8a07a7
S
492 str_args = [decodeArgument(a) for a in args]
493
222516d9 494 if exe is None:
cd8a07a7 495 exe = os.path.basename(str_args[0])
222516d9 496
86e5f3ed 497 self.write_debug(f'{exe} command line: {shell_quote(str_args)}')