]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[compat] Add `functools.cached_property`
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
3bc2ddcc 3import os
f8271158 4import random
3bc2ddcc 5import re
3bc2ddcc
JMF
6import time
7
f8271158 8from ..minicurses import (
9 BreaklineStatusPrinter,
10 MultilineLogger,
11 MultilinePrinter,
12 QuietMultilinePrinter,
13)
2762dbb1 14from ..compat import functools
3bc2ddcc 15from ..utils import (
1d485a1a 16 NUMBER_RE,
f8271158 17 LockingUnsupportedError,
19a03940 18 Namespace,
1433734c 19 decodeArgument,
3bc2ddcc 20 encodeFilename,
9b9c5355 21 error_to_compat_str,
666c36d5 22 float_or_none,
3bc2ddcc 23 format_bytes,
205a0654 24 sanitize_open,
1433734c 25 shell_quote,
e3ced9ed 26 timeconvert,
aa7785f8 27 timetuple_from_msec,
3bc2ddcc
JMF
28)
29
30
86e5f3ed 31class FileDownloader:
3bc2ddcc
JMF
32 """File Downloader class.
33
34 File downloader objects are the ones responsible of downloading the
35 actual video file and writing it to disk.
36
37 File downloaders accept a lot of parameters. In order not to saturate
38 the object constructor with arguments, it receives a dictionary of
39 options instead.
40
41 Available options:
42
881e6a1f
PH
43 verbose: Print additional info to stdout.
44 quiet: Do not print messages to stdout.
45 ratelimit: Download speed limit, in bytes/sec.
51d9739f 46 throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
881e6a1f 47 retries: Number of times to retry for HTTP error 5xx
205a0654 48 file_access_retries: Number of times to retry on file access error
881e6a1f
PH
49 buffersize: Size of download buffer in bytes.
50 noresizebuffer: Do not automatically resize the download buffer.
51 continuedl: Try to continue downloads if possible.
52 noprogress: Do not print the progress bar.
881e6a1f
PH
53 nopart: Do not use temporary .part files.
54 updatetime: Use the Last-modified header to set output file timestamps.
55 test: Download only first bytes to test the downloader.
56 min_filesize: Skip files smaller than this size
57 max_filesize: Skip files larger than this size
58 xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
34488702 59 external_downloader_args: A dictionary of downloader keys (in lower case)
60 and a list of additional command-line arguments for the
61 executable. Use 'default' as the name for arguments to be
62 passed to all downloaders. For compatibility with youtube-dl,
63 a single list of args can also be used
7d106a65 64 hls_use_mpegts: Use the mpegts container for HLS videos.
073cca3d 65 http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
b54d4a5c
S
66 useful for bypassing bandwidth throttling imposed by
67 a webserver (experimental)
819e0531 68 progress_template: See YoutubeDL.py
23326151 69 retry_sleep_functions: See YoutubeDL.py
3bc2ddcc
JMF
70
71 Subclasses of this one must re-define the real_download method.
72 """
73
b686fc18 74 _TEST_FILE_SIZE = 10241
3bc2ddcc
JMF
75 params = None
76
77 def __init__(self, ydl, params):
78 """Create a FileDownloader object with the given options."""
19a03940 79 self._set_ydl(ydl)
3bc2ddcc
JMF
80 self._progress_hooks = []
81 self.params = params
819e0531 82 self._prepare_multiline_status()
5cda4eda 83 self.add_progress_hook(self.report_progress)
3bc2ddcc 84
19a03940 85 def _set_ydl(self, ydl):
86 self.ydl = ydl
87
88 for func in (
89 'deprecation_warning',
90 'report_error',
91 'report_file_already_downloaded',
92 'report_warning',
93 'to_console_title',
94 'to_stderr',
95 'trouble',
96 'write_debug',
97 ):
1d485a1a 98 if not hasattr(self, func):
99 setattr(self, func, getattr(ydl, func))
19a03940 100
101 def to_screen(self, *args, **kargs):
102 self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
103
23326151 104 __to_screen = to_screen
105
2762dbb1 106 @functools.cached_property
3a408f9d 107 def FD_NAME(self):
108 return re.sub(r'(?<!^)(?=[A-Z])', '_', type(self).__name__[:-2]).lower()
109
3bc2ddcc
JMF
110 @staticmethod
111 def format_seconds(seconds):
aa7785f8 112 time = timetuple_from_msec(seconds * 1000)
113 if time.hours > 99:
3bc2ddcc 114 return '--:--:--'
aa7785f8 115 if not time.hours:
116 return '%02d:%02d' % time[1:-1]
117 return '%02d:%02d:%02d' % time[:-1]
3bc2ddcc
JMF
118
119 @staticmethod
120 def calc_percent(byte_counter, data_len):
121 if data_len is None:
122 return None
123 return float(byte_counter) / float(data_len) * 100.0
124
125 @staticmethod
126 def format_percent(percent):
127 if percent is None:
128 return '---.-%'
f304da8a 129 elif percent == 100:
130 return '100%'
3bc2ddcc
JMF
131 return '%6s' % ('%3.1f%%' % percent)
132
133 @staticmethod
134 def calc_eta(start, now, total, current):
135 if total is None:
136 return None
c7667c2d
S
137 if now is None:
138 now = time.time()
3bc2ddcc 139 dif = now - start
5f6a1245 140 if current == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
141 return None
142 rate = float(current) / dif
143 return int((float(total) - float(current)) / rate)
144
145 @staticmethod
146 def format_eta(eta):
147 if eta is None:
148 return '--:--'
149 return FileDownloader.format_seconds(eta)
150
151 @staticmethod
152 def calc_speed(start, now, bytes):
153 dif = now - start
5f6a1245 154 if bytes == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
155 return None
156 return float(bytes) / dif
157
158 @staticmethod
159 def format_speed(speed):
160 if speed is None:
161 return '%10s' % '---b/s'
162 return '%10s' % ('%s/s' % format_bytes(speed))
163
617e58d8
S
164 @staticmethod
165 def format_retries(retries):
166 return 'inf' if retries == float('inf') else '%.0f' % retries
167
3bc2ddcc
JMF
168 @staticmethod
169 def best_block_size(elapsed_time, bytes):
170 new_min = max(bytes / 2.0, 1.0)
5f6a1245 171 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
3bc2ddcc
JMF
172 if elapsed_time < 0.001:
173 return int(new_max)
174 rate = bytes / elapsed_time
175 if rate > new_max:
176 return int(new_max)
177 if rate < new_min:
178 return int(new_min)
179 return int(rate)
180
181 @staticmethod
182 def parse_bytes(bytestr):
183 """Parse a string indicating a byte quantity into an integer."""
1d485a1a 184 matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr)
3bc2ddcc
JMF
185 if matchobj is None:
186 return None
187 number = float(matchobj.group(1))
188 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
189 return int(round(number * multiplier))
190
c7667c2d 191 def slow_down(self, start_time, now, byte_counter):
3bc2ddcc 192 """Sleep if the download speed is over the rate limit."""
d800609c 193 rate_limit = self.params.get('ratelimit')
8a77e5e6 194 if rate_limit is None or byte_counter == 0:
3bc2ddcc 195 return
c7667c2d
S
196 if now is None:
197 now = time.time()
3bc2ddcc
JMF
198 elapsed = now - start_time
199 if elapsed <= 0.0:
200 return
201 speed = float(byte_counter) / elapsed
8a77e5e6 202 if speed > rate_limit:
1a01639b
S
203 sleep_time = float(byte_counter) / rate_limit - elapsed
204 if sleep_time > 0:
205 time.sleep(sleep_time)
3bc2ddcc
JMF
206
207 def temp_name(self, filename):
208 """Returns a temporary filename for the given filename."""
b6b70730 209 if self.params.get('nopart', False) or filename == '-' or \
3bc2ddcc
JMF
210 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
211 return filename
b6b70730 212 return filename + '.part'
3bc2ddcc
JMF
213
214 def undo_temp_name(self, filename):
b6b70730
PH
215 if filename.endswith('.part'):
216 return filename[:-len('.part')]
3bc2ddcc
JMF
217 return filename
218
ea0c2f21
RA
219 def ytdl_filename(self, filename):
220 return filename + '.ytdl'
221
45806d44
EH
222 def wrap_file_access(action, *, fatal=False):
223 def outer(func):
224 def inner(self, *args, **kwargs):
225 file_access_retries = self.params.get('file_access_retries', 0)
226 retry = 0
227 while True:
228 try:
229 return func(self, *args, **kwargs)
86e5f3ed 230 except OSError as err:
45806d44
EH
231 retry = retry + 1
232 if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL):
233 if not fatal:
234 self.report_error(f'unable to {action} file: {err}')
235 return
236 raise
237 self.to_screen(
238 f'[download] Unable to {action} file due to file access error. '
239 f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...')
23326151 240 if not self.sleep_retry('file_access', retry):
241 time.sleep(0.01)
45806d44
EH
242 return inner
243 return outer
244
245 @wrap_file_access('open', fatal=True)
205a0654 246 def sanitize_open(self, filename, open_mode):
0edb3e33 247 f, filename = sanitize_open(filename, open_mode)
248 if not getattr(f, 'locked', None):
249 self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
250 return f, filename
205a0654 251
45806d44
EH
252 @wrap_file_access('remove')
253 def try_remove(self, filename):
254 os.remove(filename)
255
256 @wrap_file_access('rename')
3bc2ddcc 257 def try_rename(self, old_filename, new_filename):
f775c831 258 if old_filename == new_filename:
259 return
45806d44 260 os.replace(old_filename, new_filename)
3bc2ddcc
JMF
261
262 def try_utime(self, filename, last_modified_hdr):
263 """Try to set the last-modified time of the given file."""
264 if last_modified_hdr is None:
265 return
266 if not os.path.isfile(encodeFilename(filename)):
267 return
268 timestr = last_modified_hdr
269 if timestr is None:
270 return
271 filetime = timeconvert(timestr)
272 if filetime is None:
273 return filetime
274 # Ignore obviously invalid dates
275 if filetime == 0:
276 return
19a03940 277 with contextlib.suppress(Exception):
3bc2ddcc 278 os.utime(filename, (time.time(), filetime))
3bc2ddcc
JMF
279 return filetime
280
281 def report_destination(self, filename):
282 """Report destination filename."""
b6b70730 283 self.to_screen('[download] Destination: ' + filename)
3bc2ddcc 284
819e0531 285 def _prepare_multiline_status(self, lines=1):
286 if self.params.get('noprogress'):
bd50a52b 287 self._multiline = QuietMultilinePrinter()
819e0531 288 elif self.ydl.params.get('logger'):
289 self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
290 elif self.params.get('progress_with_newline'):
591bb9d3 291 self._multiline = BreaklineStatusPrinter(self.ydl._out_files.screen, lines)
bd50a52b 292 else:
591bb9d3 293 self._multiline = MultilinePrinter(self.ydl._out_files.screen, lines, not self.params.get('quiet'))
7578d77d 294 self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
bd50a52b
THD
295
296 def _finish_multiline_status(self):
819e0531 297 self._multiline.end()
298
19a03940 299 ProgressStyles = Namespace(
300 downloaded_bytes='light blue',
301 percent='light blue',
302 eta='yellow',
303 speed='green',
304 elapsed='bold white',
305 total_bytes='',
306 total_bytes_estimate='',
307 )
7578d77d 308
309 def _report_progress_status(self, s, default_template):
7896214c 310 for name, style in self.ProgressStyles:
7578d77d 311 name = f'_{name}_str'
312 if name not in s:
313 continue
314 s[name] = self._format_progress(s[name], style)
315 s['_default_template'] = default_template % s
316
819e0531 317 progress_dict = s.copy()
318 progress_dict.pop('info_dict')
319 progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
320
321 progress_template = self.params.get('progress_template', {})
322 self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
323 progress_template.get('download') or '[download] %(progress._default_template)s',
324 progress_dict), s.get('progress_idx') or 0)
325 self.to_console_title(self.ydl.evaluate_outtmpl(
326 progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
327 progress_dict))
3bc2ddcc 328
7578d77d 329 def _format_progress(self, *args, **kwargs):
330 return self.ydl._format_text(
331 self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
332
5cda4eda
PH
333 def report_progress(self, s):
334 if s['status'] == 'finished':
819e0531 335 if self.params.get('noprogress'):
5cda4eda 336 self.to_screen('[download] Download completed')
819e0531 337 msg_template = '100%%'
338 if s.get('total_bytes') is not None:
339 s['_total_bytes_str'] = format_bytes(s['total_bytes'])
340 msg_template += ' of %(_total_bytes_str)s'
341 if s.get('elapsed') is not None:
342 s['_elapsed_str'] = self.format_seconds(s['elapsed'])
343 msg_template += ' in %(_elapsed_str)s'
344 s['_percent_str'] = self.format_percent(100)
f304da8a 345 self._report_progress_status(s, msg_template)
3bc2ddcc 346 return
5cda4eda
PH
347
348 if s['status'] != 'downloading':
349 return
350
351 if s.get('eta') is not None:
352 s['_eta_str'] = self.format_eta(s['eta'])
3bc2ddcc 353 else:
f304da8a 354 s['_eta_str'] = 'Unknown'
3bc2ddcc 355
5cda4eda
PH
356 if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
357 s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
358 elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
359 s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
360 else:
361 if s.get('downloaded_bytes') == 0:
362 s['_percent_str'] = self.format_percent(0)
363 else:
364 s['_percent_str'] = 'Unknown %'
3bc2ddcc 365
5cda4eda
PH
366 if s.get('speed') is not None:
367 s['_speed_str'] = self.format_speed(s['speed'])
368 else:
369 s['_speed_str'] = 'Unknown speed'
370
371 if s.get('total_bytes') is not None:
372 s['_total_bytes_str'] = format_bytes(s['total_bytes'])
373 msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
374 elif s.get('total_bytes_estimate') is not None:
375 s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
376 msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
3bc2ddcc 377 else:
5cda4eda
PH
378 if s.get('downloaded_bytes') is not None:
379 s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
380 if s.get('elapsed'):
381 s['_elapsed_str'] = self.format_seconds(s['elapsed'])
382 msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
383 else:
384 msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
385 else:
f304da8a 386 msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
a4211baf 387 if s.get('fragment_index') and s.get('fragment_count'):
388 msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
389 elif s.get('fragment_index'):
bd93fd5d 390 msg_template += ' (frag %(fragment_index)s)'
7578d77d 391 self._report_progress_status(s, msg_template)
3bc2ddcc
JMF
392
393 def report_resuming_byte(self, resume_len):
394 """Report attempt to resume at given byte."""
b6b70730 395 self.to_screen('[download] Resuming download at byte %s' % resume_len)
3bc2ddcc 396
a3c3a1e1 397 def report_retry(self, err, count, retries):
3bc2ddcc 398 """Report retry in case of HTTP error 5xx"""
23326151 399 self.__to_screen(
5ef7d9bd 400 '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
a3c3a1e1 401 % (error_to_compat_str(err), count, self.format_retries(retries)))
23326151 402 self.sleep_retry('http', count)
3bc2ddcc 403
3bc2ddcc
JMF
404 def report_unable_to_resume(self):
405 """Report it was impossible to resume download."""
b6b70730 406 self.to_screen('[download] Unable to resume')
3bc2ddcc 407
23326151 408 def sleep_retry(self, retry_type, count):
409 sleep_func = self.params.get('retry_sleep_functions', {}).get(retry_type)
666c36d5 410 delay = float_or_none(sleep_func(n=count - 1)) if sleep_func else None
23326151 411 if delay:
666c36d5 412 self.__to_screen(f'Sleeping {delay:.2f} seconds ...')
23326151 413 time.sleep(delay)
414 return sleep_func is not None
415
0a473f2f 416 @staticmethod
417 def supports_manifest(manifest):
418 """ Whether the downloader can download the fragments from the manifest.
419 Redefine in subclasses if needed. """
420 pass
421
9f448fcb 422 def download(self, filename, info_dict, subtitle=False):
3bc2ddcc
JMF
423 """Download to a filename using the info from info_dict
424 Return True on success and False otherwise
425 """
5f0d813d 426
4340deca 427 nooverwrites_and_exists = (
9cc1a313 428 not self.params.get('overwrites', True)
3089bc74 429 and os.path.exists(encodeFilename(filename))
4340deca
P
430 )
431
75a24854
RA
432 if not hasattr(filename, 'write'):
433 continuedl_and_exists = (
3089bc74
S
434 self.params.get('continuedl', True)
435 and os.path.isfile(encodeFilename(filename))
436 and not self.params.get('nopart', False)
75a24854
RA
437 )
438
439 # Check file already present
440 if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
441 self.report_file_already_downloaded(filename)
442 self._hook_progress({
443 'filename': filename,
444 'status': 'finished',
445 'total_bytes': os.path.getsize(encodeFilename(filename)),
3ba7740d 446 }, info_dict)
b69fd25c 447 self._finish_multiline_status()
a9e7f546 448 return True, False
dabc1273 449
19a03940 450 if subtitle:
451 sleep_interval = self.params.get('sleep_interval_subtitles') or 0
9f448fcb 452 else:
19a03940 453 min_sleep_interval = self.params.get('sleep_interval') or 0
454 sleep_interval = random.uniform(
43cc91ad 455 min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
19a03940 456 if sleep_interval > 0:
457 self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
458 time.sleep(sleep_interval)
459
819e0531 460 ret = self.real_download(filename, info_dict)
461 self._finish_multiline_status()
462 return ret, True
3bc2ddcc
JMF
463
464 def real_download(self, filename, info_dict):
465 """Real download process. Redefine in subclasses."""
b6b70730 466 raise NotImplementedError('This method must be implemented by subclasses')
3bc2ddcc 467
3ba7740d 468 def _hook_progress(self, status, info_dict):
469 if not self._progress_hooks:
470 return
03b4de72 471 status['info_dict'] = info_dict
f45e6c11 472 # youtube-dl passes the same status object to all the hooks.
473 # Some third party scripts seems to be relying on this.
474 # So keep this behavior if possible
3bc2ddcc 475 for ph in self._progress_hooks:
f45e6c11 476 ph(status)
3bc2ddcc
JMF
477
478 def add_progress_hook(self, ph):
71b640cc
PH
479 # See YoutubeDl.py (search for progress_hooks) for a description of
480 # this interface
3bc2ddcc 481 self._progress_hooks.append(ph)
222516d9 482
cd8a07a7 483 def _debug_cmd(self, args, exe=None):
222516d9
PH
484 if not self.params.get('verbose', False):
485 return
486
cd8a07a7
S
487 str_args = [decodeArgument(a) for a in args]
488
222516d9 489 if exe is None:
cd8a07a7 490 exe = os.path.basename(str_args[0])
222516d9 491
86e5f3ed 492 self.write_debug(f'{exe} command line: {shell_quote(str_args)}')