]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[devscripts/cli_to_api] Add script
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
be5c1ae8 3import functools
3bc2ddcc 4import os
f8271158 5import random
3bc2ddcc 6import re
3bc2ddcc
JMF
7import time
8
f8271158 9from ..minicurses import (
10 BreaklineStatusPrinter,
11 MultilineLogger,
12 MultilinePrinter,
13 QuietMultilinePrinter,
14)
3bc2ddcc 15from ..utils import (
be5c1ae8 16 IDENTITY,
17 NO_DEFAULT,
f8271158 18 LockingUnsupportedError,
19a03940 19 Namespace,
be5c1ae8 20 RetryManager,
1a8cc837 21 classproperty,
1433734c 22 decodeArgument,
71df9b7f 23 deprecation_warning,
3bc2ddcc 24 encodeFilename,
3bc2ddcc 25 format_bytes,
11233f2a 26 join_nonempty,
64c464a1 27 parse_bytes,
a057779d 28 remove_start,
205a0654 29 sanitize_open,
1433734c 30 shell_quote,
e3ced9ed 31 timeconvert,
aa7785f8 32 timetuple_from_msec,
11233f2a 33 try_call,
3bc2ddcc
JMF
34)
35
36
class FileDownloader:
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    continuedl:         Attempt to continue downloads if possible
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for expected network errors.
                        Default is 0 for API, but 10 for CLI
    file_access_retries:   Number of times to retry on file access error (default: 3)
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py
    retry_sleep_functions: See YoutubeDL.py

    Subclasses of this one must re-define the real_download method.
    """

    # NOTE(review): presumably the byte count fetched when the 'test' option
    # is set; it is not read anywhere in this part of the file - confirm in
    # the subclasses.
    _TEST_FILE_SIZE = 10241
    # Class-level placeholder; __init__ replaces it with the options dict.
    params = None
84
def __init__(self, ydl, params):
    """Create a FileDownloader object with the given options.

    ydl:    Object stored as self.ydl; _set_ydl() also copies its reporting
            helpers onto this instance.
    params: Dictionary of options (see the class docstring).
    """
    # The order below matters: _prepare_multiline_status() reads both
    # self.ydl and self.params, and add_progress_hook() appends to
    # self._progress_hooks.
    self._set_ydl(ydl)
    self._progress_hooks = []
    self.params = params
    self._prepare_multiline_status()
    # The built-in progress display is simply the first registered hook
    self.add_progress_hook(self.report_progress)
3bc2ddcc 92
19a03940 93 def _set_ydl(self, ydl):
94 self.ydl = ydl
95
96 for func in (
97 'deprecation_warning',
da4db748 98 'deprecated_feature',
19a03940 99 'report_error',
100 'report_file_already_downloaded',
101 'report_warning',
102 'to_console_title',
103 'to_stderr',
104 'trouble',
105 'write_debug',
106 ):
1d485a1a 107 if not hasattr(self, func):
108 setattr(self, func, getattr(ydl, func))
19a03940 109
def to_screen(self, *args, **kargs):
    """Forward to self.ydl.to_screen(), passing the 'quiet' option along."""
    be_quiet = self.params.get('quiet')
    self.ydl.to_screen(*args, quiet=be_quiet, **kargs)


# Private alias of to_screen; report_retry() and wrap_file_access() refer to
# it as `__to_screen`.
__to_screen = to_screen
114
@classproperty
def FD_NAME(cls):
    """Lower-case, underscore-separated name derived from the class name.

    The last two characters of the class name are dropped, then an
    underscore is inserted wherever a lower-case letter is directly followed
    by an upper-case one.
    """
    stem = cls.__name__[:-2]
    snake = re.sub(r'(?<=[a-z])(?=[A-Z])', '_', stem)
    return snake.lower()
3a408f9d 118
3bc2ddcc
JMF
119 @staticmethod
120 def format_seconds(seconds):
11233f2a 121 if seconds is None:
122 return ' Unknown'
aa7785f8 123 time = timetuple_from_msec(seconds * 1000)
124 if time.hours > 99:
3bc2ddcc 125 return '--:--:--'
aa7785f8 126 return '%02d:%02d:%02d' % time[:-1]
3bc2ddcc 127
@classmethod
def format_eta(cls, seconds):
    """Format an ETA: format_seconds() minus a leading "00:", right-aligned to 8 characters."""
    formatted = cls.format_seconds(seconds)
    trimmed = remove_start(formatted, '00:')
    return f'{trimmed:>8s}'
11233f2a 131
3bc2ddcc
JMF
132 @staticmethod
133 def calc_percent(byte_counter, data_len):
134 if data_len is None:
135 return None
136 return float(byte_counter) / float(data_len) * 100.0
137
138 @staticmethod
139 def format_percent(percent):
11233f2a 140 return ' N/A%' if percent is None else f'{percent:>5.1f}%'
3bc2ddcc
JMF
141
142 @staticmethod
143 def calc_eta(start, now, total, current):
144 if total is None:
145 return None
c7667c2d
S
146 if now is None:
147 now = time.time()
3bc2ddcc 148 dif = now - start
5f6a1245 149 if current == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
150 return None
151 rate = float(current) / dif
152 return int((float(total) - float(current)) / rate)
153
3bc2ddcc
JMF
154 @staticmethod
155 def calc_speed(start, now, bytes):
156 dif = now - start
5f6a1245 157 if bytes == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
158 return None
159 return float(bytes) / dif
160
161 @staticmethod
162 def format_speed(speed):
11233f2a 163 return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
3bc2ddcc 164
617e58d8
S
165 @staticmethod
166 def format_retries(retries):
11233f2a 167 return 'inf' if retries == float('inf') else int(retries)
617e58d8 168
3bc2ddcc
JMF
169 @staticmethod
170 def best_block_size(elapsed_time, bytes):
171 new_min = max(bytes / 2.0, 1.0)
5f6a1245 172 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
3bc2ddcc
JMF
173 if elapsed_time < 0.001:
174 return int(new_max)
175 rate = bytes / elapsed_time
176 if rate > new_max:
177 return int(new_max)
178 if rate < new_min:
179 return int(new_min)
180 return int(rate)
181
@staticmethod
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into an integer.

    Deprecated: emits a deprecation warning, then delegates to the
    module-level parse_bytes() imported from the utils package.
    """
    notice = (
        'yt_dlp.FileDownloader.parse_bytes is deprecated and '
        'may be removed in the future. Use yt_dlp.utils.parse_bytes instead')
    deprecation_warning(notice)
    return parse_bytes(bytestr)
3bc2ddcc 188
def slow_down(self, start_time, now, byte_counter):
    """Sleep if the download speed is over the rate limit.

    start_time:   Time the transfer started.
    now:          Current time; time.time() is used when None.
    byte_counter: Number of bytes transferred since `start_time`.

    The limit is read from the 'ratelimit' option (bytes/sec). A missing,
    zero or negative limit means "no limit".
    """
    rate_limit = self.params.get('ratelimit')
    # A zero limit would divide by zero below; treat every non-positive
    # value as "unlimited"
    if rate_limit is None or rate_limit <= 0 or byte_counter == 0:
        return
    if now is None:
        now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    speed = float(byte_counter) / elapsed
    if speed > rate_limit:
        # Sleep long enough for the average speed to drop back to the limit
        sleep_time = float(byte_counter) / rate_limit - elapsed
        if sleep_time > 0:
            time.sleep(sleep_time)
3bc2ddcc
JMF
204
def temp_name(self, filename):
    """Returns a temporary filename for the given filename.

    The name is returned unchanged when the 'nopart' option is set, when it
    is '-', or when it exists on disk but is not a regular file. Otherwise
    '.part' is appended.
    """
    if self.params.get('nopart', False):
        return filename
    if filename == '-':
        return filename
    if os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename)):
        return filename
    return filename + '.part'
3bc2ddcc
JMF
211
def undo_temp_name(self, filename):
    """Strip a trailing '.part' from `filename`, if there is one."""
    suffix = '.part'
    return filename[:-len(suffix)] if filename.endswith(suffix) else filename
216
ea0c2f21
RA
def ytdl_filename(self, filename):
    """Return `filename` with the '.ytdl' suffix appended."""
    suffix = '.ytdl'
    return filename + suffix
219
def wrap_file_access(action, *, fatal=False):
    """Build a decorator that runs a file operation under a RetryManager loop.

    Used in the class body as `@wrap_file_access('open')` etc., so it takes
    no `self`.

    action: Verb inserted into the "Unable to <action> file" messages.
    fatal:  When true, no `error` reporter is handed to
            RetryManager.report_retry (None is passed instead).
            NOTE(review): presumably that makes a final failure propagate
            instead of being reported - confirm against RetryManager.

    Returns a callable which, applied to a function `f`, yields
    functools.partialmethod(wrapper, f); calling the resulting method runs
    wrapper(self, f, *args, **kwargs).
    """
    def error_callback(err, count, retries, *, fd):
        # `fd` is the downloader instance, supplied through the `fd=self`
        # keyword given to RetryManager in wrapper() below
        return RetryManager.report_retry(
            err, count, retries, info=fd.__to_screen,
            # The tuple is only a way to run two calls in one lambda: a
            # 10 ms pause, then the warning message
            warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
            error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
            sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

    def wrapper(self, func, *args, **kwargs):
        # The number of attempts comes from the 'file_access_retries' option (default 3)
        for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
            try:
                return func(self, *args, **kwargs)
            except OSError as err:
                # Only EACCES/EINVAL are recorded on the retry object for
                # another attempt
                if err.errno in (errno.EACCES, errno.EINVAL):
                    retry.error = err
                    continue
                # Any other OSError goes straight to the error callback,
                # with count=1 and retries=0
                retry.error_callback(err, 1, 0)

    return functools.partial(functools.partialmethod, wrapper)
45806d44
EH
239
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
    """Open `filename` through the module-level sanitize_open() helper.

    Returns the (file object, filename) pair produced by that helper. A
    debug message is written (once) when the file object does not carry a
    truthy `locked` attribute.
    """
    stream, real_name = sanitize_open(filename, open_mode)
    is_locked = getattr(stream, 'locked', None)
    if not is_locked:
        self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
    return stream, real_name
205a0654 246
45806d44
EH
@wrap_file_access('remove')
def try_remove(self, filename):
    """Delete `filename` with os.remove(), under the file-access retry wrapper."""
    os.remove(filename)
250
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
    """Move `old_filename` to `new_filename` with os.replace().

    Does nothing when both names are equal. Runs under the file-access
    retry wrapper.
    """
    if old_filename != new_filename:
        os.replace(old_filename, new_filename)
3bc2ddcc
JMF
256
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    filename:          Path of the file to touch.
    last_modified_hdr: Value of the Last-modified header, or None.

    Returns the converted timestamp, or None when there is no header, the
    file does not exist, or the header converts to None or 0. Errors raised
    while setting the time are ignored.
    """
    if last_modified_hdr is None:
        return None
    if not os.path.isfile(encodeFilename(filename)):
        return None
    filetime = timeconvert(last_modified_hdr)
    # None: conversion gave no result; 0: ignore obviously invalid dates
    if filetime is None or filetime == 0:
        return None
    with contextlib.suppress(Exception):
        os.utime(filename, (time.time(), filetime))
    return filetime
275
def report_destination(self, filename):
    """Report destination filename."""
    message = '[download] Destination: ' + filename
    self.to_screen(message)
3bc2ddcc 279
def _prepare_multiline_status(self, lines=1):
    """Create the status printer and store it in self._multiline.

    The first matching rule wins:
      - 'noprogress' option set         -> QuietMultilinePrinter
      - the ydl has a 'logger' param    -> MultilineLogger
      - 'progress_with_newline' set     -> BreaklineStatusPrinter
      - otherwise                       -> MultilinePrinter

    lines: Passed on to the printer (all but the quiet one).
    """
    if self.params.get('noprogress'):
        printer = QuietMultilinePrinter()
    elif self.ydl.params.get('logger'):
        printer = MultilineLogger(self.ydl.params['logger'], lines)
    elif self.params.get('progress_with_newline'):
        printer = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
    else:
        out = self.ydl._out_files.out
        printer = MultilinePrinter(out, lines, not self.params.get('quiet'))
    self._multiline = printer
    # Colors need printer support and must not be disabled via 'no_color'
    printer.allow_colors = printer._HAVE_FULLCAP and not self.params.get('no_color')
bd50a52b
THD
290
def _finish_multiline_status(self):
    """Call end() on the status printer created by _prepare_multiline_status()."""
    self._multiline.end()
293
# Style applied to each progress field. _report_progress_status() looks up
# the `_<name>_str` entry of the status dict for every name below and passes
# it through _format_progress() together with the style given here.
ProgressStyles = Namespace(
    downloaded_bytes='light blue',
    percent='light blue',
    eta='yellow',
    speed='green',
    elapsed='bold white',
    total_bytes='',
    total_bytes_estimate='',
)
7578d77d 303
304 def _report_progress_status(self, s, default_template):
64fa820c 305 for name, style in self.ProgressStyles.items_:
7578d77d 306 name = f'_{name}_str'
307 if name not in s:
308 continue
309 s[name] = self._format_progress(s[name], style)
310 s['_default_template'] = default_template % s
311
819e0531 312 progress_dict = s.copy()
313 progress_dict.pop('info_dict')
314 progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
315
316 progress_template = self.params.get('progress_template', {})
317 self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
318 progress_template.get('download') or '[download] %(progress._default_template)s',
319 progress_dict), s.get('progress_idx') or 0)
320 self.to_console_title(self.ydl.evaluate_outtmpl(
321 progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
322 progress_dict))
3bc2ddcc 323
7578d77d 324 def _format_progress(self, *args, **kwargs):
325 return self.ydl._format_text(
326 self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
327
def report_progress(self, s):
    """Display the progress described by the status dict `s`.

    Registered as a progress hook in __init__. The formatted `_*_str`
    fields are added to `s` in place, then `s` is handed to
    _report_progress_status() together with a message template that only
    mentions fields which are actually available.

    Only the 'finished' and 'downloading' statuses produce output.
    """
    def with_fields(*tups, default=''):
        # Each tuple is (*required_fields, template). Return the template of
        # the first tuple whose fields are all present and not None in `s`.
        for *fields, tmpl in tups:
            if all(s.get(f) is not None for f in fields):
                return tmpl
        return default

    # Right-aligned, 10 characters wide; a missing key is passed on as None
    _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'

    if s['status'] == 'finished':
        if self.params.get('noprogress'):
            self.to_screen('[download] Download completed')
        # Average speed over the whole download.
        # NOTE(review): presumably try_call() yields None when the lambda
        # fails (missing key, zero elapsed time) - confirm in utils.
        speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
        s.update({
            'speed': speed,
            '_speed_str': self.format_speed(speed).strip(),
            '_total_bytes_str': _format_bytes('total_bytes'),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
            '_percent_str': self.format_percent(100),
        })
        # '100%%' is escaped because the template is later %-formatted
        self._report_progress_status(s, join_nonempty(
            '100%%',
            with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
            with_fields(('elapsed', 'in %(_elapsed_str)s')),
            with_fields(('speed', 'at %(_speed_str)s')),
            delim=' '))

    if s['status'] != 'downloading':
        return

    s.update({
        '_eta_str': self.format_eta(s.get('eta')).strip(),
        '_speed_str': self.format_speed(s.get('speed')),
        # Candidates in order: exact total, estimated total, and finally 0
        # when nothing has been downloaded yet
        '_percent_str': self.format_percent(try_call(
            lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
            lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
            lambda: s['downloaded_bytes'] == 0 and 0)),
        '_total_bytes_str': _format_bytes('total_bytes'),
        '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
        '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
        '_elapsed_str': self.format_seconds(s.get('elapsed')),
    })

    # Pick the most informative template the available fields allow
    msg_template = with_fields(
        ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
        ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
        default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')

    # Append the fragment counter when fragment information is present
    msg_template += with_fields(
        ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
        ('fragment_index', ' (frag %(fragment_index)s)'))
    self._report_progress_status(s, msg_template)
3bc2ddcc
JMF
382
def report_resuming_byte(self, resume_len):
    """Report attempt to resume at given byte."""
    message = '[download] Resuming download at byte %s' % resume_len
    self.to_screen(message)
3bc2ddcc 386
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
    """Report retry

    err, count, retries: Passed on to RetryManager.report_retry().
    frag_index: Leave at NO_DEFAULT for a plain HTTP retry. Any other value
                marks a fragment retry; None gives the suffix 'fragments',
                anything else 'fragment <frag_index>'.
    fatal:      When true, errors go to self.report_error(); otherwise
                IDENTITY is passed as the error handler.
    """
    if frag_index is NO_DEFAULT:
        sleep_key, suffix = 'http', None
    else:
        sleep_key = 'fragment'
        suffix = 'fragments' if frag_index is None else f'fragment {frag_index}'

    if fatal:
        on_error = lambda e: self.report_error(f'\r[download] Got error: {e}')
    else:
        on_error = IDENTITY

    sleep_functions = self.params.get('retry_sleep_functions', {})
    RetryManager.report_retry(
        err, count, retries, info=self.__to_screen,
        warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
        error=on_error,
        sleep_func=sleep_functions.get(sleep_key),
        suffix=suffix)
3bc2ddcc 396
3bc2ddcc
JMF
def report_unable_to_resume(self):
    """Report it was impossible to resume download."""
    message = '[download] Unable to resume'
    self.to_screen(message)
3bc2ddcc 400
0a473f2f 401 @staticmethod
402 def supports_manifest(manifest):
403 """ Whether the downloader can download the fragments from the manifest.
404 Redefine in subclasses if needed. """
405 pass
406
def download(self, filename, info_dict, subtitle=False):
    """Download to a filename using the info from info_dict.

    filename:  Destination path, '-', or an object with a `write`
               attribute (file-like destination).
    info_dict: Passed on to real_download() and to the progress hooks.
    subtitle:  When true, the pause before downloading is taken from the
               'sleep_interval_subtitles' option; otherwise it is drawn at
               random between 'sleep_interval' and 'max_sleep_interval'.

    Returns a 2-tuple:
      (True, False)                     the file was already present and
                                        nothing was downloaded
      (real_download() result, True)    a download was attempted
    """
    # Filesystem checks only apply to real paths. They are all done inside
    # this guard so that a file-like destination is never handed to the
    # path helpers.
    if not hasattr(filename, 'write') and filename != '-':
        encoded = encodeFilename(filename)
        nooverwrites_and_exists = (
            not self.params.get('overwrites', True)
            and os.path.exists(encoded)
        )
        continuedl_and_exists = (
            self.params.get('continuedl', True)
            and os.path.isfile(encoded)
            and not self.params.get('nopart', False)
        )

        # Check file already present
        if nooverwrites_and_exists or continuedl_and_exists:
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
                'total_bytes': os.path.getsize(encoded),
            }, info_dict)
            self._finish_multiline_status()
            return True, False

    if subtitle:
        sleep_interval = self.params.get('sleep_interval_subtitles') or 0
    else:
        min_sleep_interval = self.params.get('sleep_interval') or 0
        # Without 'max_sleep_interval' the pause is exactly the minimum
        sleep_interval = random.uniform(
            min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
    if sleep_interval > 0:
        self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
        time.sleep(sleep_interval)

    ret = self.real_download(filename, info_dict)
    self._finish_multiline_status()
    return ret, True
3bc2ddcc
JMF
448
def real_download(self, filename, info_dict):
    """Real download process. Redefine in subclasses."""
    reason = 'This method must be implemented by subclasses'
    raise NotImplementedError(reason)
3bc2ddcc 452
3ba7740d 453 def _hook_progress(self, status, info_dict):
f5ea4748 454 # Ideally we want to make a copy of the dict, but that is too slow
03b4de72 455 status['info_dict'] = info_dict
f45e6c11 456 # youtube-dl passes the same status object to all the hooks.
457 # Some third party scripts seems to be relying on this.
458 # So keep this behavior if possible
3bc2ddcc 459 for ph in self._progress_hooks:
f45e6c11 460 ph(status)
3bc2ddcc
JMF
461
def add_progress_hook(self, ph):
    """Register `ph`; hooks are called in the order they were added."""
    # See YoutubeDl.py (search for progress_hooks) for a description of
    # this interface
    hooks = self._progress_hooks
    hooks.append(ph)
222516d9 466
cd8a07a7 467 def _debug_cmd(self, args, exe=None):
222516d9
PH
468 if not self.params.get('verbose', False):
469 return
470
cd8a07a7
S
471 str_args = [decodeArgument(a) for a in args]
472
222516d9 473 if exe is None:
cd8a07a7 474 exe = os.path.basename(str_args[0])
222516d9 475
86e5f3ed 476 self.write_debug(f'{exe} command line: {shell_quote(str_args)}')