]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[cleanup] Minor fixes
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
be5c1ae8 3import functools
3bc2ddcc 4import os
f8271158 5import random
3bc2ddcc 6import re
3bc2ddcc
JMF
7import time
8
f8271158 9from ..minicurses import (
10 BreaklineStatusPrinter,
11 MultilineLogger,
12 MultilinePrinter,
13 QuietMultilinePrinter,
14)
3bc2ddcc 15from ..utils import (
be5c1ae8 16 IDENTITY,
17 NO_DEFAULT,
1d485a1a 18 NUMBER_RE,
f8271158 19 LockingUnsupportedError,
19a03940 20 Namespace,
be5c1ae8 21 RetryManager,
1a8cc837 22 classproperty,
1433734c 23 decodeArgument,
3bc2ddcc 24 encodeFilename,
3bc2ddcc 25 format_bytes,
11233f2a 26 join_nonempty,
a057779d 27 remove_start,
205a0654 28 sanitize_open,
1433734c 29 shell_quote,
e3ced9ed 30 timeconvert,
aa7785f8 31 timetuple_from_msec,
11233f2a 32 try_call,
3bc2ddcc
JMF
33)
34
35
class FileDownloader:
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    continuedl:         Try to continue downloads if possible.
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for HTTP error 5xx
    file_access_retries:   Number of times to retry on file access error
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py
    retry_sleep_functions: See YoutubeDL.py

    Subclasses of this one must re-define the real_download method.
    """

    # NOTE(review): presumably the number of bytes fetched when the 'test'
    # option is set; it is not read anywhere in this file - confirm in subclasses
    _TEST_FILE_SIZE = 10241
    # Options dictionary; the class-level None is replaced per instance in __init__
    params = None
82
def __init__(self, ydl, params):
    """Build a downloader bound to *ydl* and configured by the *params* dict."""
    self._set_ydl(ydl)
    self.params = params
    self._progress_hooks = []
    # The status printer reads self.params, so it is prepared only after they are stored
    self._prepare_multiline_status()
    # The built-in progress reporter is always the first registered hook
    self.add_progress_hook(self.report_progress)
3bc2ddcc 90
19a03940 91 def _set_ydl(self, ydl):
92 self.ydl = ydl
93
94 for func in (
95 'deprecation_warning',
da4db748 96 'deprecated_feature',
19a03940 97 'report_error',
98 'report_file_already_downloaded',
99 'report_warning',
100 'to_console_title',
101 'to_stderr',
102 'trouble',
103 'write_debug',
104 ):
1d485a1a 105 if not hasattr(self, func):
106 setattr(self, func, getattr(ydl, func))
19a03940 107
108 def to_screen(self, *args, **kargs):
109 self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
110
23326151 111 __to_screen = to_screen
112
@classproperty
def FD_NAME(cls):
    """Snake-case name derived from the class name minus its last two characters."""
    # NOTE(review): the two stripped characters are presumably the 'FD' suffix - confirm naming convention
    stem = cls.__name__[:-2]
    # Insert '_' at every lower-case -> upper-case boundary, then lower-case everything
    return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', stem).lower()
3a408f9d 116
3bc2ddcc
JMF
@staticmethod
def format_seconds(seconds):
    """Format a duration in seconds as HH:MM:SS.

    Returns a placeholder for None and '--:--:--' once the hour count
    no longer fits in two digits.
    """
    if seconds is None:
        return ' Unknown'
    parts = timetuple_from_msec(seconds * 1000)
    if parts.hours > 99:
        return '--:--:--'
    # The last tuple element is dropped; only the first three are printed
    return '%02d:%02d:%02d' % parts[:-1]
3bc2ddcc 125
@classmethod
def format_eta(cls, seconds):
    """Format an ETA like format_seconds, minus a leading '00:', right-aligned to 8 columns."""
    clock = remove_start(cls.format_seconds(seconds), '00:')
    return f'{clock:>8s}'
11233f2a 129
3bc2ddcc
JMF
130 @staticmethod
131 def calc_percent(byte_counter, data_len):
132 if data_len is None:
133 return None
134 return float(byte_counter) / float(data_len) * 100.0
135
136 @staticmethod
137 def format_percent(percent):
11233f2a 138 return ' N/A%' if percent is None else f'{percent:>5.1f}%'
3bc2ddcc
JMF
139
140 @staticmethod
141 def calc_eta(start, now, total, current):
142 if total is None:
143 return None
c7667c2d
S
144 if now is None:
145 now = time.time()
3bc2ddcc 146 dif = now - start
5f6a1245 147 if current == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
148 return None
149 rate = float(current) / dif
150 return int((float(total) - float(current)) / rate)
151
3bc2ddcc
JMF
152 @staticmethod
153 def calc_speed(start, now, bytes):
154 dif = now - start
5f6a1245 155 if bytes == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
156 return None
157 return float(bytes) / dif
158
159 @staticmethod
160 def format_speed(speed):
11233f2a 161 return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
3bc2ddcc 162
617e58d8
S
163 @staticmethod
164 def format_retries(retries):
11233f2a 165 return 'inf' if retries == float('inf') else int(retries)
617e58d8 166
3bc2ddcc
JMF
167 @staticmethod
168 def best_block_size(elapsed_time, bytes):
169 new_min = max(bytes / 2.0, 1.0)
5f6a1245 170 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
3bc2ddcc
JMF
171 if elapsed_time < 0.001:
172 return int(new_max)
173 rate = bytes / elapsed_time
174 if rate > new_max:
175 return int(new_max)
176 if rate < new_min:
177 return int(new_min)
178 return int(rate)
179
@staticmethod
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into an integer.

    Accepts a number with an optional single-letter binary suffix
    (k, M, G, ... case-insensitive); returns None when the string does not match.
    """
    found = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr)
    if found is None:
        return None
    value = float(found.group(1))
    # An empty suffix is found at index 0, giving a multiplier of 1024 ** 0 == 1
    exponent = 'bkmgtpezy'.index(found.group(2).lower())
    return int(round(value * 1024.0 ** exponent))
189
c7667c2d 190 def slow_down(self, start_time, now, byte_counter):
3bc2ddcc 191 """Sleep if the download speed is over the rate limit."""
d800609c 192 rate_limit = self.params.get('ratelimit')
8a77e5e6 193 if rate_limit is None or byte_counter == 0:
3bc2ddcc 194 return
c7667c2d
S
195 if now is None:
196 now = time.time()
3bc2ddcc
JMF
197 elapsed = now - start_time
198 if elapsed <= 0.0:
199 return
200 speed = float(byte_counter) / elapsed
8a77e5e6 201 if speed > rate_limit:
1a01639b
S
202 sleep_time = float(byte_counter) / rate_limit - elapsed
203 if sleep_time > 0:
204 time.sleep(sleep_time)
3bc2ddcc
JMF
205
206 def temp_name(self, filename):
207 """Returns a temporary filename for the given filename."""
b6b70730 208 if self.params.get('nopart', False) or filename == '-' or \
3bc2ddcc
JMF
209 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
210 return filename
b6b70730 211 return filename + '.part'
3bc2ddcc
JMF
212
def undo_temp_name(self, filename):
    """Strip a trailing '.part' from *filename*, if present."""
    suffix = '.part'
    return filename[:-len(suffix)] if filename.endswith(suffix) else filename
217
ea0c2f21
RA
def ytdl_filename(self, filename):
    """Return *filename* with '.ytdl' appended."""
    suffix = '.ytdl'
    return filename + suffix
220
def wrap_file_access(action, *, fatal=False):
    """Decorator factory that retries a file operation on certain OSErrors.

    action: verb interpolated into the messages ('Unable to <action> file')
    fatal:  when true, no `error` handler is passed to RetryManager.report_retry;
            otherwise the error is reported through fd.report_error

    Used inside the class body as `@wrap_file_access('open')`; the decorated
    function becomes a partialmethod of `wrapper`, which calls it with `self`.
    """
    def error_callback(err, count, retries, *, fd):
        # `fd.__to_screen` is written inside the class body, so it resolves to
        # the name-mangled private alias of to_screen
        return RetryManager.report_retry(
            err, count, retries, info=fd.__to_screen,
            # The tuple runs a short sleep first and then prints the message
            warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
            error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
            sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

    def wrapper(self, func, *args, **kwargs):
        for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
            try:
                return func(self, *args, **kwargs)
            except OSError as err:
                # Only EACCES/EINVAL are recorded on the retry object for another attempt
                if err.errno in (errno.EACCES, errno.EINVAL):
                    retry.error = err
                    continue
                # Any other OSError goes straight to the callback with count=1, retries=0
                retry.error_callback(err, 1, 0)

    # partial(partialmethod, wrapper)(func) == partialmethod(wrapper, func)
    return functools.partial(functools.partialmethod, wrapper)
45806d44
EH
240
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
    """Open *filename* through the module-level sanitize_open helper.

    Returns the (stream, filename) pair from that helper. A debug message is
    written once when the returned stream has no truthy `locked` attribute.
    """
    stream, filename = sanitize_open(filename, open_mode)
    is_locked = getattr(stream, 'locked', None)
    if not is_locked:
        self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
    return stream, filename
205a0654 247
45806d44
EH
@wrap_file_access('remove')
def try_remove(self, filename):
    """Remove *filename*; OSErrors are handled by the wrap_file_access retry wrapper."""
    os.remove(filename)
251
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
    """Move *old_filename* onto *new_filename*; a no-op when both names are equal."""
    if old_filename != new_filename:
        os.replace(old_filename, new_filename)
3bc2ddcc
JMF
257
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    last_modified_hdr is passed to timeconvert to obtain a timestamp.
    Returns that timestamp (even if setting it on the file failed), or
    None when the header is missing, the file does not exist, or the
    header does not convert to a usable (non-zero) time.
    """
    if last_modified_hdr is None:
        return None
    if not os.path.isfile(encodeFilename(filename)):
        return None
    filetime = timeconvert(last_modified_hdr)
    # None: header could not be converted; 0: obviously invalid date
    if filetime is None or filetime == 0:
        return None
    # Best effort: a failure to touch the file must not abort the download
    with contextlib.suppress(Exception):
        os.utime(filename, (time.time(), filetime))
    return filetime
276
def report_destination(self, filename):
    """Report destination filename."""
    # Plain concatenation: filename is expected to be a str here
    self.to_screen('[download] Destination: ' + filename)
3bc2ddcc 280
def _prepare_multiline_status(self, lines=1):
    """Create the status printer (self._multiline) for *lines* progress lines.

    Choice, in priority order: quiet printer when 'noprogress' is set,
    logger-backed printer when ydl has a 'logger', newline printer when
    'progress_with_newline' is set, otherwise the regular multiline printer.
    """
    options = self.params
    if options.get('noprogress'):
        printer = QuietMultilinePrinter()
    elif self.ydl.params.get('logger'):
        printer = MultilineLogger(self.ydl.params['logger'], lines)
    elif options.get('progress_with_newline'):
        printer = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
    else:
        printer = MultilinePrinter(self.ydl._out_files.out, lines, not options.get('quiet'))
    self._multiline = printer
    # Colors need both printer capability and the absence of the 'no_color' option
    printer.allow_colors = printer._HAVE_FULLCAP and not options.get('no_color')
bd50a52b
THD
291
def _finish_multiline_status(self):
    """Tell the status printer created by _prepare_multiline_status that output is done."""
    self._multiline.end()
294
# Style passed to _format_progress for each `_<field>_str` entry of a progress
# dict (see _report_progress_status); some fields use an empty style string
ProgressStyles = Namespace(
    downloaded_bytes='light blue',
    percent='light blue',
    eta='yellow',
    speed='green',
    elapsed='bold white',
    total_bytes='',
    total_bytes_estimate='',
)
7578d77d 304
305 def _report_progress_status(self, s, default_template):
64fa820c 306 for name, style in self.ProgressStyles.items_:
7578d77d 307 name = f'_{name}_str'
308 if name not in s:
309 continue
310 s[name] = self._format_progress(s[name], style)
311 s['_default_template'] = default_template % s
312
819e0531 313 progress_dict = s.copy()
314 progress_dict.pop('info_dict')
315 progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
316
317 progress_template = self.params.get('progress_template', {})
318 self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
319 progress_template.get('download') or '[download] %(progress._default_template)s',
320 progress_dict), s.get('progress_idx') or 0)
321 self.to_console_title(self.ydl.evaluate_outtmpl(
322 progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
323 progress_dict))
3bc2ddcc 324
def _format_progress(self, *args, **kwargs):
    """Call ydl._format_text for the status printer's stream and color setting.

    Extra positional and keyword arguments are forwarded unchanged.
    """
    return self.ydl._format_text(
        self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
328
def report_progress(self, s):
    """Default progress hook: render the status dict *s* as a progress line.

    *s* is updated in place with the formatted `_..._str` fields. Only the
    'finished' and 'downloading' statuses produce output; any other status
    returns without reporting.
    """
    def with_fields(*tups, default=''):
        # Each tuple is (*field_names, template); return the template of the
        # first tuple whose fields are all present and not None in s
        for *fields, tmpl in tups:
            if all(s.get(f) is not None for f in fields):
                return tmpl
        return default

    _formats_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'

    if s['status'] == 'finished':
        if self.params.get('noprogress'):
            self.to_screen('[download] Download completed')
        # Average speed over the whole download
        speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
        s.update({
            'speed': speed,
            '_speed_str': self.format_speed(speed).strip(),
            '_total_bytes_str': _formats_bytes('total_bytes'),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
            '_percent_str': self.format_percent(100),
        })
        # '%%' because the joined template is later applied with the % operator
        self._report_progress_status(s, join_nonempty(
            '100%%',
            with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
            with_fields(('elapsed', 'in %(_elapsed_str)s')),
            with_fields(('speed', 'at %(_speed_str)s')),
            delim=' '))

    # The 'finished' branch has no return of its own; it leaves through this check
    if s['status'] != 'downloading':
        return

    s.update({
        '_eta_str': self.format_eta(s.get('eta')).strip(),
        '_speed_str': self.format_speed(s.get('speed')),
        # Candidates tried in order: exact total, estimated total, then a zero-bytes fallback
        '_percent_str': self.format_percent(try_call(
            lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
            lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
            lambda: s['downloaded_bytes'] == 0 and 0)),
        '_total_bytes_str': _formats_bytes('total_bytes'),
        '_total_bytes_estimate_str': _formats_bytes('total_bytes_estimate'),
        '_downloaded_bytes_str': _formats_bytes('downloaded_bytes'),
        '_elapsed_str': self.format_seconds(s.get('elapsed')),
    })

    # Pick the most informative template that the available fields allow
    msg_template = with_fields(
        ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
        ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
        default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')

    # Fragment counter suffix, only when fragment information is present
    msg_template += with_fields(
        ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
        ('fragment_index', ' (frag %(fragment_index)s)'))
    self._report_progress_status(s, msg_template)
3bc2ddcc
JMF
383
def report_resuming_byte(self, resume_len):
    """Report attempt to resume at given byte."""
    # resume_len is the byte offset shown in the message
    self.to_screen('[download] Resuming download at byte %s' % resume_len)
3bc2ddcc 387
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
    """Report retry

    err, count and retries are forwarded to RetryManager.report_retry.
    frag_index: leave at NO_DEFAULT for a non-fragment retry; None yields the
                suffix 'fragments', any other value 'fragment <frag_index>'
    fatal:      when true, the final error goes through self.report_error;
                otherwise IDENTITY is used as the error handler
    """
    # Either False or the string 'fragment'; the string doubles as the sleep-function key
    is_frag = False if frag_index is NO_DEFAULT else 'fragment'
    RetryManager.report_retry(
        err, count, retries, info=self.__to_screen,
        warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
        error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
        sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
        suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)
3bc2ddcc 397
3bc2ddcc
JMF
def report_unable_to_resume(self):
    """Report it was impossible to resume download."""
    # Goes through to_screen, so the 'quiet' option applies
    self.to_screen('[download] Unable to resume')
3bc2ddcc 401
0a473f2f 402 @staticmethod
403 def supports_manifest(manifest):
404 """ Whether the downloader can download the fragments from the manifest.
405 Redefine in subclasses if needed. """
406 pass
407
9f448fcb 408 def download(self, filename, info_dict, subtitle=False):
3bc2ddcc
JMF
409 """Download to a filename using the info from info_dict
410 Return True on success and False otherwise
411 """
5f0d813d 412
4340deca 413 nooverwrites_and_exists = (
9cc1a313 414 not self.params.get('overwrites', True)
3089bc74 415 and os.path.exists(encodeFilename(filename))
4340deca
P
416 )
417
75a24854
RA
418 if not hasattr(filename, 'write'):
419 continuedl_and_exists = (
3089bc74
S
420 self.params.get('continuedl', True)
421 and os.path.isfile(encodeFilename(filename))
422 and not self.params.get('nopart', False)
75a24854
RA
423 )
424
425 # Check file already present
426 if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
427 self.report_file_already_downloaded(filename)
428 self._hook_progress({
429 'filename': filename,
430 'status': 'finished',
431 'total_bytes': os.path.getsize(encodeFilename(filename)),
3ba7740d 432 }, info_dict)
b69fd25c 433 self._finish_multiline_status()
a9e7f546 434 return True, False
dabc1273 435
19a03940 436 if subtitle:
437 sleep_interval = self.params.get('sleep_interval_subtitles') or 0
9f448fcb 438 else:
19a03940 439 min_sleep_interval = self.params.get('sleep_interval') or 0
440 sleep_interval = random.uniform(
43cc91ad 441 min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
19a03940 442 if sleep_interval > 0:
443 self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
444 time.sleep(sleep_interval)
445
819e0531 446 ret = self.real_download(filename, info_dict)
447 self._finish_multiline_status()
448 return ret, True
3bc2ddcc
JMF
449
def real_download(self, filename, info_dict):
    """Real download process. Redefine in subclasses.

    Called by download() with the same filename and info_dict; its return
    value becomes the first element of download()'s result tuple.
    """
    raise NotImplementedError('This method must be implemented by subclasses')
3bc2ddcc 453
3ba7740d 454 def _hook_progress(self, status, info_dict):
f5ea4748 455 # Ideally we want to make a copy of the dict, but that is too slow
03b4de72 456 status['info_dict'] = info_dict
f45e6c11 457 # youtube-dl passes the same status object to all the hooks.
458 # Some third party scripts seems to be relying on this.
459 # So keep this behavior if possible
3bc2ddcc 460 for ph in self._progress_hooks:
f45e6c11 461 ph(status)
3bc2ddcc
JMF
462
def add_progress_hook(self, ph):
    """Register *ph*; _hook_progress calls hooks in registration order with the status dict."""
    # See YoutubeDl.py (search for progress_hooks) for a description of
    # this interface
    self._progress_hooks.append(ph)
222516d9 467
cd8a07a7 468 def _debug_cmd(self, args, exe=None):
222516d9
PH
469 if not self.params.get('verbose', False):
470 return
471
cd8a07a7
S
472 str_args = [decodeArgument(a) for a in args]
473
222516d9 474 if exe is None:
cd8a07a7 475 exe = os.path.basename(str_args[0])
222516d9 476
86e5f3ed 477 self.write_debug(f'{exe} command line: {shell_quote(str_args)}')