]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[cleanup] Misc fixes (see desc)
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
import contextlib
import errno
import functools
import os
import random
import re
import time

from ..minicurses import (
    BreaklineStatusPrinter,
    MultilineLogger,
    MultilinePrinter,
    QuietMultilinePrinter,
)
from ..utils import (
    NUMBER_RE,
    LockingUnsupportedError,
    Namespace,
    classproperty,
    decodeArgument,
    encodeFilename,
    error_to_compat_str,
    float_or_none,
    format_bytes,
    join_nonempty,
    sanitize_open,
    shell_quote,
    timeconvert,
    timetuple_from_msec,
    try_call,
)
31
32
class FileDownloader:
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    continuedl:         Attempt to continue downloads if possible
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for HTTP error 5xx
    file_access_retries:   Number of times to retry on file access error
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py
    retry_sleep_functions: See YoutubeDL.py

    Subclasses of this one must re-define the real_download method.
    """

    _TEST_FILE_SIZE = 10241
    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self._set_ydl(ydl)
        self._progress_hooks = []
        self.params = params
        self._prepare_multiline_status()
        # The built-in progress display is just the first registered hook
        self.add_progress_hook(self.report_progress)

    def _set_ydl(self, ydl):
        """Store `ydl` and borrow its reporting methods.

        Each listed method of `ydl` is copied onto this instance, unless an
        attribute with that name already exists (e.g. defined by a subclass).
        """
        self.ydl = ydl

        for func in (
            'deprecation_warning',
            'report_error',
            'report_file_already_downloaded',
            'report_warning',
            'to_console_title',
            'to_stderr',
            'trouble',
            'write_debug',
        ):
            if not hasattr(self, func):
                setattr(self, func, getattr(ydl, func))

    def to_screen(self, *args, **kargs):
        """Forward a message to `ydl.to_screen`, passing the 'quiet' option along."""
        self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)

    # Name-mangled alias: `self.__to_screen` inside this class always resolves
    # to the implementation above, even when a subclass overrides `to_screen`
    __to_screen = to_screen

    @classproperty
    def FD_NAME(cls):
        """Snake-case class name without its last two characters

        E.g. a class named `FooBarFD` yields `foo_bar`.
        """
        return re.sub(r'(?<!^)(?=[A-Z])', '_', cls.__name__[:-2]).lower()

    @staticmethod
    def format_seconds(seconds):
        """Format a duration in seconds as 'MM:SS' or 'HH:MM:SS'.

        Returns ' Unknown' for None and '--:--:--' above 99 hours.
        """
        if seconds is None:
            return ' Unknown'
        # NOTE(review): the tuple is presumably (hours, minutes, seconds, milliseconds);
        # only `.hours` and the slices below are relied upon here - confirm against utils.
        # Not called `time`, so that the `time` module is not shadowed
        parts = timetuple_from_msec(seconds * 1000)
        if parts.hours > 99:
            return '--:--:--'
        if not parts.hours:
            return '%02d:%02d' % parts[1:-1]
        return '%02d:%02d:%02d' % parts[:-1]

    format_eta = format_seconds

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the downloaded percentage, or None if the total is unknown.

        A total of None or 0 counts as unknown (avoids a ZeroDivisionError).
        """
        if not data_len:
            return None
        return float(byte_counter) / float(data_len) * 100.0

    @staticmethod
    def format_percent(percent):
        """Format a percentage with one decimal, or ' N/A%' for None."""
        return ' N/A%' if percent is None else f'{percent:>5.1f}%'

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate the remaining time in whole seconds.

        @param start    Timestamp at which the transfer started
        @param now      Current timestamp; None means `time.time()`
        @param total    Total size; None means unknown
        @param current  Amount transferred so far
        @returns        Remaining seconds (int), or None when it cannot be
                        estimated (unknown total, nothing transferred yet or
                        less than a millisecond elapsed)
        """
        if total is None:
            return None
        if now is None:
            now = time.time()
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed (bytes per unit of `now - start`).

        Returns None when nothing was transferred or less than a millisecond elapsed.
        """
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Format a speed for display, or ' Unknown B/s' for None."""
        return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'

    @staticmethod
    def format_retries(retries):
        """Return 'inf' for an infinite retry count, else the count as an int."""
        return 'inf' if retries == float('inf') else int(retries)

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Choose the size of the next block to read.

        The result is the measured rate (`bytes / elapsed_time`) clamped
        between half and double the previous block size; the upper bound
        never exceeds 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer.

        An optional suffix (one of k, M, G, T, P, E, Z, Y; case-insensitive)
        multiplies the number by the matching power of 1024.
        Returns None if the string does not match.
        """
        matchobj = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An absent suffix is '', and ''.index-lookup gives 0, i.e. a multiplier of 1
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def slow_down(self, start_time, now, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit')
        if rate_limit is None or byte_counter == 0:
            return
        if now is None:
            now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep for as long as the transfer is ahead of the time it
            # would have taken at exactly the rate limit
            sleep_time = float(byte_counter) / rate_limit - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename.

        The name is returned unchanged when 'nopart' is set, when it is '-',
        or when it exists but is not a regular file.
        """
        if self.params.get('nopart', False) or filename == '-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + '.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from the filename, if present."""
        if filename.endswith('.part'):
            return filename[:-len('.part')]
        return filename

    def ytdl_filename(self, filename):
        """Return the filename with '.ytdl' appended."""
        return filename + '.ytdl'

    def wrap_file_access(action, *, fatal=False):
        """Decorator factory that retries a method on file access errors.

        The wrapped method is retried while it raises an OSError whose errno
        is EACCES or EINVAL, up to the 'file_access_retries' option
        (default 0). Once retries are exhausted, or for any other OSError,
        the error is re-raised if `fatal`; otherwise it is reported with
        `report_error` and None is returned.

        @param action   Verb used in the messages (e.g. 'open', 'remove')
        @param fatal    Whether to re-raise the final error
        """
        def outer(func):
            # Keep the name and docstring of the wrapped method
            @functools.wraps(func)
            def inner(self, *args, **kwargs):
                file_access_retries = self.params.get('file_access_retries', 0)
                retry = 0
                while True:
                    try:
                        return func(self, *args, **kwargs)
                    except OSError as err:
                        retry = retry + 1
                        if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL):
                            if not fatal:
                                self.report_error(f'unable to {action} file: {err}')
                                return
                            raise
                        self.to_screen(
                            f'[download] Unable to {action} file due to file access error. '
                            f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...')
                        # Fall back to a short pause when no sleep function is configured
                        if not self.sleep_retry('file_access', retry):
                            time.sleep(0.01)
            return inner
        return outer

    @wrap_file_access('open', fatal=True)
    def sanitize_open(self, filename, open_mode):
        """Open the file via `utils.sanitize_open`, retrying on file access errors.

        Returns the `(file object, filename)` pair given by the helper and
        writes a one-time debug message if the file object is not locked.
        """
        f, filename = sanitize_open(filename, open_mode)
        if not getattr(f, 'locked', None):
            self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
        return f, filename

    @wrap_file_access('remove')
    def try_remove(self, filename):
        """Remove the file; failures are reported instead of raised."""
        os.remove(filename)

    @wrap_file_access('rename')
    def try_rename(self, old_filename, new_filename):
        """Rename (replace) a file; failures are reported instead of raised."""
        if old_filename == new_filename:
            return
        os.replace(old_filename, new_filename)

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        @param filename           File whose modification time is to be set
        @param last_modified_hdr  Date string, as parsed by `timeconvert`
        @returns                  The parsed timestamp, or None if the header
                                  is missing or unusable or the file does not exist
        """
        if last_modified_hdr is None:
            return None
        if not os.path.isfile(encodeFilename(filename)):
            return None
        filetime = timeconvert(last_modified_hdr)
        # Ignore unparsable and obviously invalid dates
        if filetime is None or filetime == 0:
            return None
        # Best-effort: a failure to set the time must not abort the download
        with contextlib.suppress(Exception):
            os.utime(filename, (time.time(), filetime))
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen('[download] Destination: ' + filename)

    def _prepare_multiline_status(self, lines=1):
        """Select and create the progress printer (`self._multiline`).

        The choice depends, in order, on the 'noprogress' option, on a
        'logger' being set on `ydl` and on 'progress_with_newline'.
        """
        if self.params.get('noprogress'):
            self._multiline = QuietMultilinePrinter()
        elif self.ydl.params.get('logger'):
            self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
        elif self.params.get('progress_with_newline'):
            self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
        else:
            self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
        self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')

    def _finish_multiline_status(self):
        """Tell the progress printer that output is complete."""
        self._multiline.end()

    # Text style applied to each `_<name>_str` progress field
    ProgressStyles = Namespace(
        downloaded_bytes='light blue',
        percent='light blue',
        eta='yellow',
        speed='green',
        elapsed='bold white',
        total_bytes='',
        total_bytes_estimate='',
    )

    def _report_progress_status(self, s, default_template):
        """Render the progress line and the console title.

        @param s                 Progress dict; modified in place (styled
                                 `_*_str` fields, '_default_template')
        @param default_template  %-template evaluated against `s`
        """
        for name, style in self.ProgressStyles.items_:
            name = f'_{name}_str'
            if name not in s:
                continue
            s[name] = self._format_progress(s[name], style)
        s['_default_template'] = default_template % s

        # The templates see the data as `info.*` and `progress.*`
        progress_dict = s.copy()
        progress_dict.pop('info_dict')
        progress_dict = {'info': s['info_dict'], 'progress': progress_dict}

        progress_template = self.params.get('progress_template', {})
        self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
            progress_template.get('download') or '[download] %(progress._default_template)s',
            progress_dict), s.get('progress_idx') or 0)
        self.to_console_title(self.ydl.evaluate_outtmpl(
            progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
            progress_dict))

    def _format_progress(self, *args, **kwargs):
        """Style text for the progress printer's stream via `ydl._format_text`."""
        return self.ydl._format_text(
            self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)

    def report_progress(self, s):
        """Progress hook that displays the status dict `s`.

        Handles the 'finished' and 'downloading' statuses; any other status
        is ignored. `s` is updated in place with the formatted `_*_str` fields.
        """
        def with_fields(*tups, default=''):
            # Each tuple is (*required_fields, template); the first template
            # whose fields are all present (not None) in `s` is chosen
            for *fields, tmpl in tups:
                if all(s.get(f) is not None for f in fields):
                    return tmpl
            return default

        if s['status'] == 'finished':
            if self.params.get('noprogress'):
                self.to_screen('[download] Download completed')
            s.update({
                '_total_bytes_str': format_bytes(s.get('total_bytes')),
                '_elapsed_str': self.format_seconds(s.get('elapsed')),
                '_percent_str': self.format_percent(100),
            })
            self._report_progress_status(s, join_nonempty(
                '100%%',
                with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
                with_fields(('elapsed', 'in %(_elapsed_str)s')),
                delim=' '))

        if s['status'] != 'downloading':
            return

        s.update({
            '_eta_str': self.format_eta(s.get('eta')),
            '_speed_str': self.format_speed(s.get('speed')),
            '_percent_str': self.format_percent(try_call(
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
                lambda: s['downloaded_bytes'] == 0 and 0)),
            '_total_bytes_str': format_bytes(s.get('total_bytes')),
            '_total_bytes_estimate_str': format_bytes(s.get('total_bytes_estimate')),
            '_downloaded_bytes_str': format_bytes(s.get('downloaded_bytes')),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
        })

        msg_template = with_fields(
            ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
            ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
            ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
            ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
            default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')

        msg_template += with_fields(
            ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
            ('fragment_index', ' (frag %(fragment_index)s)'))
        self._report_progress_status(s, msg_template)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen('[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, err, count, retries):
        """Report retry in case of HTTP error 5xx, then sleep as configured for 'http'."""
        self.__to_screen(
            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
            % (error_to_compat_str(err), count, self.format_retries(retries)))
        self.sleep_retry('http', count)

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen('[download] Unable to resume')

    def sleep_retry(self, retry_type, count):
        """Sleep before a retry, as configured in 'retry_sleep_functions'.

        @param retry_type  Key into 'retry_sleep_functions' (e.g. 'http', 'file_access')
        @param count       1-based retry attempt; the function is called with `n=count - 1`
        @returns           Whether a sleep function is configured for `retry_type`
                           (True even if it resulted in no delay)
        """
        sleep_func = self.params.get('retry_sleep_functions', {}).get(retry_type)
        delay = float_or_none(sleep_func(n=count - 1)) if sleep_func else None
        if delay:
            self.__to_screen(f'Sleeping {delay:.2f} seconds ...')
            time.sleep(delay)
        return sleep_func is not None

    @staticmethod
    def supports_manifest(manifest):
        """ Whether the downloader can download the fragments from the manifest.
        Redefine in subclasses if needed. This base implementation returns None. """
        pass

    def download(self, filename, info_dict, subtitle=False):
        """Download to a filename using the info from info_dict

        Returns a tuple `(success, real_download)`:
        `(True, False)` if the file was already present and nothing was
        downloaded, otherwise `(result of real_download(), True)`.

        Before downloading, sleeps for 'sleep_interval_subtitles' seconds if
        `subtitle`, else for a random time between 'sleep_interval' and
        'max_sleep_interval'.
        """

        nooverwrites_and_exists = (
            not self.params.get('overwrites', True)
            and os.path.exists(encodeFilename(filename))
        )

        # `filename` may also be a writable file-like object
        if not hasattr(filename, 'write'):
            continuedl_and_exists = (
                self.params.get('continuedl', True)
                and os.path.isfile(encodeFilename(filename))
                and not self.params.get('nopart', False)
            )

            # Check file already present
            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
                self.report_file_already_downloaded(filename)
                self._hook_progress({
                    'filename': filename,
                    'status': 'finished',
                    'total_bytes': os.path.getsize(encodeFilename(filename)),
                }, info_dict)
                self._finish_multiline_status()
                return True, False

        if subtitle:
            sleep_interval = self.params.get('sleep_interval_subtitles') or 0
        else:
            min_sleep_interval = self.params.get('sleep_interval') or 0
            sleep_interval = random.uniform(
                min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
        if sleep_interval > 0:
            self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
            time.sleep(sleep_interval)

        ret = self.real_download(filename, info_dict)
        self._finish_multiline_status()
        return ret, True

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _hook_progress(self, status, info_dict):
        """Call every progress hook with `status`, after adding `info_dict` to it."""
        if not self._progress_hooks:
            return
        status['info_dict'] = info_dict
        # youtube-dl passes the same status object to all the hooks.
        # Some third party scripts seems to be relying on this.
        # So keep this behavior if possible
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """Register a progress hook; it is called with the status dict."""
        # See YoutubeDl.py (search for progress_hooks) for a description of
        # this interface
        self._progress_hooks.append(ph)

    def _debug_cmd(self, args, exe=None):
        """Write the command line `args` to the debug log if 'verbose' is set.

        @param args  Command-line arguments; the first one is the executable
        @param exe   Name to display; defaults to the basename of `args[0]`
        """
        if not self.params.get('verbose', False):
            return

        str_args = [decodeArgument(a) for a in args]

        if exe is None:
            exe = os.path.basename(str_args[0])

        self.write_debug(f'{exe} command line: {shell_quote(str_args)}')