]> jfr.im git - yt-dlp.git/blob - yt_dlp/downloader/common.py
[compat] Add `functools.cached_property`
[yt-dlp.git] / yt_dlp / downloader / common.py
1 import contextlib
2 import errno
3 import os
4 import random
5 import re
6 import time
7
8 from ..minicurses import (
9 BreaklineStatusPrinter,
10 MultilineLogger,
11 MultilinePrinter,
12 QuietMultilinePrinter,
13 )
14 from ..compat import functools
15 from ..utils import (
16 NUMBER_RE,
17 LockingUnsupportedError,
18 Namespace,
19 decodeArgument,
20 encodeFilename,
21 error_to_compat_str,
22 float_or_none,
23 format_bytes,
24 sanitize_open,
25 shell_quote,
26 timeconvert,
27 timetuple_from_msec,
28 )
29
30
class FileDownloader:
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for HTTP error 5xx
    file_access_retries:   Number of times to retry on file access error
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py
    retry_sleep_functions: See YoutubeDL.py

    Subclasses of this one must re-define the real_download method.
    """

    # NOTE(review): presumably the number of bytes fetched when the 'test'
    # option is set; it is not used in this part of the file - confirm
    _TEST_FILE_SIZE = 10241
    # Class-level placeholder; __init__ replaces it with the options dictionary
    params = None
76
def __init__(self, ydl, params):
    """Create a FileDownloader object with the given options.

    ydl is handed to _set_ydl(); params is the options dictionary that the
    other methods read through self.params.
    """
    self._set_ydl(ydl)
    self.params = params
    self._progress_hooks = []
    self._prepare_multiline_status()
    # The built-in progress display is registered as the first progress hook
    self.add_progress_hook(self.report_progress)
84
def _set_ydl(self, ydl):
    """Attach ydl and borrow its reporting helpers.

    Every helper named below is copied from ydl onto this object, unless an
    attribute of that name already exists here; existing attributes are
    left untouched.
    """
    self.ydl = ydl

    borrowed_names = (
        'deprecation_warning',
        'report_error',
        'report_file_already_downloaded',
        'report_warning',
        'to_console_title',
        'to_stderr',
        'trouble',
        'write_debug',
    )
    for name in borrowed_names:
        if hasattr(self, name):
            continue
        setattr(self, name, getattr(ydl, name))
100
def to_screen(self, *args, **kargs):
    """Forward a message to ydl.to_screen, passing on the 'quiet' option."""
    quiet = self.params.get('quiet')
    self.ydl.to_screen(*args, quiet=quiet, **kargs)

# Double-underscore alias of the implementation above, used by the methods
# that call self.__to_screen
__to_screen = to_screen
105
@functools.cached_property
def FD_NAME(self):
    """Snake-case name derived from the class name.

    The last two characters of the class name are dropped (presumably an
    'FD' suffix - confirm against the subclasses), an underscore is inserted
    before every capital letter except a leading one, and the result is
    lowercased.
    """
    class_name = type(self).__name__
    stem = class_name[:-2]
    return re.sub(r'(?<!^)(?=[A-Z])', '_', stem).lower()
109
@staticmethod
def format_seconds(seconds):
    """Format a duration in seconds as MM:SS, or HH:MM:SS once hours are non-zero.

    Durations of more than 99 hours are rendered as '--:--:--'.
    """
    parts = timetuple_from_msec(seconds * 1000)
    if parts.hours > 99:
        return '--:--:--'
    if parts.hours:
        # Everything except the trailing field
        return '%02d:%02d:%02d' % parts[:-1]
    # Drop the leading hours field as well
    return '%02d:%02d' % parts[1:-1]
118
119 @staticmethod
120 def calc_percent(byte_counter, data_len):
121 if data_len is None:
122 return None
123 return float(byte_counter) / float(data_len) * 100.0
124
125 @staticmethod
126 def format_percent(percent):
127 if percent is None:
128 return '---.-%'
129 elif percent == 100:
130 return '100%'
131 return '%6s' % ('%3.1f%%' % percent)
132
133 @staticmethod
134 def calc_eta(start, now, total, current):
135 if total is None:
136 return None
137 if now is None:
138 now = time.time()
139 dif = now - start
140 if current == 0 or dif < 0.001: # One millisecond
141 return None
142 rate = float(current) / dif
143 return int((float(total) - float(current)) / rate)
144
145 @staticmethod
146 def format_eta(eta):
147 if eta is None:
148 return '--:--'
149 return FileDownloader.format_seconds(eta)
150
151 @staticmethod
152 def calc_speed(start, now, bytes):
153 dif = now - start
154 if bytes == 0 or dif < 0.001: # One millisecond
155 return None
156 return float(bytes) / dif
157
158 @staticmethod
159 def format_speed(speed):
160 if speed is None:
161 return '%10s' % '---b/s'
162 return '%10s' % ('%s/s' % format_bytes(speed))
163
164 @staticmethod
165 def format_retries(retries):
166 return 'inf' if retries == float('inf') else '%.0f' % retries
167
168 @staticmethod
169 def best_block_size(elapsed_time, bytes):
170 new_min = max(bytes / 2.0, 1.0)
171 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
172 if elapsed_time < 0.001:
173 return int(new_max)
174 rate = bytes / elapsed_time
175 if rate > new_max:
176 return int(new_max)
177 if rate < new_min:
178 return int(new_min)
179 return int(rate)
180
@staticmethod
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into an integer.

    The string is a number (as matched by NUMBER_RE) optionally followed by
    one of the unit letters k, M, G, T, P, E, Z or Y, matched
    case-insensitively; each unit step is a factor of 1024. Returns None
    when the string does not match.
    """
    parsed = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr)
    if parsed is None:
        return None
    value = float(parsed.group(1))
    unit = parsed.group(2).lower()
    # An empty unit is found at index 0, which gives a multiplier of 1
    exponent = 'bkmgtpezy'.index(unit)
    return int(round(value * 1024.0 ** exponent))
190
def slow_down(self, start_time, now, byte_counter):
    """Sleep if the download speed is over the rate limit.

    Nothing happens without a 'ratelimit' option, before any bytes have
    arrived, or when no time has elapsed. now defaults to the current time
    when None.
    """
    limit = self.params.get('ratelimit')
    if limit is None or byte_counter == 0:
        return
    if now is None:
        now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    received = float(byte_counter)
    if received / elapsed > limit:
        # Wait until the average speed has dropped back to the limit
        pause = received / limit - elapsed
        if pause > 0:
            time.sleep(pause)
206
def temp_name(self, filename):
    """Returns a temporary filename for the given filename.

    The name is returned unchanged when the 'nopart' option is set, when it
    is '-', or when it exists but is not a regular file; otherwise '.part'
    is appended.
    """
    if self.params.get('nopart', False) or filename == '-':
        return filename
    if os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename)):
        return filename
    return filename + '.part'
213
def undo_temp_name(self, filename):
    """Strip a trailing '.part' from filename, if there is one."""
    suffix = '.part'
    if not filename.endswith(suffix):
        return filename
    return filename[:-len(suffix)]
218
def ytdl_filename(self, filename):
    """Return filename with '.ytdl' appended."""
    suffix = '.ytdl'
    return filename + suffix
221
def wrap_file_access(action, *, fatal=False):
    """Decorator factory that retries a file operation on access errors.

    action is the verb used in messages ('open', 'remove', ...). The wrapped
    method is retried while it raises OSError with errno EACCES or EINVAL,
    up to the 'file_access_retries' option (a missing or None value counts
    as 0). Once the retries are used up, or on any other OSError, the error
    is re-raised if fatal is true; otherwise it is reported through
    self.report_error and None is returned.
    """
    def outer(func):
        # Keep the wrapped method's name and docstring
        @functools.wraps(func)
        def inner(self, *args, **kwargs):
            file_access_retries = self.params.get('file_access_retries') or 0
            retry = 0
            while True:
                try:
                    return func(self, *args, **kwargs)
                except OSError as err:
                    retry = retry + 1
                    if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL):
                        if not fatal:
                            self.report_error(f'unable to {action} file: {err}')
                            return
                        raise
                    self.to_screen(
                        f'[download] Unable to {action} file due to file access error. '
                        f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...')
                    # Without a configured sleep function, pause briefly before retrying
                    if not self.sleep_retry('file_access', retry):
                        time.sleep(0.01)
        return inner
    return outer
244
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
    """Open filename through the sanitize_open() helper.

    Returns the (file object, filename) pair produced by that helper. A
    debug message is written once when the returned file object has no
    truthy 'locked' attribute.
    """
    stream, opened_name = sanitize_open(filename, open_mode)
    is_locked = getattr(stream, 'locked', None)
    if not is_locked:
        self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
    return stream, opened_name
251
@wrap_file_access('remove')
def try_remove(self, filename):
    """Delete filename; the retry and error handling come from wrap_file_access."""
    os.remove(filename)
255
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
    """Move old_filename onto new_filename; a no-op when both names are equal."""
    if old_filename != new_filename:
        os.replace(old_filename, new_filename)
261
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    last_modified_hdr is converted with timeconvert(). Returns the
    resulting timestamp, or None when the header is missing, the file does
    not exist as a regular file, or the timestamp is None or 0.
    """
    if last_modified_hdr is None:
        return
    if not os.path.isfile(encodeFilename(filename)):
        return
    filetime = timeconvert(last_modified_hdr)
    # Ignore unparsable and obviously invalid dates
    if filetime is None or filetime == 0:
        return
    # Best effort: a failure to set the time is ignored
    with contextlib.suppress(Exception):
        os.utime(filename, (time.time(), filetime))
    return filetime
280
def report_destination(self, filename):
    """Report destination filename."""
    message = '[download] Destination: ' + filename
    self.to_screen(message)
284
def _prepare_multiline_status(self, lines=1):
    """Choose the printer used for progress output and store it in self._multiline.

    In order of precedence: a quiet printer when 'noprogress' is set, a
    logger-backed printer when ydl has a 'logger' option, a break-line
    printer for 'progress_with_newline', and the regular multiline printer
    otherwise. Colors are allowed only when the printer reports
    _HAVE_FULLCAP and 'no_color' is not set.
    """
    if self.params.get('noprogress'):
        printer = QuietMultilinePrinter()
    elif self.ydl.params.get('logger'):
        printer = MultilineLogger(self.ydl.params['logger'], lines)
    elif self.params.get('progress_with_newline'):
        printer = BreaklineStatusPrinter(self.ydl._out_files.screen, lines)
    else:
        printer = MultilinePrinter(self.ydl._out_files.screen, lines, not self.params.get('quiet'))
    self._multiline = printer
    printer.allow_colors = printer._HAVE_FULLCAP and not self.params.get('no_color')
295
def _finish_multiline_status(self):
    """Call end() on the current progress printer."""
    printer = self._multiline
    printer.end()
298
# Style applied to each progress field. _report_progress_status pairs every
# name below with the '_<name>_str' entry of the status dict and passes the
# style to _format_progress.
# NOTE(review): an empty string presumably means "no styling" - confirm
ProgressStyles = Namespace(
    downloaded_bytes='light blue',
    percent='light blue',
    eta='yellow',
    speed='green',
    elapsed='bold white',
    total_bytes='',
    total_bytes_estimate='',
)
308
def _report_progress_status(self, s, default_template):
    """Style the progress fields of s, then print the progress line and title.

    s is the status dict. Its '_<name>_str' entries that have a style in
    ProgressStyles are replaced by their styled form, and
    '_default_template' is set to default_template filled from s. The
    'download' and 'download-title' entries of the 'progress_template'
    option are used when set, with built-in defaults otherwise.
    """
    for field, style in self.ProgressStyles:
        key = f'_{field}_str'
        if key in s:
            s[key] = self._format_progress(s[key], style)
    s['_default_template'] = default_template % s

    # The template context keeps the info dict apart from the progress fields
    progress_fields = s.copy()
    info = progress_fields.pop('info_dict')
    context = {'info': info, 'progress': progress_fields}

    templates = self.params.get('progress_template', {})

    line_template = templates.get('download') or '[download] %(progress._default_template)s'
    self._multiline.print_at_line(
        self.ydl.evaluate_outtmpl(line_template, context),
        s.get('progress_idx') or 0)

    title_template = templates.get('download-title') or 'yt-dlp %(progress._default_template)s'
    self.to_console_title(self.ydl.evaluate_outtmpl(title_template, context))
328
def _format_progress(self, *args, **kwargs):
    """Delegate to ydl._format_text with the progress printer's stream and color setting."""
    printer = self._multiline
    return self.ydl._format_text(printer.stream, printer.allow_colors, *args, **kwargs)
332
def report_progress(self, s):
    """Progress hook that renders the status dict s as a progress line.

    Only the 'finished' and 'downloading' statuses produce output. The
    method stores the formatted '_*_str' fields in s, picks a message
    template that fits the information available, and passes both to
    _report_progress_status.
    """
    status = s['status']
    if status == 'finished':
        if self.params.get('noprogress'):
            self.to_screen('[download] Download completed')
        pieces = ['100%%']
        if s.get('total_bytes') is not None:
            s['_total_bytes_str'] = format_bytes(s['total_bytes'])
            pieces.append(' of %(_total_bytes_str)s')
        if s.get('elapsed') is not None:
            s['_elapsed_str'] = self.format_seconds(s['elapsed'])
            pieces.append(' in %(_elapsed_str)s')
        s['_percent_str'] = self.format_percent(100)
        self._report_progress_status(s, ''.join(pieces))
        return

    if status != 'downloading':
        return

    eta = s.get('eta')
    s['_eta_str'] = 'Unknown' if eta is None else self.format_eta(eta)

    downloaded = s.get('downloaded_bytes')
    total = s.get('total_bytes')
    estimate = s.get('total_bytes_estimate')

    # The exact total is preferred over the estimate when both are usable
    if downloaded is not None and (total or estimate):
        s['_percent_str'] = self.format_percent(100 * downloaded / (total or estimate))
    elif downloaded == 0:
        s['_percent_str'] = self.format_percent(0)
    else:
        s['_percent_str'] = 'Unknown %'

    speed = s.get('speed')
    s['_speed_str'] = 'Unknown speed' if speed is None else self.format_speed(speed)

    if total is not None:
        s['_total_bytes_str'] = format_bytes(total)
        msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
    elif estimate is not None:
        s['_total_bytes_estimate_str'] = format_bytes(estimate)
        msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
    elif downloaded is not None:
        s['_downloaded_bytes_str'] = format_bytes(downloaded)
        if s.get('elapsed'):
            s['_elapsed_str'] = self.format_seconds(s['elapsed'])
            msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
        else:
            msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
    else:
        msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'

    if s.get('fragment_index'):
        if s.get('fragment_count'):
            msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
        else:
            msg_template += ' (frag %(fragment_index)s)'
    self._report_progress_status(s, msg_template)
392
def report_resuming_byte(self, resume_len):
    """Report attempt to resume at given byte."""
    message = '[download] Resuming download at byte %s' % resume_len
    self.to_screen(message)
396
def report_retry(self, err, count, retries):
    """Report retry in case of HTTP error 5xx, then sleep as configured for 'http' retries."""
    message = '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...' % (
        error_to_compat_str(err), count, self.format_retries(retries))
    self.__to_screen(message)
    self.sleep_retry('http', count)
403
def report_unable_to_resume(self):
    """Report it was impossible to resume download."""
    message = '[download] Unable to resume'
    self.to_screen(message)
407
def sleep_retry(self, retry_type, count):
    """Sleep before a retry if a sleep function is configured for retry_type.

    The function is looked up in the 'retry_sleep_functions' option and
    called with n=count - 1; a truthy result is slept for, in seconds.
    Returns whether a sleep function was configured, so callers can apply
    their own fallback delay.
    """
    sleep_func = self.params.get('retry_sleep_functions', {}).get(retry_type)
    delay = None
    if sleep_func:
        delay = float_or_none(sleep_func(n=count - 1))
    if delay:
        self.__to_screen(f'Sleeping {delay:.2f} seconds ...')
        time.sleep(delay)
    return sleep_func is not None
415
416 @staticmethod
417 def supports_manifest(manifest):
418 """ Whether the downloader can download the fragments from the manifest.
419 Redefine in subclasses if needed. """
420 pass
421
def download(self, filename, info_dict, subtitle=False):
    """Download to a filename using the info from info_dict.

    filename is a path, '-', or an object with a write() method.

    Returns a tuple (success, real_download_called). When the file is
    considered already present, (True, False) is returned after the
    progress hooks have been sent a 'finished' status. Otherwise the result
    of real_download() is returned together with True.

    Before the real download, the method sleeps for
    'sleep_interval_subtitles' seconds (subtitles) or for a random time
    between 'sleep_interval' and 'max_sleep_interval' seconds.
    """
    # Path checks only make sense for real filenames, not file-like targets
    if not hasattr(filename, 'write'):
        nooverwrites_and_exists = (
            not self.params.get('overwrites', True)
            and os.path.exists(encodeFilename(filename))
        )
        continuedl_and_exists = (
            self.params.get('continuedl', True)
            and os.path.isfile(encodeFilename(filename))
            and not self.params.get('nopart', False)
        )

        # Check file already present
        if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
                'total_bytes': os.path.getsize(encodeFilename(filename)),
            }, info_dict)
            self._finish_multiline_status()
            return True, False

    if subtitle:
        sleep_interval = self.params.get('sleep_interval_subtitles') or 0
    else:
        min_sleep_interval = self.params.get('sleep_interval') or 0
        sleep_interval = random.uniform(
            min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
    if sleep_interval > 0:
        self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
        time.sleep(sleep_interval)

    ret = self.real_download(filename, info_dict)
    self._finish_multiline_status()
    return ret, True
463
def real_download(self, filename, info_dict):
    """Real download process. Redefine in subclasses.

    This base implementation always raises NotImplementedError.
    """
    message = 'This method must be implemented by subclasses'
    raise NotImplementedError(message)
467
def _hook_progress(self, status, info_dict):
    """Pass status to every registered progress hook.

    info_dict is stored in status under 'info_dict' first. Nothing happens,
    and status is left unmodified, when no hooks are registered.
    """
    hooks = self._progress_hooks
    if not hooks:
        return
    status['info_dict'] = info_dict
    # youtube-dl passes the same status object to all the hooks.
    # Some third-party scripts seem to rely on this,
    # so keep this behavior if possible
    for hook in hooks:
        hook(status)
477
def add_progress_hook(self, ph):
    """Register ph to be called with the status dict on progress updates."""
    # See YoutubeDL.py (search for progress_hooks) for a description of
    # this interface
    hooks = self._progress_hooks
    hooks.append(ph)
482
def _debug_cmd(self, args, exe=None):
    """Write the command line args as a debug message when 'verbose' is set.

    exe is the name shown in the message; when it is None, the basename of
    the first argument is used.
    """
    if not self.params.get('verbose', False):
        return

    decoded = [decodeArgument(arg) for arg in args]
    program = os.path.basename(decoded[0]) if exe is None else exe

    self.write_debug(f'{program} command line: {shell_quote(decoded)}')