]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[compat] Split into sub-modules (#2173)
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
3bc2ddcc 3import os
f8271158 4import random
3bc2ddcc 5import re
3bc2ddcc
JMF
6import time
7
f8271158 8from ..minicurses import (
9 BreaklineStatusPrinter,
10 MultilineLogger,
11 MultilinePrinter,
12 QuietMultilinePrinter,
13)
3bc2ddcc 14from ..utils import (
f8271158 15 LockingUnsupportedError,
19a03940 16 Namespace,
1433734c 17 decodeArgument,
3bc2ddcc 18 encodeFilename,
9b9c5355 19 error_to_compat_str,
3bc2ddcc 20 format_bytes,
205a0654 21 sanitize_open,
1433734c 22 shell_quote,
e3ced9ed 23 timeconvert,
aa7785f8 24 timetuple_from_msec,
3bc2ddcc
JMF
25)
26
27
86e5f3ed 28class FileDownloader:
3bc2ddcc
JMF
29 """File Downloader class.
30
31 File downloader objects are the ones responsible of downloading the
32 actual video file and writing it to disk.
33
34 File downloaders accept a lot of parameters. In order not to saturate
35 the object constructor with arguments, it receives a dictionary of
36 options instead.
37
38 Available options:
39
881e6a1f
PH
40 verbose: Print additional info to stdout.
41 quiet: Do not print messages to stdout.
42 ratelimit: Download speed limit, in bytes/sec.
51d9739f 43 throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
881e6a1f 44 retries: Number of times to retry for HTTP error 5xx
205a0654 45 file_access_retries: Number of times to retry on file access error
881e6a1f
PH
46 buffersize: Size of download buffer in bytes.
47 noresizebuffer: Do not automatically resize the download buffer.
48 continuedl: Try to continue downloads if possible.
49 noprogress: Do not print the progress bar.
881e6a1f
PH
50 nopart: Do not use temporary .part files.
51 updatetime: Use the Last-modified header to set output file timestamps.
52 test: Download only first bytes to test the downloader.
53 min_filesize: Skip files smaller than this size
54 max_filesize: Skip files larger than this size
55 xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
34488702 56 external_downloader_args: A dictionary of downloader keys (in lower case)
57 and a list of additional command-line arguments for the
58 executable. Use 'default' as the name for arguments to be
59 passed to all downloaders. For compatibility with youtube-dl,
60 a single list of args can also be used
7d106a65 61 hls_use_mpegts: Use the mpegts container for HLS videos.
073cca3d 62 http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
b54d4a5c
S
63 useful for bypassing bandwidth throttling imposed by
64 a webserver (experimental)
819e0531 65 progress_template: See YoutubeDL.py
3bc2ddcc
JMF
66
67 Subclasses of this one must re-define the real_download method.
68 """
69
b686fc18 70 _TEST_FILE_SIZE = 10241
3bc2ddcc
JMF
71 params = None
72
73 def __init__(self, ydl, params):
74 """Create a FileDownloader object with the given options."""
19a03940 75 self._set_ydl(ydl)
3bc2ddcc
JMF
76 self._progress_hooks = []
77 self.params = params
819e0531 78 self._prepare_multiline_status()
5cda4eda 79 self.add_progress_hook(self.report_progress)
3bc2ddcc 80
19a03940 81 def _set_ydl(self, ydl):
82 self.ydl = ydl
83
84 for func in (
85 'deprecation_warning',
86 'report_error',
87 'report_file_already_downloaded',
88 'report_warning',
89 'to_console_title',
90 'to_stderr',
91 'trouble',
92 'write_debug',
93 ):
94 setattr(self, func, getattr(ydl, func))
95
96 def to_screen(self, *args, **kargs):
97 self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)
98
3bc2ddcc
JMF
99 @staticmethod
100 def format_seconds(seconds):
aa7785f8 101 time = timetuple_from_msec(seconds * 1000)
102 if time.hours > 99:
3bc2ddcc 103 return '--:--:--'
aa7785f8 104 if not time.hours:
105 return '%02d:%02d' % time[1:-1]
106 return '%02d:%02d:%02d' % time[:-1]
3bc2ddcc
JMF
107
108 @staticmethod
109 def calc_percent(byte_counter, data_len):
110 if data_len is None:
111 return None
112 return float(byte_counter) / float(data_len) * 100.0
113
114 @staticmethod
115 def format_percent(percent):
116 if percent is None:
117 return '---.-%'
f304da8a 118 elif percent == 100:
119 return '100%'
3bc2ddcc
JMF
120 return '%6s' % ('%3.1f%%' % percent)
121
122 @staticmethod
123 def calc_eta(start, now, total, current):
124 if total is None:
125 return None
c7667c2d
S
126 if now is None:
127 now = time.time()
3bc2ddcc 128 dif = now - start
5f6a1245 129 if current == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
130 return None
131 rate = float(current) / dif
132 return int((float(total) - float(current)) / rate)
133
134 @staticmethod
135 def format_eta(eta):
136 if eta is None:
137 return '--:--'
138 return FileDownloader.format_seconds(eta)
139
140 @staticmethod
141 def calc_speed(start, now, bytes):
142 dif = now - start
5f6a1245 143 if bytes == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
144 return None
145 return float(bytes) / dif
146
147 @staticmethod
148 def format_speed(speed):
149 if speed is None:
150 return '%10s' % '---b/s'
151 return '%10s' % ('%s/s' % format_bytes(speed))
152
617e58d8
S
153 @staticmethod
154 def format_retries(retries):
155 return 'inf' if retries == float('inf') else '%.0f' % retries
156
3bc2ddcc
JMF
157 @staticmethod
158 def best_block_size(elapsed_time, bytes):
159 new_min = max(bytes / 2.0, 1.0)
5f6a1245 160 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
3bc2ddcc
JMF
161 if elapsed_time < 0.001:
162 return int(new_max)
163 rate = bytes / elapsed_time
164 if rate > new_max:
165 return int(new_max)
166 if rate < new_min:
167 return int(new_min)
168 return int(rate)
169
170 @staticmethod
171 def parse_bytes(bytestr):
172 """Parse a string indicating a byte quantity into an integer."""
173 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
174 if matchobj is None:
175 return None
176 number = float(matchobj.group(1))
177 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
178 return int(round(number * multiplier))
179
c7667c2d 180 def slow_down(self, start_time, now, byte_counter):
3bc2ddcc 181 """Sleep if the download speed is over the rate limit."""
d800609c 182 rate_limit = self.params.get('ratelimit')
8a77e5e6 183 if rate_limit is None or byte_counter == 0:
3bc2ddcc 184 return
c7667c2d
S
185 if now is None:
186 now = time.time()
3bc2ddcc
JMF
187 elapsed = now - start_time
188 if elapsed <= 0.0:
189 return
190 speed = float(byte_counter) / elapsed
8a77e5e6 191 if speed > rate_limit:
1a01639b
S
192 sleep_time = float(byte_counter) / rate_limit - elapsed
193 if sleep_time > 0:
194 time.sleep(sleep_time)
3bc2ddcc
JMF
195
196 def temp_name(self, filename):
197 """Returns a temporary filename for the given filename."""
b6b70730 198 if self.params.get('nopart', False) or filename == '-' or \
3bc2ddcc
JMF
199 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
200 return filename
b6b70730 201 return filename + '.part'
3bc2ddcc
JMF
202
203 def undo_temp_name(self, filename):
b6b70730
PH
204 if filename.endswith('.part'):
205 return filename[:-len('.part')]
3bc2ddcc
JMF
206 return filename
207
ea0c2f21
RA
208 def ytdl_filename(self, filename):
209 return filename + '.ytdl'
210
45806d44
EH
211 def wrap_file_access(action, *, fatal=False):
212 def outer(func):
213 def inner(self, *args, **kwargs):
214 file_access_retries = self.params.get('file_access_retries', 0)
215 retry = 0
216 while True:
217 try:
218 return func(self, *args, **kwargs)
86e5f3ed 219 except OSError as err:
45806d44
EH
220 retry = retry + 1
221 if retry > file_access_retries or err.errno not in (errno.EACCES, errno.EINVAL):
222 if not fatal:
223 self.report_error(f'unable to {action} file: {err}')
224 return
225 raise
226 self.to_screen(
227 f'[download] Unable to {action} file due to file access error. '
228 f'Retrying (attempt {retry} of {self.format_retries(file_access_retries)}) ...')
229 time.sleep(0.01)
230 return inner
231 return outer
232
233 @wrap_file_access('open', fatal=True)
205a0654 234 def sanitize_open(self, filename, open_mode):
0edb3e33 235 f, filename = sanitize_open(filename, open_mode)
236 if not getattr(f, 'locked', None):
237 self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
238 return f, filename
205a0654 239
45806d44
EH
240 @wrap_file_access('remove')
241 def try_remove(self, filename):
242 os.remove(filename)
243
244 @wrap_file_access('rename')
3bc2ddcc 245 def try_rename(self, old_filename, new_filename):
f775c831 246 if old_filename == new_filename:
247 return
45806d44 248 os.replace(old_filename, new_filename)
3bc2ddcc
JMF
249
250 def try_utime(self, filename, last_modified_hdr):
251 """Try to set the last-modified time of the given file."""
252 if last_modified_hdr is None:
253 return
254 if not os.path.isfile(encodeFilename(filename)):
255 return
256 timestr = last_modified_hdr
257 if timestr is None:
258 return
259 filetime = timeconvert(timestr)
260 if filetime is None:
261 return filetime
262 # Ignore obviously invalid dates
263 if filetime == 0:
264 return
19a03940 265 with contextlib.suppress(Exception):
3bc2ddcc 266 os.utime(filename, (time.time(), filetime))
3bc2ddcc
JMF
267 return filetime
268
269 def report_destination(self, filename):
270 """Report destination filename."""
b6b70730 271 self.to_screen('[download] Destination: ' + filename)
3bc2ddcc 272
819e0531 273 def _prepare_multiline_status(self, lines=1):
274 if self.params.get('noprogress'):
bd50a52b 275 self._multiline = QuietMultilinePrinter()
819e0531 276 elif self.ydl.params.get('logger'):
277 self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
278 elif self.params.get('progress_with_newline'):
cf4f42cb 279 self._multiline = BreaklineStatusPrinter(self.ydl._out_files['screen'], lines)
bd50a52b 280 else:
cf4f42cb 281 self._multiline = MultilinePrinter(self.ydl._out_files['screen'], lines, not self.params.get('quiet'))
7578d77d 282 self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')
bd50a52b
THD
283
284 def _finish_multiline_status(self):
819e0531 285 self._multiline.end()
286
19a03940 287 ProgressStyles = Namespace(
288 downloaded_bytes='light blue',
289 percent='light blue',
290 eta='yellow',
291 speed='green',
292 elapsed='bold white',
293 total_bytes='',
294 total_bytes_estimate='',
295 )
7578d77d 296
297 def _report_progress_status(self, s, default_template):
19a03940 298 for name, style in self.ProgressStyles._asdict().items():
7578d77d 299 name = f'_{name}_str'
300 if name not in s:
301 continue
302 s[name] = self._format_progress(s[name], style)
303 s['_default_template'] = default_template % s
304
819e0531 305 progress_dict = s.copy()
306 progress_dict.pop('info_dict')
307 progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
308
309 progress_template = self.params.get('progress_template', {})
310 self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
311 progress_template.get('download') or '[download] %(progress._default_template)s',
312 progress_dict), s.get('progress_idx') or 0)
313 self.to_console_title(self.ydl.evaluate_outtmpl(
314 progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
315 progress_dict))
3bc2ddcc 316
7578d77d 317 def _format_progress(self, *args, **kwargs):
318 return self.ydl._format_text(
319 self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
320
5cda4eda
PH
321 def report_progress(self, s):
322 if s['status'] == 'finished':
819e0531 323 if self.params.get('noprogress'):
5cda4eda 324 self.to_screen('[download] Download completed')
819e0531 325 msg_template = '100%%'
326 if s.get('total_bytes') is not None:
327 s['_total_bytes_str'] = format_bytes(s['total_bytes'])
328 msg_template += ' of %(_total_bytes_str)s'
329 if s.get('elapsed') is not None:
330 s['_elapsed_str'] = self.format_seconds(s['elapsed'])
331 msg_template += ' in %(_elapsed_str)s'
332 s['_percent_str'] = self.format_percent(100)
f304da8a 333 self._report_progress_status(s, msg_template)
3bc2ddcc 334 return
5cda4eda
PH
335
336 if s['status'] != 'downloading':
337 return
338
339 if s.get('eta') is not None:
340 s['_eta_str'] = self.format_eta(s['eta'])
3bc2ddcc 341 else:
f304da8a 342 s['_eta_str'] = 'Unknown'
3bc2ddcc 343
5cda4eda
PH
344 if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
345 s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
346 elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
347 s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
348 else:
349 if s.get('downloaded_bytes') == 0:
350 s['_percent_str'] = self.format_percent(0)
351 else:
352 s['_percent_str'] = 'Unknown %'
3bc2ddcc 353
5cda4eda
PH
354 if s.get('speed') is not None:
355 s['_speed_str'] = self.format_speed(s['speed'])
356 else:
357 s['_speed_str'] = 'Unknown speed'
358
359 if s.get('total_bytes') is not None:
360 s['_total_bytes_str'] = format_bytes(s['total_bytes'])
361 msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
362 elif s.get('total_bytes_estimate') is not None:
363 s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
364 msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
3bc2ddcc 365 else:
5cda4eda
PH
366 if s.get('downloaded_bytes') is not None:
367 s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
368 if s.get('elapsed'):
369 s['_elapsed_str'] = self.format_seconds(s['elapsed'])
370 msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
371 else:
372 msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
373 else:
f304da8a 374 msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
a4211baf 375 if s.get('fragment_index') and s.get('fragment_count'):
376 msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
377 elif s.get('fragment_index'):
bd93fd5d 378 msg_template += ' (frag %(fragment_index)s)'
7578d77d 379 self._report_progress_status(s, msg_template)
3bc2ddcc
JMF
380
381 def report_resuming_byte(self, resume_len):
382 """Report attempt to resume at given byte."""
b6b70730 383 self.to_screen('[download] Resuming download at byte %s' % resume_len)
3bc2ddcc 384
a3c3a1e1 385 def report_retry(self, err, count, retries):
3bc2ddcc 386 """Report retry in case of HTTP error 5xx"""
617e58d8 387 self.to_screen(
5ef7d9bd 388 '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
a3c3a1e1 389 % (error_to_compat_str(err), count, self.format_retries(retries)))
3bc2ddcc 390
3bc2ddcc
JMF
391 def report_unable_to_resume(self):
392 """Report it was impossible to resume download."""
b6b70730 393 self.to_screen('[download] Unable to resume')
3bc2ddcc 394
0a473f2f 395 @staticmethod
396 def supports_manifest(manifest):
397 """ Whether the downloader can download the fragments from the manifest.
398 Redefine in subclasses if needed. """
399 pass
400
9f448fcb 401 def download(self, filename, info_dict, subtitle=False):
3bc2ddcc
JMF
402 """Download to a filename using the info from info_dict
403 Return True on success and False otherwise
404 """
5f0d813d 405
4340deca 406 nooverwrites_and_exists = (
9cc1a313 407 not self.params.get('overwrites', True)
3089bc74 408 and os.path.exists(encodeFilename(filename))
4340deca
P
409 )
410
75a24854
RA
411 if not hasattr(filename, 'write'):
412 continuedl_and_exists = (
3089bc74
S
413 self.params.get('continuedl', True)
414 and os.path.isfile(encodeFilename(filename))
415 and not self.params.get('nopart', False)
75a24854
RA
416 )
417
418 # Check file already present
419 if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
420 self.report_file_already_downloaded(filename)
421 self._hook_progress({
422 'filename': filename,
423 'status': 'finished',
424 'total_bytes': os.path.getsize(encodeFilename(filename)),
3ba7740d 425 }, info_dict)
b69fd25c 426 self._finish_multiline_status()
a9e7f546 427 return True, False
dabc1273 428
19a03940 429 if subtitle:
430 sleep_interval = self.params.get('sleep_interval_subtitles') or 0
9f448fcb 431 else:
19a03940 432 min_sleep_interval = self.params.get('sleep_interval') or 0
433 sleep_interval = random.uniform(
434 min_sleep_interval, self.params.get('max_sleep_interval', min_sleep_interval))
435 if sleep_interval > 0:
436 self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
437 time.sleep(sleep_interval)
438
819e0531 439 ret = self.real_download(filename, info_dict)
440 self._finish_multiline_status()
441 return ret, True
3bc2ddcc
JMF
442
443 def real_download(self, filename, info_dict):
444 """Real download process. Redefine in subclasses."""
b6b70730 445 raise NotImplementedError('This method must be implemented by subclasses')
3bc2ddcc 446
3ba7740d 447 def _hook_progress(self, status, info_dict):
448 if not self._progress_hooks:
449 return
03b4de72 450 status['info_dict'] = info_dict
f45e6c11 451 # youtube-dl passes the same status object to all the hooks.
452 # Some third party scripts seems to be relying on this.
453 # So keep this behavior if possible
3bc2ddcc 454 for ph in self._progress_hooks:
f45e6c11 455 ph(status)
3bc2ddcc
JMF
456
457 def add_progress_hook(self, ph):
71b640cc
PH
458 # See YoutubeDl.py (search for progress_hooks) for a description of
459 # this interface
3bc2ddcc 460 self._progress_hooks.append(ph)
222516d9 461
cd8a07a7 462 def _debug_cmd(self, args, exe=None):
222516d9
PH
463 if not self.params.get('verbose', False):
464 return
465
cd8a07a7
S
466 str_args = [decodeArgument(a) for a in args]
467
222516d9 468 if exe is None:
cd8a07a7 469 exe = os.path.basename(str_args[0])
222516d9 470
86e5f3ed 471 self.write_debug(f'{exe} command line: {shell_quote(str_args)}')