]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[extractor/AmazonStore] Fix JSON extraction (#5111)
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
be5c1ae8 3import functools
3bc2ddcc 4import os
f8271158 5import random
3bc2ddcc 6import re
3bc2ddcc
JMF
7import time
8
f8271158 9from ..minicurses import (
10 BreaklineStatusPrinter,
11 MultilineLogger,
12 MultilinePrinter,
13 QuietMultilinePrinter,
14)
3bc2ddcc 15from ..utils import (
be5c1ae8 16 IDENTITY,
17 NO_DEFAULT,
1d485a1a 18 NUMBER_RE,
f8271158 19 LockingUnsupportedError,
19a03940 20 Namespace,
be5c1ae8 21 RetryManager,
1a8cc837 22 classproperty,
1433734c 23 decodeArgument,
3bc2ddcc 24 encodeFilename,
3bc2ddcc 25 format_bytes,
11233f2a 26 join_nonempty,
205a0654 27 sanitize_open,
1433734c 28 shell_quote,
e3ced9ed 29 timeconvert,
aa7785f8 30 timetuple_from_msec,
11233f2a 31 try_call,
3bc2ddcc
JMF
32)
33
34
86e5f3ed 35class FileDownloader:
3bc2ddcc
JMF
36 """File Downloader class.
37
38 File downloader objects are the ones responsible for downloading the
39 actual video file and writing it to disk.
40
41 File downloaders accept a lot of parameters. In order not to saturate
42 the object constructor with arguments, it receives a dictionary of
43 options instead.
44
45 Available options:
46
881e6a1f
PH
47 verbose: Print additional info to stdout.
48 quiet: Do not print messages to stdout.
49 ratelimit: Download speed limit, in bytes/sec.
c487cf00 50 continuedl: Attempt to continue downloads if possible
51d9739f 51 throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
881e6a1f 52 retries: Number of times to retry for HTTP error 5xx
205a0654 53 file_access_retries: Number of times to retry on file access error
881e6a1f
PH
54 buffersize: Size of download buffer in bytes.
55 noresizebuffer: Do not automatically resize the download buffer.
56 continuedl: Try to continue downloads if possible.
57 noprogress: Do not print the progress bar.
881e6a1f
PH
58 nopart: Do not use temporary .part files.
59 updatetime: Use the Last-modified header to set output file timestamps.
60 test: Download only first bytes to test the downloader.
61 min_filesize: Skip files smaller than this size
62 max_filesize: Skip files larger than this size
63 xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
34488702 64 external_downloader_args: A dictionary of downloader keys (in lower case)
65 and a list of additional command-line arguments for the
66 executable. Use 'default' as the name for arguments to be
67 passed to all downloaders. For compatibility with youtube-dl,
68 a single list of args can also be used
7d106a65 69 hls_use_mpegts: Use the mpegts container for HLS videos.
073cca3d 70 http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
b54d4a5c
S
71 useful for bypassing bandwidth throttling imposed by
72 a webserver (experimental)
819e0531 73 progress_template: See YoutubeDL.py
23326151 74 retry_sleep_functions: See YoutubeDL.py
3bc2ddcc
JMF
75
76 Subclasses of this one must re-define the real_download method.
77 """
78
# NOTE(review): presumably the number of bytes fetched when the `test` option
# is enabled - nothing in this class reads it, so confirm against subclasses.
_TEST_FILE_SIZE = 10241
# Class-level placeholder; __init__ replaces it with the options dictionary.
params = None
81
def __init__(self, ydl, params):
    """Build a downloader bound to *ydl* and configured by the *params* dict."""
    self._set_ydl(ydl)
    self.params = params
    self._progress_hooks = []
    # The status printer has to exist before the built-in progress hook is
    # registered, because that hook writes through it.
    self._prepare_multiline_status()
    self.add_progress_hook(self.report_progress)
3bc2ddcc 89
19a03940 90 def _set_ydl(self, ydl):
91 self.ydl = ydl
92
93 for func in (
94 'deprecation_warning',
da4db748 95 'deprecated_feature',
19a03940 96 'report_error',
97 'report_file_already_downloaded',
98 'report_warning',
99 'to_console_title',
100 'to_stderr',
101 'trouble',
102 'write_debug',
103 ):
1d485a1a 104 if not hasattr(self, func):
105 setattr(self, func, getattr(ydl, func))
19a03940 106
def to_screen(self, *args, **kargs):
    """Print through the YoutubeDL object, passing along this downloader's `quiet` option."""
    quiet = self.params.get('quiet')
    self.ydl.to_screen(*args, quiet=quiet, **kargs)

# Name-mangled alias: code inside this class that refers to `__to_screen`
# keeps reaching the implementation above even if a subclass overrides
# `to_screen`.
__to_screen = to_screen
111
@classproperty
def FD_NAME(cls):
    """Class name without its last two characters, converted from CamelCase to lower snake_case."""
    # NOTE(review): the two dropped characters are presumably the "FD" suffix
    # of subclass names - confirm against the subclasses.
    trimmed = cls.__name__[:-2]
    return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', trimmed).lower()
3a408f9d 115
3bc2ddcc
JMF
@staticmethod
def format_seconds(seconds):
    """Render a duration given in seconds for the progress line.

    None yields ' Unknown', more than 99 hours yields '--:--:--', durations
    under one hour are shown as MM:SS and everything else as HH:MM:SS.
    """
    # NOTE(review): the text this was recovered from may have collapsed runs
    # of spaces inside the padded literals below - confirm against the repo.
    if seconds is None:
        return ' Unknown'
    parts = timetuple_from_msec(seconds * 1000)
    if parts.hours > 99:
        return '--:--:--'
    if parts.hours:
        # Every component except the last one
        return '%02d:%02d:%02d' % parts[:-1]
    # No hours: drop the first component as well
    return ' %02d:%02d' % parts[1:-1]

# ETAs are formatted exactly like any other duration
format_eta = format_seconds
128
3bc2ddcc
JMF
@staticmethod
def calc_percent(byte_counter, data_len):
    """Return how much of the download is done, as a percentage.

    byte_counter: bytes received so far
    data_len:     expected total size in bytes

    Returns None when the total is unknown (None) or zero, since no
    meaningful percentage exists in either case.
    """
    # A zero total used to raise ZeroDivisionError here
    if not data_len:
        return None
    return float(byte_counter) / float(data_len) * 100.0
134
@staticmethod
def format_percent(percent):
    """Format a percentage with one decimal, or ' N/A%' when it is None."""
    # NOTE(review): padding inside the ' N/A%' literal may have been collapsed
    # in the text this was recovered from - confirm against the repository.
    if percent is None:
        return ' N/A%'
    return f'{percent:>5.1f}%'
3bc2ddcc
JMF
138
@staticmethod
def calc_eta(start, now, total, current):
    """Estimate the remaining seconds from the average rate so far.

    Returns None when the total is unknown, nothing has been transferred
    yet, or less than a millisecond has elapsed. *now* defaults to the
    current time when None.
    """
    if total is None:
        return None
    elapsed = (time.time() if now is None else now) - start
    if current == 0 or elapsed < 0.001:  # One millisecond
        return None
    remaining = float(total) - float(current)
    return int(remaining / (float(current) / elapsed))
150
3bc2ddcc
JMF
@staticmethod
def calc_speed(start, now, bytes):
    """Average transfer rate in bytes per second between *start* and *now*.

    Returns None when no bytes were transferred or less than a millisecond
    has elapsed.
    """
    elapsed = now - start
    unmeasurable = bytes == 0 or elapsed < 0.001  # One millisecond
    return None if unmeasurable else float(bytes) / elapsed
157
@staticmethod
def format_speed(speed):
    """Format a transfer rate as '<size>/s', or ' Unknown B/s' when it is None."""
    if speed is None:
        return ' Unknown B/s'
    return f'{format_bytes(speed):>10s}/s'
3bc2ddcc 161
617e58d8
S
@staticmethod
def format_retries(retries):
    """Return 'inf' for an infinite retry count, otherwise the count as an int."""
    if retries == float('inf'):
        return 'inf'
    return int(retries)
617e58d8 165
3bc2ddcc
JMF
@staticmethod
def best_block_size(elapsed_time, bytes):
    """Choose the next read size from how quickly the last block arrived.

    The result is the observed rate (bytes / elapsed_time) limited to between
    half and double the previous block size, and never above 4 MB. When less
    than a millisecond elapsed, the upper limit is returned directly.
    """
    floor = max(bytes / 2.0, 1.0)
    ceiling = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
    if elapsed_time < 0.001:
        return int(ceiling)
    rate = bytes / elapsed_time
    # The ceiling is tested first on purpose: for very large blocks the floor
    # can exceed the ceiling, so the order of these checks affects the result.
    if rate > ceiling:
        chosen = ceiling
    elif rate < floor:
        chosen = floor
    else:
        chosen = rate
    return int(chosen)
178
@staticmethod
def parse_bytes(bytestr):
    """Convert a size string such as '10k' or '1.5M' into a byte count.

    The number may be followed by one optional, case-insensitive unit letter
    (k, M, G, T, P, E, Z or Y), each a further power of 1024. Returns None
    when the string does not have that form.
    """
    found = re.match(rf'(?i)^({NUMBER_RE})([kMGTPEZY]?)$', bytestr)
    if found is None:
        return None
    number = float(found.group(1))
    # An absent unit matches the empty string, whose index is 0 (1024 ** 0 == 1)
    exponent = 'bkmgtpezy'.index(found.group(2).lower())
    return int(round(number * 1024.0 ** exponent))
188
def slow_down(self, start_time, now, byte_counter):
    """Sleep if the download speed is over the rate limit.

    start_time:   when the transfer being measured began
    now:          current timestamp, or None to read the clock here
    byte_counter: bytes transferred since start_time

    Does nothing when no `ratelimit` is configured (None or 0), when nothing
    has been transferred, or when no time has elapsed.
    """
    rate_limit = self.params.get('ratelimit')
    # A limit of 0 is treated like "no limit"; it used to reach the division
    # below and raise ZeroDivisionError.
    if not rate_limit or byte_counter == 0:
        return
    if now is None:
        now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    speed = float(byte_counter) / elapsed
    if speed > rate_limit:
        # Sleep for the difference between how long the transfer should have
        # taken at exactly the permitted rate and how long it actually took.
        sleep_time = float(byte_counter) / rate_limit - elapsed
        if sleep_time > 0:
            time.sleep(sleep_time)
3bc2ddcc
JMF
204
def temp_name(self, filename):
    """Return the name to write to while *filename* is being downloaded.

    The name is returned unchanged when `nopart` is set, when it is '-', or
    when the path exists but is not a regular file; otherwise '.part' is
    appended.
    """
    if self.params.get('nopart', False):
        return filename
    if filename == '-':
        return filename
    if os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename)):
        return filename
    return filename + '.part'
3bc2ddcc
JMF
211
def undo_temp_name(self, filename):
    """Strip a trailing '.part' from *filename*, if there is one."""
    marker = '.part'
    return filename[:-len(marker)] if filename.endswith(marker) else filename
216
ea0c2f21
RA
def ytdl_filename(self, filename):
    """Return *filename* with the '.ytdl' suffix appended."""
    suffix = '.ytdl'
    return filename + suffix
219
def wrap_file_access(action, *, fatal=False):
    """Decorator factory that retries a file operation on certain OSErrors.

    action: verb inserted into the messages ("Unable to <action> file: ...")
    fatal:  when true, no `error` reporter is handed to
            RetryManager.report_retry (None is passed instead of a lambda
            calling fd.report_error).
            NOTE(review): presumably this makes RetryManager raise once the
            retries are used up - confirm in RetryManager.

    This is a plain function evaluated in the class body and used as
    `@wrap_file_access(...)` on the methods that follow; it is not a method.
    """
    def error_callback(err, count, retries, *, fd):
        # `fd.__to_screen` is name-mangled because this code sits inside the
        # class body, so it resolves to the class-private alias of to_screen.
        return RetryManager.report_retry(
            err, count, retries, info=fd.__to_screen,
            # The tuple is only a way to run two calls in one lambda: a short
            # pause, then the warning.
            warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
            error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
            sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

    def wrapper(self, func, *args, **kwargs):
        # The retry budget comes from the `file_access_retries` option
        for retry in RetryManager(self.params.get('file_access_retries'), error_callback, fd=self):
            try:
                return func(self, *args, **kwargs)
            except OSError as err:
                # Only EACCES and EINVAL are handed to the retry machinery
                if err.errno in (errno.EACCES, errno.EINVAL):
                    retry.error = err
                    continue
                # Any other OSError is reported at once, as attempt 1 of 0 retries
                retry.error_callback(err, 1, 0)

    # Applied to a function f, the returned partial builds
    # functools.partialmethod(wrapper, f): calling the resulting method runs
    # wrapper(self, f, *args, **kwargs).
    return functools.partial(functools.partialmethod, wrapper)
45806d44
EH
239
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
    """Open *filename* via the module-level sanitize_open helper.

    Returns the (file object, filename) pair produced by that helper. A debug
    message is written (only once) when the file object's `locked` attribute
    is missing or falsy.
    """
    # Inside the method body this name is the imported helper, not this method
    stream, opened_name = sanitize_open(filename, open_mode)
    is_locked = getattr(stream, 'locked', None)
    if not is_locked:
        self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
    return stream, opened_name
205a0654 246
45806d44
EH
@wrap_file_access('remove')
def try_remove(self, filename):
    """Delete *filename*, going through the file-access retry wrapper."""
    os.remove(filename)
250
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
    """Move *old_filename* onto *new_filename*; does nothing when the names are equal."""
    if old_filename != new_filename:
        os.replace(old_filename, new_filename)
3bc2ddcc
JMF
256
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    Returns the timestamp parsed from *last_modified_hdr*, or None when the
    header is missing, the file does not exist, or the header parses to
    nothing or to 0. Failures of os.utime itself are deliberately ignored.
    """
    if last_modified_hdr is None or not os.path.isfile(encodeFilename(filename)):
        return
    filetime = timeconvert(last_modified_hdr)
    # A result of 0 is an obviously invalid date
    if filetime is None or filetime == 0:
        return
    with contextlib.suppress(Exception):
        # Access time becomes "now"; modification time comes from the header
        os.utime(filename, (time.time(), filetime))
    return filetime
275
def report_destination(self, filename):
    """Announce the file the download is being written to."""
    message = '[download] Destination: ' + filename
    self.to_screen(message)
3bc2ddcc 279
def _prepare_multiline_status(self, lines=1):
    """Create the status printer used for progress output and store it in self._multiline.

    The printer class is chosen by the first rule that applies: `noprogress`,
    then a `logger` configured on the YoutubeDL object, then
    `progress_with_newline`, and otherwise the default printer.
    """
    options = self.params
    if options.get('noprogress'):
        printer = QuietMultilinePrinter()
    elif self.ydl.params.get('logger'):
        printer = MultilineLogger(self.ydl.params['logger'], lines)
    elif options.get('progress_with_newline'):
        printer = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
    else:
        printer = MultilinePrinter(self.ydl._out_files.out, lines, not options.get('quiet'))
    self._multiline = printer
    # Colors need both printer support and the absence of `no_color`
    printer.allow_colors = printer._HAVE_FULLCAP and not options.get('no_color')
bd50a52b
THD
290
def _finish_multiline_status(self):
    """Tell the status printer that progress output is finished."""
    self._multiline.end()
293
# Text style for each progress field. _report_progress_status() restyles the
# status entry named '_<field>_str' with the value given here; an empty
# string is passed through like any other style.
ProgressStyles = Namespace(
    downloaded_bytes='light blue',
    percent='light blue',
    eta='yellow',
    speed='green',
    elapsed='bold white',
    total_bytes='',
    total_bytes_estimate='',
)
7578d77d 303
304 def _report_progress_status(self, s, default_template):
64fa820c 305 for name, style in self.ProgressStyles.items_:
7578d77d 306 name = f'_{name}_str'
307 if name not in s:
308 continue
309 s[name] = self._format_progress(s[name], style)
310 s['_default_template'] = default_template % s
311
819e0531 312 progress_dict = s.copy()
313 progress_dict.pop('info_dict')
314 progress_dict = {'info': s['info_dict'], 'progress': progress_dict}
315
316 progress_template = self.params.get('progress_template', {})
317 self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
318 progress_template.get('download') or '[download] %(progress._default_template)s',
319 progress_dict), s.get('progress_idx') or 0)
320 self.to_console_title(self.ydl.evaluate_outtmpl(
321 progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
322 progress_dict))
3bc2ddcc 323
7578d77d 324 def _format_progress(self, *args, **kwargs):
325 return self.ydl._format_text(
326 self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)
327
def report_progress(self, s):
    """Built-in progress hook: format the status dict *s* and print it.

    Only the 'finished' and 'downloading' statuses produce output. The
    derived '_..._str' display fields are written back into *s*.
    """
    def with_fields(*tups, default=''):
        # Each tuple is (field, ..., template). Return the template of the
        # first tuple whose fields are all present and not None in `s`.
        for *fields, tmpl in tups:
            if all(s.get(f) is not None for f in fields):
                return tmpl
        return default

    if s['status'] == 'finished':
        if self.params.get('noprogress'):
            self.to_screen('[download] Download completed')
        # Average speed over the whole download
        speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
        s.update({
            'speed': speed,
            '_speed_str': self.format_speed(speed).strip(),
            '_total_bytes_str': format_bytes(s.get('total_bytes')),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
            '_percent_str': self.format_percent(100),
        })
        # '100%%' is doubled because the template is %-formatted later
        self._report_progress_status(s, join_nonempty(
            '100%%',
            with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
            with_fields(('elapsed', 'in %(_elapsed_str)s')),
            with_fields(('speed', 'at %(_speed_str)s')),
            delim=' '))

    # A finished status also leaves here; other statuses print nothing
    if s['status'] != 'downloading':
        return

    s.update({
        '_eta_str': self.format_eta(s.get('eta')),
        '_speed_str': self.format_speed(s.get('speed')),
        # Candidates in order: exact total, estimated total, and finally
        # 0 when nothing has been downloaded yet
        '_percent_str': self.format_percent(try_call(
            lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
            lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
            lambda: s['downloaded_bytes'] == 0 and 0)),
        '_total_bytes_str': format_bytes(s.get('total_bytes')),
        '_total_bytes_estimate_str': format_bytes(s.get('total_bytes_estimate')),
        '_downloaded_bytes_str': format_bytes(s.get('downloaded_bytes')),
        '_elapsed_str': self.format_seconds(s.get('elapsed')),
    })

    # Use the most informative layout the available fields allow
    msg_template = with_fields(
        ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
        ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
        default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')

    # Append fragment progress when the status carries it
    msg_template += with_fields(
        ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
        ('fragment_index', ' (frag %(fragment_index)s)'))
    self._report_progress_status(s, msg_template)
3bc2ddcc
JMF
380
def report_resuming_byte(self, resume_len):
    """Announce the byte offset at which the download is being resumed."""
    message = '[download] Resuming download at byte %s' % resume_len
    self.to_screen(message)
3bc2ddcc 384
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
    """Report a failed attempt through RetryManager.report_retry.

    When *frag_index* is left at its default, the 'http' sleep function is
    used and no suffix is passed. Otherwise the 'fragment' sleep function is
    used, with the suffix 'fragments' (frag_index None) or 'fragment <index>'.
    When *fatal* is false, IDENTITY replaces the error reporter.
    """
    if frag_index is NO_DEFAULT:
        sleep_key, suffix = 'http', None
    else:
        sleep_key = 'fragment'
        suffix = 'fragments' if frag_index is None else f'fragment {frag_index}'

    def warn(msg):
        return self.__to_screen(f'[download] Got error: {msg}')

    def report_fatal(e):
        return self.report_error(f'\r[download] Got error: {e}')

    RetryManager.report_retry(
        err, count, retries, info=self.__to_screen,
        warn=warn,
        error=report_fatal if fatal else IDENTITY,
        sleep_func=self.params.get('retry_sleep_functions', {}).get(sleep_key),
        suffix=suffix)
3bc2ddcc 394
3bc2ddcc
JMF
def report_unable_to_resume(self):
    """Announce that the download could not be resumed."""
    message = '[download] Unable to resume'
    self.to_screen(message)
3bc2ddcc 398
@staticmethod
def supports_manifest(manifest):
    """Whether the downloader can download the fragments from the manifest.

    This base implementation gives no answer (returns None); redefine it in
    subclasses if needed.
    """
    return None
404
def download(self, filename, info_dict, subtitle=False):
    """Download to a filename using the info from info_dict.

    Returns a 2-tuple:
      * first item  - True when the file was already present, otherwise
                      whatever real_download() returned
      * second item - whether real_download() was actually called
    """

    nooverwrites_and_exists = (
        not self.params.get('overwrites', True)
        and os.path.exists(encodeFilename(filename))
    )

    # File-like targets (anything with a `write` attribute) skip the
    # already-downloaded check entirely
    if not hasattr(filename, 'write'):
        continuedl_and_exists = (
            self.params.get('continuedl', True)
            and os.path.isfile(encodeFilename(filename))
            and not self.params.get('nopart', False)
        )

        # Check file already present
        if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
            self.report_file_already_downloaded(filename)
            # Hooks still receive a 'finished' event for the existing file
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
                'total_bytes': os.path.getsize(encodeFilename(filename)),
            }, info_dict)
            self._finish_multiline_status()
            return True, False

    # Optional pause before the request: a fixed interval for subtitles,
    # otherwise a random one between `sleep_interval` and `max_sleep_interval`
    if subtitle:
        sleep_interval = self.params.get('sleep_interval_subtitles') or 0
    else:
        min_sleep_interval = self.params.get('sleep_interval') or 0
        sleep_interval = random.uniform(
            min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
    if sleep_interval > 0:
        self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
        time.sleep(sleep_interval)

    ret = self.real_download(filename, info_dict)
    self._finish_multiline_status()
    return ret, True
3bc2ddcc
JMF
446
def real_download(self, filename, info_dict):
    """Real download process. Redefine in subclasses.

    This base implementation always raises NotImplementedError.
    """
    raise NotImplementedError('This method must be implemented by subclasses')
3bc2ddcc 450
3ba7740d 451 def _hook_progress(self, status, info_dict):
f5ea4748 452 # Ideally we want to make a copy of the dict, but that is too slow
03b4de72 453 status['info_dict'] = info_dict
f45e6c11 454 # youtube-dl passes the same status object to all the hooks.
455 # Some third party scripts seems to be relying on this.
456 # So keep this behavior if possible
3bc2ddcc 457 for ph in self._progress_hooks:
f45e6c11 458 ph(status)
3bc2ddcc
JMF
459
def add_progress_hook(self, ph):
    """Register *ph* to be called with the status dict on every progress update."""
    # The hook interface is described in YoutubeDL.py (search for progress_hooks)
    hooks = self._progress_hooks
    hooks.append(ph)
222516d9 464
cd8a07a7 465 def _debug_cmd(self, args, exe=None):
222516d9
PH
466 if not self.params.get('verbose', False):
467 return
468
cd8a07a7
S
469 str_args = [decodeArgument(a) for a in args]
470
222516d9 471 if exe is None:
cd8a07a7 472 exe = os.path.basename(str_args[0])
222516d9 473
86e5f3ed 474 self.write_debug(f'{exe} command line: {shell_quote(str_args)}')