]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[core] Change how `Cookie` headers are handled
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
be5c1ae8 3import functools
3bc2ddcc 4import os
f8271158 5import random
3bc2ddcc 6import re
3bc2ddcc
JMF
7import time
8
f8271158 9from ..minicurses import (
10 BreaklineStatusPrinter,
11 MultilineLogger,
12 MultilinePrinter,
13 QuietMultilinePrinter,
14)
3bc2ddcc 15from ..utils import (
be5c1ae8 16 IDENTITY,
17 NO_DEFAULT,
f8271158 18 LockingUnsupportedError,
19a03940 19 Namespace,
be5c1ae8 20 RetryManager,
1a8cc837 21 classproperty,
1433734c 22 decodeArgument,
71df9b7f 23 deprecation_warning,
3bc2ddcc 24 encodeFilename,
3bc2ddcc 25 format_bytes,
11233f2a 26 join_nonempty,
64c464a1 27 parse_bytes,
a057779d 28 remove_start,
205a0654 29 sanitize_open,
1433734c 30 shell_quote,
e3ced9ed 31 timeconvert,
aa7785f8 32 timetuple_from_msec,
11233f2a 33 try_call,
3bc2ddcc 34)
31215122 35from ..utils.traversal import traverse_obj
3bc2ddcc
JMF
36
37
class FileDownloader:
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for expected network errors.
                        Default is 0 for API, but 10 for CLI
    file_access_retries:   Number of times to retry on file access error (default: 3)
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py
    retry_sleep_functions: See YoutubeDL.py

    Subclasses of this one must re-define the real_download method.
    """

    # Not used inside this class; presumably the byte count fetched by
    # subclasses when the 'test' option is set - TODO confirm against subclasses
    _TEST_FILE_SIZE = 10241
    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self._set_ydl(ydl)
        self._progress_hooks = []
        self.params = params
        self._prepare_multiline_status()
        # The built-in progress printer is always the first registered hook
        self.add_progress_hook(self.report_progress)

    def _set_ydl(self, ydl):
        """Store `ydl` and borrow its reporting methods.

        Each listed method of `ydl` is bound onto this instance under the
        same name, unless this object already has an attribute of that name.
        """
        self.ydl = ydl

        for func in (
            'deprecation_warning',
            'deprecated_feature',
            'report_error',
            'report_file_already_downloaded',
            'report_warning',
            'to_console_title',
            'to_stderr',
            'trouble',
            'write_debug',
        ):
            if not hasattr(self, func):
                setattr(self, func, getattr(ydl, func))

    def to_screen(self, *args, **kargs):
        """Forward to `ydl.to_screen`, passing this downloader's 'quiet' param."""
        self.ydl.to_screen(*args, quiet=self.params.get('quiet'), **kargs)

    # Name-mangled private alias: keeps pointing at this implementation even
    # when a subclass overrides `to_screen`
    __to_screen = to_screen

    @classproperty
    def FD_NAME(cls):
        """Class name without its last two characters, converted to lower snake_case."""
        return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', cls.__name__[:-2]).lower()

    @staticmethod
    def format_seconds(seconds):
        """Format a duration in seconds as 'HH:MM:SS'.

        Returns ' Unknown' when `seconds` is None and '--:--:--' when the
        duration is over 99 hours.
        """
        if seconds is None:
            return ' Unknown'
        # Local is deliberately not called `time`, to avoid shadowing the module.
        # presumably (hours, minutes, seconds, milliseconds) - verify against utils
        parts = timetuple_from_msec(seconds * 1000)
        if parts.hours > 99:
            return '--:--:--'
        return '%02d:%02d:%02d' % parts[:-1]

    @classmethod
    def format_eta(cls, seconds):
        """Like `format_seconds`, but with one leading '00:' removed; right-aligned to 8 chars."""
        return f'{remove_start(cls.format_seconds(seconds), "00:"):>8s}'

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage as a float.

        Returns None when the total length is unknown (None) or zero, since
        no meaningful percentage exists in either case.
        """
        if data_len is None:
            return None
        total = float(data_len)
        if total == 0:
            # Previously raised ZeroDivisionError for an empty total
            return None
        return float(byte_counter) / total * 100.0

    @staticmethod
    def format_percent(percent):
        """Format a percentage with one decimal place, or ' N/A%' when it is None."""
        return ' N/A%' if percent is None else f'{percent:>5.1f}%'

    @classmethod
    def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
        """Estimate the remaining time in whole seconds.

        Two call forms are supported:
            calc_eta(rate, remaining)           - speed and amount still to transfer
            calc_eta(start, now, total, current) - timestamps plus total/current amounts
        In the second form `now` may be None, in which case the current time is used.

        Returns None when the estimate cannot be computed (unknown or zero
        rate, unknown remaining amount or unknown total).
        """
        if total is NO_DEFAULT:
            rate, remaining = start_or_rate, now_or_remaining
            # A zero rate previously raised ZeroDivisionError
            if rate is None or remaining is None or rate == 0:
                return None
            return int(float(remaining) / rate)

        start, now = start_or_rate, now_or_remaining
        if total is None:
            return None
        if now is None:
            now = time.time()
        rate = cls.calc_speed(start, now, current)
        # `rate` is None when the speed could not be determined
        return rate and int((float(total) - float(current)) / rate)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed (`bytes` per unit of `now - start`).

        Returns None when nothing was transferred or when less than one
        millisecond has elapsed.
        """
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Format a speed for display, or ' Unknown B/s' when it is None."""
        return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'

    @staticmethod
    def format_retries(retries):
        """Return 'inf' for an infinite retry count, else the count as an int."""
        return 'inf' if retries == float('inf') else int(retries)

    @staticmethod
    def filesize_or_none(unencoded_filename):
        """Return the size of the file, or 0 if it is not an existing regular file.

        NOTE: despite the name, a missing file yields 0, not None.
        """
        if os.path.isfile(unencoded_filename):
            return os.path.getsize(unencoded_filename)
        return 0

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Choose the next read block size from the last block's transfer rate.

        The result is the observed rate (`bytes / elapsed_time`) clamped to
        the range [bytes / 2, bytes * 2], with the upper bound capped at 4 MB
        and both bounds at least 1.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            # Too fast to measure a rate; grow as much as allowed
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer."""
        deprecation_warning('yt_dlp.FileDownloader.parse_bytes is deprecated and '
                            'may be removed in the future. Use yt_dlp.utils.parse_bytes instead')
        # Resolves to the module-level `parse_bytes` imported from utils
        return parse_bytes(bytestr)

    def slow_down(self, start_time, now, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit')
        # A limit of 0 is treated like "no limit"; it previously raised
        # ZeroDivisionError in the sleep-time computation below
        if not rate_limit or byte_counter == 0:
            return
        if now is None:
            now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep just long enough for the average speed to drop to the limit
            sleep_time = float(byte_counter) / rate_limit - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        # No '.part' file when disabled, when writing to '-', or when the
        # target exists but is not a regular file
        if self.params.get('nopart', False) or filename == '-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + '.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from `filename`, if present."""
        if filename.endswith('.part'):
            return filename[:-len('.part')]
        return filename

    def ytdl_filename(self, filename):
        """Return `filename` with '.ytdl' appended."""
        return filename + '.ytdl'

    def wrap_file_access(action, *, fatal=False):
        """Decorator factory (used only inside this class body) adding retries to file operations.

        `action` is the verb used in the messages ("Unable to <action> file").
        The decorated function becomes a `functools.partialmethod` of `wrapper`,
        so calling the method runs `wrapper(self, func, *args, **kwargs)`.
        When `fatal` is true no `error` reporter is passed to
        `RetryManager.report_retry`; otherwise the error goes to `report_error`.
        """
        def error_callback(err, count, retries, *, fd):
            return RetryManager.report_retry(
                err, count, retries, info=fd.__to_screen,
                warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
                error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
                sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

        def wrapper(self, func, *args, **kwargs):
            for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
                try:
                    return func(self, *args, **kwargs)
                except OSError as err:
                    # Only access/invalid-argument errors are handed to the retry manager
                    if err.errno in (errno.EACCES, errno.EINVAL):
                        retry.error = err
                        continue
                    # Any other OSError is reported as if it were the final attempt
                    retry.error_callback(err, 1, 0)

        return functools.partial(functools.partialmethod, wrapper)

    @wrap_file_access('open', fatal=True)
    def sanitize_open(self, filename, open_mode):
        """Open `filename` through `utils.sanitize_open`; return `(file, filename)`.

        Writes a one-time debug message when the returned file object has no
        truthy `locked` attribute.
        """
        f, filename = sanitize_open(filename, open_mode)
        if not getattr(f, 'locked', None):
            self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
        return f, filename

    @wrap_file_access('remove')
    def try_remove(self, filename):
        """Remove `filename` if it is an existing regular file."""
        if os.path.isfile(filename):
            os.remove(filename)

    @wrap_file_access('rename')
    def try_rename(self, old_filename, new_filename):
        """Rename `old_filename` to `new_filename`, replacing any existing target."""
        if old_filename == new_filename:
            return
        os.replace(old_filename, new_filename)

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the timestamp parsed from `last_modified_hdr`, or None when
        the header is missing, the file does not exist, or the date cannot be
        parsed or is 0. Errors raised by `os.utime` are suppressed.
        """
        if last_modified_hdr is None:
            return None
        if not os.path.isfile(encodeFilename(filename)):
            return None
        filetime = timeconvert(last_modified_hdr)
        # Ignore unparsable and obviously invalid dates
        if filetime is None or filetime == 0:
            return None
        with contextlib.suppress(Exception):
            os.utime(filename, (time.time(), filetime))
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen('[download] Destination: ' + filename)

    def _prepare_multiline_status(self, lines=1):
        """Create the `self._multiline` status printer according to the options."""
        if self.params.get('noprogress'):
            self._multiline = QuietMultilinePrinter()
        elif self.ydl.params.get('logger'):
            self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
        elif self.params.get('progress_with_newline'):
            self._multiline = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
        else:
            self._multiline = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
        self._multiline.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
        self._multiline._HAVE_FULLCAP = self.ydl._allow_colors.out

    def _finish_multiline_status(self):
        """Finish the status output by calling `end()` on the printer."""
        self._multiline.end()

    # Text style applied to each `_<name>_str` field of a progress dict
    ProgressStyles = Namespace(
        downloaded_bytes='light blue',
        percent='light blue',
        eta='yellow',
        speed='green',
        elapsed='bold white',
        total_bytes='',
        total_bytes_estimate='',
    )

    def _report_progress_status(self, s, default_template):
        """Render the progress dict `s` to the status line and the console title.

        `s` is modified in place: the `_<name>_str` fields listed in
        `ProgressStyles` are styled, and `_default_template` is set to
        `default_template % s`.
        """
        for name, style in self.ProgressStyles.items_:
            name = f'_{name}_str'
            if name not in s:
                continue
            s[name] = self._format_progress(s[name], style)
        s['_default_template'] = default_template % s

        # Template fields are exposed as `info.*` and `progress.*`
        progress_dict = s.copy()
        progress_dict.pop('info_dict')
        progress_dict = {'info': s['info_dict'], 'progress': progress_dict}

        progress_template = self.params.get('progress_template', {})
        self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
            progress_template.get('download') or '[download] %(progress._default_template)s',
            progress_dict), s.get('progress_idx') or 0)
        self.to_console_title(self.ydl.evaluate_outtmpl(
            progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
            progress_dict))

    def _format_progress(self, *args, **kwargs):
        """Style text via `ydl._format_text` for the status printer's stream."""
        return self.ydl._format_text(
            self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)

    def report_progress(self, s):
        """Default progress hook: print the state described by the status dict `s`.

        Only the 'finished' and 'downloading' statuses produce output.
        `s` is updated in place with the formatted `_*_str` fields.
        """
        def with_fields(*tups, default=''):
            # Return the template of the first tuple whose fields are all present in `s`
            for *fields, tmpl in tups:
                if all(s.get(f) is not None for f in fields):
                    return tmpl
            return default

        _format_bytes = lambda k: f'{format_bytes(s.get(k)):>10s}'

        if s['status'] == 'finished':
            if self.params.get('noprogress'):
                self.to_screen('[download] Download completed')
            speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
            s.update({
                'speed': speed,
                '_speed_str': self.format_speed(speed).strip(),
                '_total_bytes_str': _format_bytes('total_bytes'),
                '_elapsed_str': self.format_seconds(s.get('elapsed')),
                '_percent_str': self.format_percent(100),
            })
            self._report_progress_status(s, join_nonempty(
                '100%%',
                with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
                with_fields(('elapsed', 'in %(_elapsed_str)s')),
                with_fields(('speed', 'at %(_speed_str)s')),
                delim=' '))

        if s['status'] != 'downloading':
            return

        s.update({
            '_eta_str': self.format_eta(s.get('eta')).strip(),
            '_speed_str': self.format_speed(s.get('speed')),
            '_percent_str': self.format_percent(try_call(
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
                lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
                lambda: s['downloaded_bytes'] == 0 and 0)),
            '_total_bytes_str': _format_bytes('total_bytes'),
            '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
            '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
        })

        msg_template = with_fields(
            ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
            ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
            ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
            ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
            default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')

        msg_template += with_fields(
            ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
            ('fragment_index', ' (frag %(fragment_index)s)'))
        self._report_progress_status(s, msg_template)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen('[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
        """Report retry"""
        # When a fragment index is given (even None), the 'fragment' sleep
        # function and a fragment suffix are used instead of the 'http' ones
        is_frag = False if frag_index is NO_DEFAULT else 'fragment'
        RetryManager.report_retry(
            err, count, retries, info=self.__to_screen,
            warn=lambda msg: self.__to_screen(f'[download] Got error: {msg}'),
            error=IDENTITY if not fatal else lambda e: self.report_error(f'\r[download] Got error: {e}'),
            sleep_func=self.params.get('retry_sleep_functions', {}).get(is_frag or 'http'),
            suffix=f'fragment{"s" if frag_index is None else f" {frag_index}"}' if is_frag else None)

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen('[download] Unable to resume')

    @staticmethod
    def supports_manifest(manifest):
        """ Whether the downloader can download the fragments from the manifest.
        Redefine in subclasses if needed. """
        pass

    def download(self, filename, info_dict, subtitle=False):
        """Download to a filename using the info from info_dict

        Returns a 2-tuple `(success, real_download_called)`:
            (True, False)  when the file is reported as already downloaded
            (result, True) otherwise, where `result` is whatever
                           `real_download` returned

        Side effect: `info_dict['http_headers']` is replaced by a copy without
        any `Cookie` header, or by None if nothing is left.
        """
        # Path-based checks only make sense when `filename` is not a file-like object
        if not hasattr(filename, 'write'):
            nooverwrites_and_exists = (
                not self.params.get('overwrites', True)
                and os.path.exists(encodeFilename(filename))
            )
            continuedl_and_exists = (
                self.params.get('continuedl', True)
                and os.path.isfile(encodeFilename(filename))
                and not self.params.get('nopart', False)
            )

            # Check file already present
            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
                self.report_file_already_downloaded(filename)
                self._hook_progress({
                    'filename': filename,
                    'status': 'finished',
                    'total_bytes': os.path.getsize(encodeFilename(filename)),
                }, info_dict)
                self._finish_multiline_status()
                return True, False

        if subtitle:
            sleep_interval = self.params.get('sleep_interval_subtitles') or 0
        else:
            min_sleep_interval = self.params.get('sleep_interval') or 0
            sleep_interval = random.uniform(
                min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
        if sleep_interval > 0:
            self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
            time.sleep(sleep_interval)

        # Filter the `Cookie` header from the info_dict to prevent leaks.
        # See: https://github.com/yt-dlp/yt-dlp/security/advisories/GHSA-v8mc-9377-rwjj
        info_dict['http_headers'] = dict(traverse_obj(info_dict, (
            'http_headers', {dict.items}, lambda _, pair: pair[0].lower() != 'cookie'))) or None

        ret = self.real_download(filename, info_dict)
        self._finish_multiline_status()
        return ret, True

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _hook_progress(self, status, info_dict):
        """Attach `info_dict` to `status` and pass `status` to every progress hook."""
        # Ideally we want to make a copy of the dict, but that is too slow
        status['info_dict'] = info_dict
        # youtube-dl passes the same status object to all the hooks.
        # Some third party scripts seems to be relying on this.
        # So keep this behavior if possible
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """Register `ph` to be called with the status dict on progress updates."""
        # See YoutubeDl.py (search for progress_hooks) for a description of
        # this interface
        self._progress_hooks.append(ph)

    def _debug_cmd(self, args, exe=None):
        """Write the command line `args` to the debug output when 'verbose' is set.

        `exe` defaults to the basename of the first argument.
        """
        if not self.params.get('verbose', False):
            return

        str_args = [decodeArgument(a) for a in args]

        if exe is None:
            exe = os.path.basename(str_args[0])

        self.write_debug(f'{exe} command line: {shell_quote(str_args)}')