]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[core] Fix HTTP headers and cookie handling
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
19a03940 1import contextlib
f8271158 2import errno
be5c1ae8 3import functools
3bc2ddcc 4import os
f8271158 5import random
3bc2ddcc 6import re
3bc2ddcc
JMF
7import time
8
f8271158 9from ..minicurses import (
10 BreaklineStatusPrinter,
11 MultilineLogger,
12 MultilinePrinter,
13 QuietMultilinePrinter,
14)
3bc2ddcc 15from ..utils import (
be5c1ae8 16 IDENTITY,
17 NO_DEFAULT,
f8271158 18 LockingUnsupportedError,
19a03940 19 Namespace,
be5c1ae8 20 RetryManager,
1a8cc837 21 classproperty,
1433734c 22 decodeArgument,
71df9b7f 23 deprecation_warning,
3bc2ddcc 24 encodeFilename,
3bc2ddcc 25 format_bytes,
11233f2a 26 join_nonempty,
64c464a1 27 parse_bytes,
a057779d 28 remove_start,
205a0654 29 sanitize_open,
1433734c 30 shell_quote,
e3ced9ed 31 timeconvert,
aa7785f8 32 timetuple_from_msec,
11233f2a 33 try_call,
3bc2ddcc
JMF
34)
35
36
86e5f3ed 37class FileDownloader:
3bc2ddcc
JMF
38 """File Downloader class.
39
40 File downloader objects are the ones responsible for downloading the
41 actual video file and writing it to disk.
42
43 File downloaders accept a lot of parameters. In order not to saturate
44 the object constructor with arguments, it receives a dictionary of
45 options instead.
46
47 Available options:
48
881e6a1f
PH
49 verbose: Print additional info to stdout.
50 quiet: Do not print messages to stdout.
51 ratelimit: Download speed limit, in bytes/sec.
51d9739f 52 throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
46f1370e 53 retries: Number of times to retry for expected network errors.
54 Default is 0 for API, but 10 for CLI
55 file_access_retries: Number of times to retry on file access error (default: 3)
881e6a1f
PH
56 buffersize: Size of download buffer in bytes.
57 noresizebuffer: Do not automatically resize the download buffer.
58 continuedl: Try to continue downloads if possible.
59 noprogress: Do not print the progress bar.
881e6a1f
PH
60 nopart: Do not use temporary .part files.
61 updatetime: Use the Last-modified header to set output file timestamps.
62 test: Download only first bytes to test the downloader.
63 min_filesize: Skip files smaller than this size
64 max_filesize: Skip files larger than this size
65 xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
34488702 66 external_downloader_args: A dictionary of downloader keys (in lower case)
67 and a list of additional command-line arguments for the
68 executable. Use 'default' as the name for arguments to be
69 passed to all downloaders. For compatibility with youtube-dl,
70 a single list of args can also be used
7d106a65 71 hls_use_mpegts: Use the mpegts container for HLS videos.
073cca3d 72 http_chunk_size: Size of a chunk for chunk-based HTTP downloading. May be
b54d4a5c
S
73 useful for bypassing bandwidth throttling imposed by
74 a webserver (experimental)
819e0531 75 progress_template: See YoutubeDL.py
23326151 76 retry_sleep_functions: See YoutubeDL.py
3bc2ddcc
JMF
77
78 Subclasses of this one must re-define the real_download method.
79 """
80
# NOTE(review): presumably the number of bytes fetched when the `test` option
# is set; it is not referenced in the visible part of the file -- confirm
_TEST_FILE_SIZE = 10241
# Class-level default; __init__ stores the options dict on the instance
params = None
83
def __init__(self, ydl, params):
    """Create a FileDownloader object with the given options.

    ``ydl`` is bound through ``_set_ydl``, ``params`` is kept as the options
    dict, and ``report_progress`` is registered as the first progress hook.
    """
    self._set_ydl(ydl)
    self.params = params
    self._progress_hooks = []
    # Needs both self.ydl and self.params to pick a printer
    self._prepare_multiline_status()
    self.add_progress_hook(self.report_progress)
3bc2ddcc 91
def _set_ydl(self, ydl):
    """Attach ``ydl`` and borrow its reporting helpers.

    Every listed attribute of ``ydl`` is copied onto this object under the
    same name, unless this object already has an attribute with that name.
    """
    self.ydl = ydl

    forwarded = (
        'deprecation_warning',
        'deprecated_feature',
        'report_error',
        'report_file_already_downloaded',
        'report_warning',
        'to_console_title',
        'to_stderr',
        'trouble',
        'write_debug',
    )
    for name in forwarded:
        if hasattr(self, name):
            continue
        setattr(self, name, getattr(ydl, name))
19a03940 108
def to_screen(self, *args, **kargs):
    """Forward a message to ``self.ydl.to_screen``, passing this downloader's ``quiet`` option."""
    printer = self.ydl.to_screen
    printer(*args, quiet=self.params.get('quiet'), **kargs)

# Double-underscore alias of the implementation above (the name is mangled
# inside the class); used by the retry reporting code in this class
__to_screen = to_screen
113
@classproperty
def FD_NAME(cls):
    """Class name without its last two characters, converted to lower snake_case."""
    stem = cls.__name__[:-2]
    # Insert "_" at every lowercase -> uppercase boundary
    return re.sub(r'(?<=[a-z])(?=[A-Z])', '_', stem).lower()
3a408f9d 117
3bc2ddcc
JMF
118 @staticmethod
119 def format_seconds(seconds):
11233f2a 120 if seconds is None:
121 return ' Unknown'
aa7785f8 122 time = timetuple_from_msec(seconds * 1000)
123 if time.hours > 99:
3bc2ddcc 124 return '--:--:--'
aa7785f8 125 return '%02d:%02d:%02d' % time[:-1]
3bc2ddcc 126
@classmethod
def format_eta(cls, seconds):
    """Format like ``format_seconds`` but without a leading "00:", right-aligned to 8 characters."""
    text = remove_start(cls.format_seconds(seconds), "00:")
    return f'{text:>8s}'
11233f2a 130
3bc2ddcc
JMF
131 @staticmethod
132 def calc_percent(byte_counter, data_len):
133 if data_len is None:
134 return None
135 return float(byte_counter) / float(data_len) * 100.0
136
137 @staticmethod
138 def format_percent(percent):
11233f2a 139 return ' N/A%' if percent is None else f'{percent:>5.1f}%'
3bc2ddcc 140
@classmethod
def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
    """Estimate the remaining time in whole seconds.

    Two call forms are supported:
      calc_eta(rate, remaining)            -> int(remaining / rate),
                                              None if either value is None
      calc_eta(start, now, total, current) -> rate is derived with calc_speed;
                                              ``now=None`` means time.time(),
                                              ``total=None`` yields None
    """
    if total is not NO_DEFAULT:
        # Four-argument form
        if total is None:
            return None
        start = start_or_rate
        now = time.time() if now_or_remaining is None else now_or_remaining
        rate = cls.calc_speed(start, now, current)
        # A falsy rate (None) is passed through unchanged
        return rate and int((float(total) - float(current)) / rate)

    # Two-argument form
    rate, remaining = start_or_rate, now_or_remaining
    if None in (rate, remaining):
        return None
    return int(float(remaining) / rate)
3bc2ddcc 156
3bc2ddcc
JMF
157 @staticmethod
158 def calc_speed(start, now, bytes):
159 dif = now - start
5f6a1245 160 if bytes == 0 or dif < 0.001: # One millisecond
3bc2ddcc
JMF
161 return None
162 return float(bytes) / dif
163
164 @staticmethod
165 def format_speed(speed):
11233f2a 166 return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s'
3bc2ddcc 167
617e58d8
S
168 @staticmethod
169 def format_retries(retries):
11233f2a 170 return 'inf' if retries == float('inf') else int(retries)
617e58d8 171
4823ec9f 172 @staticmethod
173 def filesize_or_none(unencoded_filename):
174 if os.path.isfile(unencoded_filename):
175 return os.path.getsize(unencoded_filename)
176 return 0
177
3bc2ddcc
JMF
178 @staticmethod
179 def best_block_size(elapsed_time, bytes):
180 new_min = max(bytes / 2.0, 1.0)
5f6a1245 181 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
3bc2ddcc
JMF
182 if elapsed_time < 0.001:
183 return int(new_max)
184 rate = bytes / elapsed_time
185 if rate > new_max:
186 return int(new_max)
187 if rate < new_min:
188 return int(new_min)
189 return int(rate)
190
@staticmethod
def parse_bytes(bytestr):
    """Parse a string indicating a byte quantity into an integer."""
    message = (
        'yt_dlp.FileDownloader.parse_bytes is deprecated and '
        'may be removed in the future. Use yt_dlp.utils.parse_bytes instead')
    deprecation_warning(message)
    # Inside the function body this name resolves to the module-level import
    return parse_bytes(bytestr)
3bc2ddcc 197
c7667c2d 198 def slow_down(self, start_time, now, byte_counter):
3bc2ddcc 199 """Sleep if the download speed is over the rate limit."""
d800609c 200 rate_limit = self.params.get('ratelimit')
8a77e5e6 201 if rate_limit is None or byte_counter == 0:
3bc2ddcc 202 return
c7667c2d
S
203 if now is None:
204 now = time.time()
3bc2ddcc
JMF
205 elapsed = now - start_time
206 if elapsed <= 0.0:
207 return
208 speed = float(byte_counter) / elapsed
8a77e5e6 209 if speed > rate_limit:
1a01639b
S
210 sleep_time = float(byte_counter) / rate_limit - elapsed
211 if sleep_time > 0:
212 time.sleep(sleep_time)
3bc2ddcc
JMF
213
214 def temp_name(self, filename):
215 """Returns a temporary filename for the given filename."""
b6b70730 216 if self.params.get('nopart', False) or filename == '-' or \
3bc2ddcc
JMF
217 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
218 return filename
b6b70730 219 return filename + '.part'
3bc2ddcc
JMF
220
def undo_temp_name(self, filename):
    """Strip a trailing '.part' from ``filename``; other names are returned unchanged."""
    suffix = '.part'
    return filename[:-len(suffix)] if filename.endswith(suffix) else filename
225
ea0c2f21
RA
def ytdl_filename(self, filename):
    """Return ``filename`` with '.ytdl' appended."""
    suffix = '.ytdl'
    return filename + suffix
228
def wrap_file_access(action, *, fatal=False):
    """Build a decorator that retries a file operation on certain OS errors.

    This is called in the class body, so it takes no ``self``. ``action``
    is the verb used in the messages ("Unable to <action> file"). The
    returned callable turns a method ``func`` into
    ``functools.partialmethod(wrapper, func)``.

    When ``fatal`` is true, no ``error`` callback is handed to
    ``RetryManager.report_retry``; otherwise errors are passed to
    ``fd.report_error``.
    """
    def error_callback(err, count, retries, *, fd):
        return RetryManager.report_retry(
            err, count, retries, info=fd.__to_screen,
            # The warn callback pauses for 10 ms before printing the message
            warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
            error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
            sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

    def wrapper(self, func, *args, **kwargs):
        # The retry count comes from the 'file_access_retries' option (default 3)
        for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
            try:
                return func(self, *args, **kwargs)
            except OSError as err:
                # EACCES / EINVAL are handed to the retry object and the loop continues
                if err.errno in (errno.EACCES, errno.EINVAL):
                    retry.error = err
                    continue
                # Any other OSError is reported right away with count=1, retries=0
                # NOTE(review): presumably this makes it count as "out of retries" -- confirm in RetryManager
                retry.error_callback(err, 1, 0)

    return functools.partial(functools.partialmethod, wrapper)
45806d44
EH
248
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
    """Open ``filename`` through the module-level ``sanitize_open`` helper.

    Returns the ``(stream, filename)`` pair produced by that helper. A debug
    message is written (once) when the stream has no truthy ``locked`` attribute.
    """
    stream, opened_name = sanitize_open(filename, open_mode)
    is_locked = getattr(stream, 'locked', None)
    if not is_locked:
        self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
    return stream, opened_name
205a0654 255
45806d44
EH
@wrap_file_access('remove')
def try_remove(self, filename):
    """Delete ``filename`` if it is an existing regular file; do nothing otherwise."""
    if not os.path.isfile(filename):
        return
    os.remove(filename)
45806d44
EH
260
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
    """Move ``old_filename`` to ``new_filename`` with ``os.replace``; equal names are a no-op."""
    if old_filename != new_filename:
        os.replace(old_filename, new_filename)
3bc2ddcc
JMF
266
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    Returns the timestamp that ``timeconvert`` produced for
    ``last_modified_hdr`` once an mtime update was attempted. Returns None
    when nothing was attempted: no header, ``filename`` is not an existing
    regular file, or the header converts to None or 0.
    """
    if last_modified_hdr is None:
        return None
    if not os.path.isfile(encodeFilename(filename)):
        return None
    filetime = timeconvert(last_modified_hdr)
    # None: no timestamp could be derived; 0: ignore obviously invalid dates
    if filetime is None or filetime == 0:
        return None
    # Best effort: any error raised by os.utime is deliberately suppressed
    with contextlib.suppress(Exception):
        os.utime(filename, (time.time(), filetime))
    return filetime
285
def report_destination(self, filename):
    """Report destination filename."""
    message = '[download] Destination: ' + filename
    self.to_screen(message)
3bc2ddcc 289
def _prepare_multiline_status(self, lines=1):
    """Create ``self._multiline``, the object used to print progress lines.

    The printer class is chosen from the options, checked in this order:
    'noprogress', a 'logger' in the ydl params, 'progress_with_newline',
    and finally the default multiline printer.
    """
    if self.params.get('noprogress'):
        printer = QuietMultilinePrinter()
    elif self.ydl.params.get('logger'):
        printer = MultilineLogger(self.ydl.params['logger'], lines)
    elif self.params.get('progress_with_newline'):
        printer = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
    else:
        printer = MultilinePrinter(self.ydl._out_files.out, lines, not self.params.get('quiet'))
    self._multiline = printer

    color_mode = self.ydl._allow_colors.out
    printer.allow_colors = color_mode and color_mode != 'no_color'
    printer._HAVE_FULLCAP = color_mode
bd50a52b
THD
301
def _finish_multiline_status(self):
    """Tell the progress printer that output is complete by calling its ``end()``."""
    printer = self._multiline
    printer.end()
304
# Text style for each progress field. _report_progress_status looks up the
# matching '_<name>_str' entry of the status dict and passes it, together
# with the style given here, to _format_progress.
# NOTE(review): an empty string presumably means "no styling" -- confirm in ydl._format_text
ProgressStyles = Namespace(
    downloaded_bytes='light blue',
    percent='light blue',
    eta='yellow',
    speed='green',
    elapsed='bold white',
    total_bytes='',
    total_bytes_estimate='',
)
7578d77d 314
def _report_progress_status(self, s, default_template):
    """Style the display fields of ``s``, then print the progress line and the console title.

    ``s`` is modified in place: styled ``_*_str`` values replace the plain
    ones and ``_default_template`` is set to ``default_template % s``.
    """
    for field, style in self.ProgressStyles.items_:
        key = f'_{field}_str'
        if key in s:
            s[key] = self._format_progress(s[key], style)
    s['_default_template'] = default_template % s

    # Template context: the info dict is exposed separately from the progress fields
    progress = s.copy()
    info = progress.pop('info_dict')
    context = {'info': info, 'progress': progress}

    templates = self.params.get('progress_template', {})
    line_template = templates.get('download') or '[download] %(progress._default_template)s'
    title_template = templates.get('download-title') or 'yt-dlp %(progress._default_template)s'

    status_line = self.ydl.evaluate_outtmpl(line_template, context)
    self._multiline.print_at_line(status_line, s.get('progress_idx') or 0)
    self.to_console_title(self.ydl.evaluate_outtmpl(title_template, context))
3bc2ddcc 334
def _format_progress(self, *args, **kwargs):
    """Call ``self.ydl._format_text`` with the progress printer's stream and colour setting."""
    printer = self._multiline
    return self.ydl._format_text(printer.stream, printer.allow_colors, *args, **kwargs)
338
def report_progress(self, s):
    """Progress hook: fill in the ``_*_str`` display fields of ``s`` and print a status line.

    ``s`` is the status dict handed to progress hooks and is updated in
    place. Only the 'finished' and 'downloading' statuses produce output.
    """
    def with_fields(*tups, default=''):
        # Each tuple is (*required_keys, template); the first tuple whose
        # keys are all present and not None in ``s`` supplies the template
        for *required, template in tups:
            if all(s.get(key) is not None for key in required):
                return template
        return default

    def _format_bytes(key):
        return f'{format_bytes(s.get(key)):>10s}'

    status = s['status']

    if status == 'finished':
        if self.params.get('noprogress'):
            self.to_screen('[download] Download completed')
        speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
        s.update({
            'speed': speed,
            '_speed_str': self.format_speed(speed).strip(),
            '_total_bytes_str': _format_bytes('total_bytes'),
            '_elapsed_str': self.format_seconds(s.get('elapsed')),
            '_percent_str': self.format_percent(100),
        })
        summary_template = join_nonempty(
            '100%%',
            with_fields(('total_bytes', 'of %(_total_bytes_str)s')),
            with_fields(('elapsed', 'in %(_elapsed_str)s')),
            with_fields(('speed', 'at %(_speed_str)s')),
            delim=' ')
        self._report_progress_status(s, summary_template)
        return

    if status != 'downloading':
        return

    percent = try_call(
        lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
        lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
        lambda: s['downloaded_bytes'] == 0 and 0)
    s.update({
        '_eta_str': self.format_eta(s.get('eta')).strip(),
        '_speed_str': self.format_speed(s.get('speed')),
        '_percent_str': self.format_percent(percent),
        '_total_bytes_str': _format_bytes('total_bytes'),
        '_total_bytes_estimate_str': _format_bytes('total_bytes_estimate'),
        '_downloaded_bytes_str': _format_bytes('downloaded_bytes'),
        '_elapsed_str': self.format_seconds(s.get('elapsed')),
    })

    # Main part of the line, chosen by which size information is available
    line_template = with_fields(
        ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
        ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
        default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')

    # Optional fragment counter suffix
    fragment_suffix = with_fields(
        ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
        ('fragment_index', ' (frag %(fragment_index)s)'))
    self._report_progress_status(s, line_template + fragment_suffix)
3bc2ddcc
JMF
393
def report_resuming_byte(self, resume_len):
    """Report attempt to resume at given byte."""
    message = '[download] Resuming download at byte %s' % resume_len
    self.to_screen(message)
3bc2ddcc 397
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
    """Report a retry through ``RetryManager.report_retry``.

    Passing ``frag_index`` (including None) marks the retry as
    fragment-related: the 'fragment' sleep function is used instead of
    'http', and a "fragment N" / "fragments" suffix is supplied.
    """
    if frag_index is NO_DEFAULT:
        sleep_key, suffix = 'http', None
    else:
        sleep_key = 'fragment'
        suffix = f'fragment{"s" if frag_index is None else f" {frag_index}"}'

    def warn(msg):
        return self.__to_screen(f'[download] Got error: {msg}')

    if fatal:
        def error(e):
            return self.report_error(f'\r[download] Got error: {e}')
    else:
        error = IDENTITY

    RetryManager.report_retry(
        err, count, retries, info=self.__to_screen,
        warn=warn,
        error=error,
        sleep_func=self.params.get('retry_sleep_functions', {}).get(sleep_key),
        suffix=suffix)
3bc2ddcc 407
3bc2ddcc
JMF
def report_unable_to_resume(self):
    """Report it was impossible to resume download."""
    message = '[download] Unable to resume'
    self.to_screen(message)
3bc2ddcc 411
0a473f2f 412 @staticmethod
413 def supports_manifest(manifest):
414 """ Whether the downloader can download the fragments from the manifest.
415 Redefine in subclasses if needed. """
416 pass
417
def download(self, filename, info_dict, subtitle=False):
    """Download to a filename using the info from info_dict.

    ``filename`` may be a path, '-' or an object with a ``write`` attribute.

    Returns a 2-tuple ``(result, real_download)``:
      * ``(True, False)`` when an existing file is reported as already
        downloaded and nothing is fetched;
      * ``(self.real_download(filename, info_dict), True)`` otherwise.
    """
    if not hasattr(filename, 'write'):
        # Filesystem checks are only evaluated for names, never for stream
        # objects (previously the 'overwrites' check ran before this guard)
        nooverwrites_and_exists = (
            not self.params.get('overwrites', True)
            and os.path.exists(encodeFilename(filename))
        )
        continuedl_and_exists = (
            self.params.get('continuedl', True)
            and os.path.isfile(encodeFilename(filename))
            and not self.params.get('nopart', False)
        )

        # Check file already present
        if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
                'total_bytes': os.path.getsize(encodeFilename(filename)),
            }, info_dict)
            self._finish_multiline_status()
            return True, False

    # Optional pause before the transfer starts
    if subtitle:
        sleep_interval = self.params.get('sleep_interval_subtitles') or 0
    else:
        min_sleep_interval = self.params.get('sleep_interval') or 0
        sleep_interval = random.uniform(
            min_sleep_interval, self.params.get('max_sleep_interval') or min_sleep_interval)
    if sleep_interval > 0:
        self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
        time.sleep(sleep_interval)

    ret = self.real_download(filename, info_dict)
    self._finish_multiline_status()
    return ret, True
3bc2ddcc
JMF
458
def real_download(self, filename, info_dict):
    """Real download process. Redefine in subclasses."""
    message = 'This method must be implemented by subclasses'
    raise NotImplementedError(message)
3bc2ddcc 462
3ba7740d 463 def _hook_progress(self, status, info_dict):
f5ea4748 464 # Ideally we want to make a copy of the dict, but that is too slow
03b4de72 465 status['info_dict'] = info_dict
f45e6c11 466 # youtube-dl passes the same status object to all the hooks.
467 # Some third party scripts seems to be relying on this.
468 # So keep this behavior if possible
3bc2ddcc 469 for ph in self._progress_hooks:
f45e6c11 470 ph(status)
3bc2ddcc
JMF
471
472 def add_progress_hook(self, ph):
71b640cc
PH
473 # See YoutubeDl.py (search for progress_hooks) for a description of
474 # this interface
3bc2ddcc 475 self._progress_hooks.append(ph)
222516d9 476
cd8a07a7 477 def _debug_cmd(self, args, exe=None):
222516d9
PH
478 if not self.params.get('verbose', False):
479 return
480
cd8a07a7
S
481 str_args = [decodeArgument(a) for a in args]
482
222516d9 483 if exe is None:
cd8a07a7 484 exe = os.path.basename(str_args[0])
222516d9 485
86e5f3ed 486 self.write_debug(f'{exe} command line: {shell_quote(str_args)}')