]> jfr.im git - yt-dlp.git/blame - yt_dlp/downloader/common.py
[tiktok] Fix `extractor_key` used in archive
[yt-dlp.git] / yt_dlp / downloader / common.py
CommitLineData
5cda4eda 1from __future__ import division, unicode_literals
b6b70730 2
3bc2ddcc
JMF
3import os
4import re
3bc2ddcc 5import time
065bc354 6import random
205a0654 7import errno
3bc2ddcc
JMF
8
9from ..utils import (
1433734c 10 decodeArgument,
3bc2ddcc 11 encodeFilename,
9b9c5355 12 error_to_compat_str,
3bc2ddcc 13 format_bytes,
205a0654 14 sanitize_open,
1433734c 15 shell_quote,
e3ced9ed 16 timeconvert,
aa7785f8 17 timetuple_from_msec,
3bc2ddcc 18)
bd50a52b 19from ..minicurses import (
819e0531 20 MultilineLogger,
bd50a52b
THD
21 MultilinePrinter,
22 QuietMultilinePrinter,
23 BreaklineStatusPrinter
24)
3bc2ddcc
JMF
25
26
class FileDownloader:
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for HTTP error 5xx
    file_access_retries:   Number of times to retry on file access error
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py

    Subclasses of this one must re-define the real_download method.
    """

    _TEST_FILE_SIZE = 10241
    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self.ydl = ydl
        self._progress_hooks = []
        self.params = params
        self._prepare_multiline_status()
        # The built-in progress reporter is always the first hook
        self.add_progress_hook(self.report_progress)

    @staticmethod
    def format_seconds(seconds):
        """Format a duration in seconds as 'MM:SS' or 'HH:MM:SS'.

        Returns '--:--:--' when the duration exceeds 99 hours.
        """
        # Named `parts` (not `time`) so the imported `time` module is not shadowed.
        # The helper's result exposes `.hours` and is sliced as a 4-tuple whose
        # last field is dropped from the output.
        parts = timetuple_from_msec(seconds * 1000)
        if parts.hours > 99:
            return '--:--:--'
        if not parts.hours:
            return '%02d:%02d' % parts[1:-1]
        return '%02d:%02d:%02d' % parts[:-1]

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage as a float.

        Returns None when the total length is unknown (None) or zero, since
        no meaningful percentage exists in either case.
        """
        if not data_len:
            return None
        return float(byte_counter) / float(data_len) * 100.0

    @staticmethod
    def format_percent(percent):
        """Render a percentage for the progress line ('---.-%' if unknown)."""
        if percent is None:
            return '---.-%'
        elif percent == 100:
            return '100%'
        return '%6s' % ('%3.1f%%' % percent)

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate the remaining time in whole seconds.

        Returns None if the total is unknown, nothing has been transferred
        yet, or less than one millisecond has elapsed. If `now` is None the
        current time is used.
        """
        if total is None:
            return None
        if now is None:
            now = time.time()
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)

    @staticmethod
    def format_eta(eta):
        """Render an ETA for the progress line ('--:--' if unknown)."""
        if eta is None:
            return '--:--'
        return FileDownloader.format_seconds(eta)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed in bytes/sec between `start` and `now`.

        Returns None if no bytes were transferred or less than one
        millisecond has elapsed.
        """
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Render a speed for the progress line, right-aligned to 10 chars."""
        if speed is None:
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % format_bytes(speed))

    @staticmethod
    def format_retries(retries):
        """Render a retry count, showing 'inf' for an infinite count."""
        return 'inf' if retries == float('inf') else '%.0f' % retries

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from the throughput of the last read.

        The result is the observed rate (bytes/sec) clamped between half
        and double the previous size, and never above 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix is found at index 0, which yields a multiplier of 1
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def to_screen(self, *args, **kargs):
        """Forward a message to the YoutubeDL stdout, honouring 'quiet'."""
        self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs)

    def to_stderr(self, message):
        """Forward a message to the YoutubeDL stderr."""
        self.ydl.to_stderr(message)

    def to_console_title(self, message):
        """Forward a message to the YoutubeDL console title."""
        self.ydl.to_console_title(message)

    def trouble(self, *args, **kargs):
        """Delegate to YoutubeDL.trouble."""
        self.ydl.trouble(*args, **kargs)

    def report_warning(self, *args, **kargs):
        """Delegate to YoutubeDL.report_warning."""
        self.ydl.report_warning(*args, **kargs)

    def report_error(self, *args, **kargs):
        """Delegate to YoutubeDL.report_error."""
        self.ydl.report_error(*args, **kargs)

    def write_debug(self, *args, **kargs):
        """Delegate to YoutubeDL.write_debug."""
        self.ydl.write_debug(*args, **kargs)

    def slow_down(self, start_time, now, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit')
        if rate_limit is None or byte_counter == 0:
            return
        if now is None:
            now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep just long enough for the average speed to fall to the limit
            sleep_time = float(byte_counter) / rate_limit - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == '-':
            return filename
        encoded = encodeFilename(filename)
        # Something that exists but is not a regular file is written to directly
        if os.path.exists(encoded) and not os.path.isfile(encoded):
            return filename
        return filename + '.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' suffix from the filename, if present."""
        if filename.endswith('.part'):
            return filename[:-len('.part')]
        return filename

    def ytdl_filename(self, filename):
        """Return the name of the '.ytdl' companion file for the filename."""
        return filename + '.ytdl'

    def sanitize_open(self, filename, open_mode):
        """Open a file via the module-level sanitize_open, retrying on EACCES.

        Up to 'file_access_retries' (default 10) retries are made, 0.01s
        apart. Any other error, or exhausting the retries, re-raises.
        """
        file_access_retries = self.params.get('file_access_retries', 10)
        retry = 0
        while True:
            try:
                return sanitize_open(filename, open_mode)
            except OSError as err:
                retry = retry + 1
                if retry > file_access_retries or err.errno not in (errno.EACCES,):
                    raise
                self.to_screen(
                    '[download] Got file access error. Retrying (attempt %d of %s) ...'
                    % (retry, self.format_retries(file_access_retries)))
                time.sleep(0.01)

    def try_rename(self, old_filename, new_filename):
        """Rename a file, reporting (not raising) any OS-level failure."""
        if old_filename == new_filename:
            return
        try:
            os.replace(old_filename, new_filename)
        except OSError as err:
            self.report_error(f'unable to rename file: {err}')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the timestamp produced by timeconvert when one was obtained
        and is non-zero, otherwise None.
        """
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        # Ignore obviously invalid dates
        if filetime == 0:
            return
        try:
            os.utime(filename, (time.time(), filetime))
        except Exception:
            # Deliberately best-effort: failing to set the mtime must not
            # fail an otherwise successful download
            pass
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen('[download] Destination: ' + filename)

    def _prepare_multiline_status(self, lines=1):
        """Choose the progress printer implementation from the options."""
        if self.params.get('noprogress'):
            self._multiline = QuietMultilinePrinter()
        elif self.ydl.params.get('logger'):
            self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
        elif self.params.get('progress_with_newline'):
            self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines)
        else:
            self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet'))
        self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')

    def _finish_multiline_status(self):
        """Tell the progress printer that output is complete."""
        self._multiline.end()

    # Style applied to each '_<name>_str' field of the progress status
    _progress_styles = {
        'downloaded_bytes': 'light blue',
        'percent': 'light blue',
        'eta': 'yellow',
        'speed': 'green',
        'elapsed': 'bold white',
        'total_bytes': '',
        'total_bytes_estimate': '',
    }

    def _report_progress_status(self, s, default_template):
        """Style the status fields, then print the progress line and title.

        `s` is modified in place: its '_*_str' fields are styled and
        '_default_template' is set to `default_template % s`.
        `s` must contain the 'info_dict' key.
        """
        for name, style in self._progress_styles.items():
            name = f'_{name}_str'
            if name not in s:
                continue
            s[name] = self._format_progress(s[name], style)
        s['_default_template'] = default_template % s

        # Templates address the data as %(info.*)s and %(progress.*)s
        progress_dict = s.copy()
        progress_dict.pop('info_dict')
        progress_dict = {'info': s['info_dict'], 'progress': progress_dict}

        progress_template = self.params.get('progress_template', {})
        self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
            progress_template.get('download') or '[download] %(progress._default_template)s',
            progress_dict), s.get('progress_idx') or 0)
        self.to_console_title(self.ydl.evaluate_outtmpl(
            progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
            progress_dict))

    def _format_progress(self, *args, **kwargs):
        """Style text for the progress printer's stream via YoutubeDL."""
        return self.ydl._format_text(
            self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)

    def report_progress(self, s):
        """Default progress hook: build and print the progress line.

        Handles the 'finished' and 'downloading' statuses; any other status
        is ignored. Display strings ('_*_str' keys) are added to `s`.
        """
        if s['status'] == 'finished':
            if self.params.get('noprogress'):
                self.to_screen('[download] Download completed')
            msg_template = '100%%'
            if s.get('total_bytes') is not None:
                s['_total_bytes_str'] = format_bytes(s['total_bytes'])
                msg_template += ' of %(_total_bytes_str)s'
            if s.get('elapsed') is not None:
                s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                msg_template += ' in %(_elapsed_str)s'
            s['_percent_str'] = self.format_percent(100)
            self._report_progress_status(s, msg_template)
            return

        if s['status'] != 'downloading':
            return

        if s.get('eta') is not None:
            s['_eta_str'] = self.format_eta(s['eta'])
        else:
            s['_eta_str'] = 'Unknown'

        # Prefer the exact total over the estimate when computing the percentage
        if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
        elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
        else:
            if s.get('downloaded_bytes') == 0:
                s['_percent_str'] = self.format_percent(0)
            else:
                s['_percent_str'] = 'Unknown %'

        if s.get('speed') is not None:
            s['_speed_str'] = self.format_speed(s['speed'])
        else:
            s['_speed_str'] = 'Unknown speed'

        if s.get('total_bytes') is not None:
            s['_total_bytes_str'] = format_bytes(s['total_bytes'])
            msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
        elif s.get('total_bytes_estimate') is not None:
            s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
            msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
        else:
            if s.get('downloaded_bytes') is not None:
                s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
                if s.get('elapsed'):
                    s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
                else:
                    msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
            else:
                msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
        if s.get('fragment_index') and s.get('fragment_count'):
            msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
        elif s.get('fragment_index'):
            msg_template += ' (frag %(fragment_index)s)'
        self._report_progress_status(s, msg_template)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen('[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, err, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
            % (error_to_compat_str(err), count, self.format_retries(retries)))

    def report_file_already_downloaded(self, *args, **kwargs):
        """Report file has already been fully downloaded."""
        return self.ydl.report_file_already_downloaded(*args, **kwargs)

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen('[download] Unable to resume')

    @staticmethod
    def supports_manifest(manifest):
        """ Whether the downloader can download the fragments from the manifest.
        Redefine in subclasses if needed. """
        pass

    def download(self, filename, info_dict, subtitle=False):
        """Download to a filename using the info from info_dict

        Returns a 2-tuple:
          - (True, False) if the file was already present and real_download
            was not called;
          - (ret, True) otherwise, where ret is the value returned by
            real_download.

        `filename` may also be an object with a `write` attribute, in which
        case all "already exists" checks are skipped.
        """
        # Filesystem checks only make sense for path-like filenames, so they
        # must all stay behind this guard
        if not hasattr(filename, 'write'):
            encoded = encodeFilename(filename)
            nooverwrites_and_exists = (
                not self.params.get('overwrites', True)
                and os.path.exists(encoded)
            )
            continuedl_and_exists = (
                self.params.get('continuedl', True)
                and os.path.isfile(encoded)
                and not self.params.get('nopart', False)
            )

            # Check file already present
            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
                self.report_file_already_downloaded(filename)
                self._hook_progress({
                    'filename': filename,
                    'status': 'finished',
                    'total_bytes': os.path.getsize(encoded),
                }, info_dict)
                self._finish_multiline_status()
                return True, False

        if subtitle is False:
            min_sleep_interval = self.params.get('sleep_interval')
            if min_sleep_interval:
                max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
                sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
                self.to_screen(
                    '[download] Sleeping %s seconds ...' % (
                        int(sleep_interval) if sleep_interval.is_integer()
                        else '%.2f' % sleep_interval))
                time.sleep(sleep_interval)
        else:
            sleep_interval_sub = self.params.get('sleep_interval_subtitles')
            # Accept any real number (not only int); bool is excluded on purpose
            is_number = (
                isinstance(sleep_interval_sub, (int, float))
                and not isinstance(sleep_interval_sub, bool))
            if is_number and sleep_interval_sub > 0:
                self.to_screen(
                    '[download] Sleeping %s seconds ...' % (
                        sleep_interval_sub))
                time.sleep(sleep_interval_sub)
        ret = self.real_download(filename, info_dict)
        self._finish_multiline_status()
        return ret, True

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _hook_progress(self, status, info_dict):
        """Attach info_dict to the status and pass it to every progress hook."""
        if not self._progress_hooks:
            return
        status['info_dict'] = info_dict
        # youtube-dl passes the same status object to all the hooks.
        # Some third party scripts seems to be relying on this.
        # So keep this behavior if possible
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """Register a callable invoked with the status dict on progress."""
        # See YoutubeDl.py (search for progress_hooks) for a description of
        # this interface
        self._progress_hooks.append(ph)

    def _debug_cmd(self, args, exe=None):
        """Write the given command line to the debug log in verbose mode.

        If `exe` is None, the basename of the first argument is used as
        the executable name.
        """
        if not self.params.get('verbose', False):
            return

        str_args = [decodeArgument(a) for a in args]

        if exe is None:
            exe = os.path.basename(str_args[0])

        self.write_debug('%s command line: %s' % (exe, shell_quote(str_args)))