]>
Commit | Line | Data |
---|---|---|
1 | import contextlib | |
2 | import errno | |
3 | import functools | |
4 | import os | |
5 | import random | |
6 | import re | |
7 | import threading | |
8 | import time | |
9 | ||
10 | from ..minicurses import ( | |
11 | BreaklineStatusPrinter, | |
12 | MultilineLogger, | |
13 | MultilinePrinter, | |
14 | QuietMultilinePrinter, | |
15 | ) | |
16 | from ..utils import ( | |
17 | IDENTITY, | |
18 | NO_DEFAULT, | |
19 | LockingUnsupportedError, | |
20 | Namespace, | |
21 | RetryManager, | |
22 | classproperty, | |
23 | decodeArgument, | |
24 | deprecation_warning, | |
25 | encodeFilename, | |
26 | format_bytes, | |
27 | join_nonempty, | |
28 | parse_bytes, | |
29 | remove_start, | |
30 | sanitize_open, | |
31 | shell_quote, | |
32 | timeconvert, | |
33 | timetuple_from_msec, | |
34 | try_call, | |
35 | ) | |
36 | ||
37 | ||
class FileDownloader:
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for expected network errors.
                        Default is 0 for API, but 10 for CLI
    file_access_retries:   Number of times to retry on file access error (default: 3)
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    progress_delta:     The minimum time between progress output, in seconds
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py
    retry_sleep_functions: See YoutubeDL.py

    Subclasses of this one must re-define the real_download method.
    """

    # NOTE(review): presumably the number of bytes fetched when the `test`
    # option is set; it is not referenced in this part of the file - confirm
    # against the subclasses.
    _TEST_FILE_SIZE = 10241
    # Options dictionary; __init__ replaces this with the per-instance dict.
    params = None
85 | ||
def __init__(self, ydl, params):
    """Create a FileDownloader object with the given options.

    ydl:    The owning object; stored and mined for reporting helpers
            by _set_ydl.
    params: Dictionary of downloader options (see the class docstring).
    """
    self._set_ydl(ydl)
    self.params = params
    self._progress_hooks = []
    self._prepare_multiline_status()
    # The built-in progress printer is always the first registered hook.
    self.add_progress_hook(self.report_progress)

    if not self.params.get('progress_delta'):
        return
    # State used by report_progress to space out progress output.
    self._progress_delta_time = time.monotonic()
    self._progress_delta_lock = threading.Lock()
96 | ||
def _set_ydl(self, ydl):
    """Store *ydl* and borrow its reporting helpers.

    Each helper listed below is copied from *ydl* onto this object,
    unless this object already has an attribute of that name.
    """
    self.ydl = ydl

    borrowed = (
        'deprecation_warning',
        'deprecated_feature',
        'report_error',
        'report_file_already_downloaded',
        'report_warning',
        'to_console_title',
        'to_stderr',
        'trouble',
        'write_debug',
    )
    for helper_name in borrowed:
        if hasattr(self, helper_name):
            continue
        setattr(self, helper_name, getattr(ydl, helper_name))
113 | ||
def to_screen(self, *args, **kargs):
    """Forward a message to ydl.to_screen, adding the `quiet` option."""
    quiet = self.params.get('quiet')
    self.ydl.to_screen(*args, quiet=quiet, **kargs)


# Private alias of to_screen. Inside the class body the double underscore
# is name-mangled; the retry reporters in this class call it through
# `fd.__to_screen` / `self.__to_screen`.
__to_screen = to_screen
118 | ||
@classproperty
def FD_NAME(cls):
    """Downloader name derived from the class name.

    The last two characters of the class name are dropped, an underscore
    is inserted at every lower-case/upper-case boundary, and the result
    is lower-cased.
    """
    stem = cls.__name__[:-2]
    snake_cased = re.sub(r'(?<=[a-z])(?=[A-Z])', '_', stem)
    return snake_cased.lower()
122 | ||
123 | @staticmethod | |
124 | def format_seconds(seconds): | |
125 | if seconds is None: | |
126 | return ' Unknown' | |
127 | time = timetuple_from_msec(seconds * 1000) | |
128 | if time.hours > 99: | |
129 | return '--:--:--' | |
130 | return '%02d:%02d:%02d' % time[:-1] | |
131 | ||
@classmethod
def format_eta(cls, seconds):
    """Format an ETA: like format_seconds, minus a leading '00:', right-aligned to 8 characters."""
    clock = cls.format_seconds(seconds)
    shortened = remove_start(clock, '00:')
    return format(shortened, '>8s')
135 | ||
136 | @staticmethod | |
137 | def calc_percent(byte_counter, data_len): | |
138 | if data_len is None: | |
139 | return None | |
140 | return float(byte_counter) / float(data_len) * 100.0 | |
141 | ||
142 | @staticmethod | |
143 | def format_percent(percent): | |
144 | return ' N/A%' if percent is None else f'{percent:>5.1f}%' | |
145 | ||
@classmethod
def calc_eta(cls, start_or_rate, now_or_remaining, total=NO_DEFAULT, current=NO_DEFAULT):
    """Estimate the remaining download time in whole seconds.

    Two call forms are supported:

    calc_eta(rate, remaining)
        rate is in bytes per second, remaining in bytes.
    calc_eta(start, now, total, current)
        start/now are timestamps (now=None means time.time()),
        total/current are byte counts.

    Returns None when the ETA cannot be computed: a missing or zero
    rate, a missing remaining amount, or an unknown total.
    """
    if total is NO_DEFAULT:
        rate, remaining = start_or_rate, now_or_remaining
        # A zero rate used to raise ZeroDivisionError; a stalled transfer
        # simply has no ETA.
        if rate is None or remaining is None or rate == 0:
            return None
        return int(float(remaining) / rate)

    start, now = start_or_rate, now_or_remaining
    if total is None:
        return None
    if now is None:
        now = time.time()
    rate = cls.calc_speed(start, now, current)
    # calc_speed yields None when no speed can be derived; pass that on.
    return rate and int((float(total) - float(current)) / rate)
161 | ||
162 | @staticmethod | |
163 | def calc_speed(start, now, bytes): | |
164 | dif = now - start | |
165 | if bytes == 0 or dif < 0.001: # One millisecond | |
166 | return None | |
167 | return float(bytes) / dif | |
168 | ||
169 | @staticmethod | |
170 | def format_speed(speed): | |
171 | return ' Unknown B/s' if speed is None else f'{format_bytes(speed):>10s}/s' | |
172 | ||
173 | @staticmethod | |
174 | def format_retries(retries): | |
175 | return 'inf' if retries == float('inf') else int(retries) | |
176 | ||
177 | @staticmethod | |
178 | def filesize_or_none(unencoded_filename): | |
179 | if os.path.isfile(unencoded_filename): | |
180 | return os.path.getsize(unencoded_filename) | |
181 | return 0 | |
182 | ||
183 | @staticmethod | |
184 | def best_block_size(elapsed_time, bytes): | |
185 | new_min = max(bytes / 2.0, 1.0) | |
186 | new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB | |
187 | if elapsed_time < 0.001: | |
188 | return int(new_max) | |
189 | rate = bytes / elapsed_time | |
190 | if rate > new_max: | |
191 | return int(new_max) | |
192 | if rate < new_min: | |
193 | return int(new_min) | |
194 | return int(rate) | |
195 | ||
@staticmethod
def parse_bytes(bytestr):
    """Deprecated wrapper around the parse_bytes utility function.

    Emits a deprecation warning and then delegates to the module-level
    parse_bytes imported at the top of the file.
    """
    notice = (
        'yt_dlp.FileDownloader.parse_bytes is deprecated and '
        'may be removed in the future. Use yt_dlp.utils.parse_bytes instead'
    )
    deprecation_warning(notice)
    return parse_bytes(bytestr)
202 | ||
def slow_down(self, start_time, now, byte_counter):
    """Sleep long enough to bring the average speed down to the `ratelimit` option.

    start_time:   Timestamp at which the transfer started.
    now:          Current timestamp, or None to use time.time().
    byte_counter: Bytes transferred since start_time.

    Does nothing when no limit is set, nothing was transferred yet, or
    no time has elapsed.
    """
    limit = self.params.get('ratelimit')
    if limit is None or byte_counter == 0:
        return
    if now is None:
        now = time.time()

    elapsed = now - start_time
    if elapsed <= 0.0:
        return

    if float(byte_counter) / elapsed > limit:
        # Time the transfer should have taken at the limit, minus the time
        # it actually took.
        pause = float(byte_counter) / limit - elapsed
        if pause > 0:
            time.sleep(pause)
218 | ||
def temp_name(self, filename):
    """Returns a temporary filename for the given filename.

    The name is returned unchanged when the `nopart` option is set, when
    it is '-', or when it exists but is not a regular file; otherwise
    '.part' is appended.
    """
    if self.params.get('nopart', False) or filename == '-':
        return filename
    encoded = encodeFilename(filename)
    if os.path.exists(encoded) and not os.path.isfile(encoded):
        return filename
    return filename + '.part'
225 | ||
def undo_temp_name(self, filename):
    """Strip one trailing '.part' from *filename*, if present."""
    suffix = '.part'
    return filename[:-len(suffix)] if filename.endswith(suffix) else filename
230 | ||
def ytdl_filename(self, filename):
    """Return *filename* with '.ytdl' appended."""
    suffix = '.ytdl'
    return filename + suffix
233 | ||
def wrap_file_access(action, *, fatal=False):
    """Build a decorator that retries a file operation on access errors.

    This is a plain function evaluated in the class body (it takes no
    `self`); it is applied as `@wrap_file_access(...)` to the methods
    defined after it.

    action: Verb inserted into the "Unable to <action> file" messages.
    fatal:  When true, no `error` reporter is passed to
            RetryManager.report_retry; otherwise errors are reported
            through fd.report_error.

    Returns a callable that turns the decorated function into a
    functools.partialmethod of `wrapper`, so that the decorated function
    arrives in `wrapper` as `func`.
    """
    def error_callback(err, count, retries, *, fd):
        # `fd.__to_screen` is name-mangled because this code sits in the
        # class body; it resolves to the private alias of to_screen.
        return RetryManager.report_retry(
            err, count, retries, info=fd.__to_screen,
            # The warn reporter pauses for 10 ms before printing.
            warn=lambda e: (time.sleep(0.01), fd.to_screen(f'[download] Unable to {action} file: {e}')),
            error=None if fatal else lambda e: fd.report_error(f'Unable to {action} file: {e}'),
            sleep_func=fd.params.get('retry_sleep_functions', {}).get('file_access'))

    def wrapper(self, func, *args, **kwargs):
        # The number of attempts comes from the `file_access_retries` option (default 3).
        for retry in RetryManager(self.params.get('file_access_retries', 3), error_callback, fd=self):
            try:
                return func(self, *args, **kwargs)
            except OSError as err:
                # Only EACCES and EINVAL are handed to the RetryManager for another attempt.
                if err.errno in (errno.EACCES, errno.EINVAL):
                    retry.error = err
                    continue
                # Any other OSError is reported straight away with count=1, retries=0.
                # NOTE(review): presumably this makes report_retry treat the
                # retries as exhausted - confirm against RetryManager.
                retry.error_callback(err, 1, 0)

    return functools.partial(functools.partialmethod, wrapper)
253 | ||
@wrap_file_access('open', fatal=True)
def sanitize_open(self, filename, open_mode):
    """Open *filename* through the sanitize_open utility, with file-access retries.

    Returns the (file object, filename) pair produced by the utility. A
    debug message is written once if the file object does not have a
    truthy `locked` attribute.
    """
    stream, opened_name = sanitize_open(filename, open_mode)
    has_lock = getattr(stream, 'locked', None)
    if not has_lock:
        self.write_debug(f'{LockingUnsupportedError.msg}. Proceeding without locking', only_once=True)
    return stream, opened_name
260 | ||
@wrap_file_access('remove')
def try_remove(self, filename):
    """Delete *filename* if it is an existing regular file, with file-access retries."""
    if not os.path.isfile(filename):
        return
    os.remove(filename)
265 | ||
@wrap_file_access('rename')
def try_rename(self, old_filename, new_filename):
    """Move *old_filename* onto *new_filename*, with file-access retries.

    Nothing happens when both names are equal.
    """
    if old_filename != new_filename:
        os.replace(old_filename, new_filename)
271 | ||
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    filename:          Path of the file to touch.
    last_modified_hdr: Value of the Last-modified header, or None.

    Returns the timestamp obtained from timeconvert, or None when there
    is no header, the file does not exist, or the timestamp is missing
    or 0. Errors raised while setting the time are ignored.
    """
    if last_modified_hdr is None:
        return None
    if not os.path.isfile(encodeFilename(filename)):
        return None

    filetime = timeconvert(last_modified_hdr)
    # A timestamp of 0 is treated as an obviously invalid date.
    if filetime is None or filetime == 0:
        return None

    with contextlib.suppress(Exception):
        os.utime(filename, (time.time(), filetime))
    return filetime
290 | ||
def report_destination(self, filename):
    """Print the name of the file being written to."""
    message = '[download] Destination: ' + filename
    self.to_screen(message)
294 | ||
def _prepare_multiline_status(self, lines=1):
    """Create the printer used for progress output and store it in self._multiline.

    The printer is picked from the options in this order: `noprogress`,
    a `logger` configured on ydl, `progress_with_newline`, and finally a
    regular multiline printer on ydl's output stream.
    """
    if self.params.get('noprogress'):
        printer = QuietMultilinePrinter()
    elif self.ydl.params.get('logger'):
        printer = MultilineLogger(self.ydl.params['logger'], lines)
    elif self.params.get('progress_with_newline'):
        printer = BreaklineStatusPrinter(self.ydl._out_files.out, lines)
    else:
        show_output = not self.params.get('quiet')
        printer = MultilinePrinter(self.ydl._out_files.out, lines, show_output)
    self._multiline = printer

    # Colours are allowed unless the colour setting is falsy or 'no_color'.
    printer.allow_colors = self.ydl._allow_colors.out and self.ydl._allow_colors.out != 'no_color'
    printer._HAVE_FULLCAP = self.ydl._allow_colors.out
306 | ||
def _finish_multiline_status(self):
    """Tell the progress printer that output is complete."""
    printer = self._multiline
    printer.end()
309 | ||
# Style applied to each progress field. _report_progress_status looks up
# the '_<name>_str' entry of the status dict for every name listed here
# and passes it, together with the style, to _format_progress.
ProgressStyles = Namespace(**{
    'downloaded_bytes': 'light blue',
    'percent': 'light blue',
    'eta': 'yellow',
    'speed': 'green',
    'elapsed': 'bold white',
    'total_bytes': '',
    'total_bytes_estimate': '',
})
319 | ||
def _report_progress_status(self, s, default_template):
    """Render the status dict *s* and show it as a progress line and console title.

    s:                Status dict; it is modified in place (the styled
                      '_*_str' fields and '_default_template' are stored
                      in it) and must contain 'info_dict'.
    default_template: %-style template that is filled in from *s*.
    """
    for field, style in self.ProgressStyles.items_:
        key = f'_{field}_str'
        if key in s:
            s[key] = self._format_progress(s[key], style)
    s['_default_template'] = default_template % s

    # The template engine receives the info dict and the remaining
    # progress fields as two separate namespaces.
    info = s['info_dict']
    progress_fields = s.copy()
    del progress_fields['info_dict']
    template_vars = {'info': info, 'progress': progress_fields}

    templates = self.params.get('progress_template', {})

    line_template = templates.get('download') or '[download] %(progress._default_template)s'
    line = self.ydl.evaluate_outtmpl(line_template, template_vars)
    self._multiline.print_at_line(line, s.get('progress_idx') or 0)

    title_template = templates.get('download-title') or 'yt-dlp %(progress._default_template)s'
    title = self.ydl.evaluate_outtmpl(title_template, template_vars)
    self.to_console_title(title)
339 | ||
def _format_progress(self, *args, **kwargs):
    """Format text through ydl._format_text for the progress printer's stream and colour setting."""
    printer = self._multiline
    return self.ydl._format_text(printer.stream, printer.allow_colors, *args, **kwargs)
343 | ||
def report_progress(self, s):
    """Progress hook that prints the status dict *s*.

    Only the 'finished' and 'downloading' statuses produce output; any
    other status is ignored. The formatted '_*_str' fields are stored in
    *s* before it is rendered.
    """
    def pick_template(*candidates, default=''):
        # Each candidate is (field, ..., template); the first one whose
        # fields are all present and not None wins.
        for *required, template in candidates:
            if all(s.get(field) is not None for field in required):
                return template
        return default

    def padded_bytes(key):
        return f'{format_bytes(s.get(key)):>10s}'

    status = s['status']

    if status == 'finished':
        if self.params.get('noprogress'):
            self.to_screen('[download] Download completed')
        speed = try_call(lambda: s['total_bytes'] / s['elapsed'])
        s.update(
            speed=speed,
            _speed_str=self.format_speed(speed).strip(),
            _total_bytes_str=padded_bytes('total_bytes'),
            _elapsed_str=self.format_seconds(s.get('elapsed')),
            _percent_str=self.format_percent(100),
        )
        summary = join_nonempty(
            '100%%',
            pick_template(('total_bytes', 'of %(_total_bytes_str)s')),
            pick_template(('elapsed', 'in %(_elapsed_str)s')),
            pick_template(('speed', 'at %(_speed_str)s')),
            delim=' ')
        self._report_progress_status(s, summary)
        return

    if status != 'downloading':
        return

    min_gap = self.params.get('progress_delta')
    if min_gap:
        # Skip this update if the next allowed output time has not come yet.
        with self._progress_delta_lock:
            if time.monotonic() < self._progress_delta_time:
                return
            self._progress_delta_time += min_gap

    eta_text = self.format_eta(s.get('eta')).strip()
    speed_text = self.format_speed(s.get('speed'))
    percent = try_call(
        lambda: 100 * s['downloaded_bytes'] / s['total_bytes'],
        lambda: 100 * s['downloaded_bytes'] / s['total_bytes_estimate'],
        lambda: s['downloaded_bytes'] == 0 and 0)
    s.update(
        _eta_str=eta_text,
        _speed_str=speed_text,
        _percent_str=self.format_percent(percent),
        _total_bytes_str=padded_bytes('total_bytes'),
        _total_bytes_estimate_str=padded_bytes('total_bytes_estimate'),
        _downloaded_bytes_str=padded_bytes('downloaded_bytes'),
        _elapsed_str=self.format_seconds(s.get('elapsed')),
    )

    main_part = pick_template(
        ('total_bytes', '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('total_bytes_estimate', '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'),
        ('downloaded_bytes', 'elapsed', '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'),
        ('downloaded_bytes', '%(_downloaded_bytes_str)s at %(_speed_str)s'),
        default='%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s')
    fragment_part = pick_template(
        ('fragment_index', 'fragment_count', ' (frag %(fragment_index)s/%(fragment_count)s)'),
        ('fragment_index', ' (frag %(fragment_index)s)'))

    self._report_progress_status(s, main_part + fragment_part)
404 | ||
def report_resuming_byte(self, resume_len):
    """Print the byte offset at which the download is being resumed."""
    message = '[download] Resuming download at byte {}'.format(resume_len)
    self.to_screen(message)
408 | ||
def report_retry(self, err, count, retries, frag_index=NO_DEFAULT, fatal=True):
    """Report a retry through RetryManager.report_retry.

    err, count, retries: Passed on unchanged.
    frag_index: When given, the retry is reported for a fragment: the
                'fragment' sleep function is used and the message gets a
                'fragment <index>' suffix ('fragments' if the index is None).
    fatal:      When true, errors are reported through self.report_error;
                otherwise IDENTITY is used as the error reporter.
    """
    for_fragment = frag_index is not NO_DEFAULT

    if not for_fragment:
        suffix = None
    elif frag_index is None:
        suffix = 'fragments'
    else:
        suffix = f'fragment {frag_index}'

    def warn(msg):
        return self.__to_screen(f'[download] Got error: {msg}')

    def report_fatal(e):
        return self.report_error(f'\r[download] Got error: {e}')

    sleep_functions = self.params.get('retry_sleep_functions', {})
    RetryManager.report_retry(
        err, count, retries, info=self.__to_screen,
        warn=warn,
        error=report_fatal if fatal else IDENTITY,
        sleep_func=sleep_functions.get('fragment' if for_fragment else 'http'),
        suffix=suffix)
418 | ||
def report_unable_to_resume(self):
    """Print a notice that the download could not be resumed."""
    message = '[download] Unable to resume'
    self.to_screen(message)
422 | ||
423 | @staticmethod | |
424 | def supports_manifest(manifest): | |
425 | """ Whether the downloader can download the fragments from the manifest. | |
426 | Redefine in subclasses if needed. """ | |
427 | pass | |
428 | ||
def download(self, filename, info_dict, subtitle=False):
    """Download to a filename using the info from info_dict.

    filename:  Destination path, '-', or an object with a `write` attribute.
    info_dict: Passed to the progress hooks and to real_download.
    subtitle:  Selects the `sleep_interval_subtitles` option for the
               pre-download sleep instead of `sleep_interval` /
               `max_sleep_interval`.

    Returns a 2-tuple. It is (True, False) when the file is already
    present and real_download was skipped; otherwise it is
    (result of real_download, True).
    """
    params = self.params
    protected_and_exists = (
        not params.get('overwrites', True)
        and os.path.exists(encodeFilename(filename)))

    if not hasattr(filename, 'write'):
        resumable_and_exists = (
            params.get('continuedl', True)
            and os.path.isfile(encodeFilename(filename))
            and not params.get('nopart', False))

        # Check file already present
        already_present = protected_and_exists or resumable_and_exists
        if filename != '-' and already_present:
            self.report_file_already_downloaded(filename)
            finished_status = {
                'filename': filename,
                'status': 'finished',
                'total_bytes': os.path.getsize(encodeFilename(filename)),
            }
            self._hook_progress(finished_status, info_dict)
            self._finish_multiline_status()
            return True, False

    if subtitle:
        delay = params.get('sleep_interval_subtitles') or 0
    else:
        shortest = params.get('sleep_interval') or 0
        longest = params.get('max_sleep_interval') or shortest
        delay = random.uniform(shortest, longest)
    if delay > 0:
        self.to_screen(f'[download] Sleeping {delay:.2f} seconds ...')
        time.sleep(delay)

    result = self.real_download(filename, info_dict)
    self._finish_multiline_status()
    return result, True
469 | ||
def real_download(self, filename, info_dict):
    """Perform the actual download. Subclasses must override this method."""
    reason = 'This method must be implemented by subclasses'
    raise NotImplementedError(reason)
473 | ||
def _hook_progress(self, status, info_dict):
    """Attach *info_dict* to *status* and pass it to every progress hook.

    The status dict is modified in place rather than copied (a copy
    would be preferable, but is too slow).
    """
    status['info_dict'] = info_dict
    # All hooks receive the very same status object: youtube-dl behaved
    # this way and some third-party scripts seem to depend on it.
    for hook in self._progress_hooks:
        hook(status)
482 | ||
def add_progress_hook(self, ph):
    """Register *ph* to be called with the status dict on progress updates.

    See YoutubeDL.py (search for progress_hooks) for a description of
    this interface.
    """
    hooks = self._progress_hooks
    hooks.append(ph)
487 | ||
def _debug_cmd(self, args, exe=None):
    """Write the command line *args* to the debug log when `verbose` is set.

    exe: Name shown for the program; defaults to the basename of the
         first argument.
    """
    if not self.params.get('verbose', False):
        return

    decoded_args = [decodeArgument(arg) for arg in args]
    program = os.path.basename(decoded_args[0]) if exe is None else exe
    self.write_debug(f'{program} command line: {shell_quote(decoded_args)}')