]>
Commit | Line | Data |
---|---|---|
5cda4eda | 1 | from __future__ import division, unicode_literals |
b6b70730 | 2 | |
3bc2ddcc JMF |
3 | import os |
4 | import re | |
3bc2ddcc | 5 | import time |
065bc354 | 6 | import random |
205a0654 | 7 | import errno |
3bc2ddcc JMF |
8 | |
9 | from ..utils import ( | |
1433734c | 10 | decodeArgument, |
3bc2ddcc | 11 | encodeFilename, |
9b9c5355 | 12 | error_to_compat_str, |
3bc2ddcc | 13 | format_bytes, |
205a0654 | 14 | sanitize_open, |
1433734c | 15 | shell_quote, |
e3ced9ed | 16 | timeconvert, |
aa7785f8 | 17 | timetuple_from_msec, |
3bc2ddcc | 18 | ) |
bd50a52b | 19 | from ..minicurses import ( |
819e0531 | 20 | MultilineLogger, |
bd50a52b THD |
21 | MultilinePrinter, |
22 | QuietMultilinePrinter, | |
23 | BreaklineStatusPrinter | |
24 | ) | |
3bc2ddcc JMF |
25 | |
26 | ||
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:            Print additional info to stdout.
    quiet:              Do not print messages to stdout.
    ratelimit:          Download speed limit, in bytes/sec.
    throttledratelimit: Assume the download is being throttled below this speed (bytes/sec)
    retries:            Number of times to retry for HTTP error 5xx
    file_access_retries:   Number of times to retry on file access error
    buffersize:         Size of download buffer in bytes.
    noresizebuffer:     Do not automatically resize the download buffer.
    continuedl:         Try to continue downloads if possible.
    noprogress:         Do not print the progress bar.
    nopart:             Do not use temporary .part files.
    updatetime:         Use the Last-modified header to set output file timestamps.
    test:               Download only first bytes to test the downloader.
    min_filesize:       Skip files smaller than this size
    max_filesize:       Skip files larger than this size
    xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
    external_downloader_args:  A dictionary of downloader keys (in lower case)
                        and a list of additional command-line arguments for the
                        executable. Use 'default' as the name for arguments to be
                        passed to all downloaders. For compatibility with youtube-dl,
                        a single list of args can also be used
    hls_use_mpegts:     Use the mpegts container for HLS videos.
    http_chunk_size:    Size of a chunk for chunk-based HTTP downloading. May be
                        useful for bypassing bandwidth throttling imposed by
                        a webserver (experimental)
    progress_template:  See YoutubeDL.py

    Options that are also read by this class:

    overwrites:         When false, an existing destination is reported as
                        already downloaded (see download()).
    sleep_interval, max_sleep_interval:
                        Bounds (seconds) of the random pause before a
                        non-subtitle download.
    sleep_interval_subtitles:
                        Pause (seconds) before a subtitle download.
    progress_with_newline, no_color:
                        Select/configure the progress printer
                        (see _prepare_multiline_status()).

    Subclasses of this one must re-define the real_download method.
    """

    _TEST_FILE_SIZE = 10241
    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self.ydl = ydl
        self._progress_hooks = []
        self.params = params
        self._prepare_multiline_status()
        # The built-in progress display is just the first progress hook.
        self.add_progress_hook(self.report_progress)

    @staticmethod
    def format_seconds(seconds):
        """Format a duration in seconds as 'MM:SS' or 'HH:MM:SS'.

        Returns '--:--:--' when the duration exceeds 99 hours.
        """
        # NOTE: do not call this local `time`; that would shadow the module.
        parts = timetuple_from_msec(seconds * 1000)
        if parts.hours > 99:
            return '--:--:--'
        # The last tuple field is dropped from the output
        # (presumably the millisecond part - confirm against timetuple_from_msec).
        if not parts.hours:
            return '%02d:%02d' % parts[1:-1]
        return '%02d:%02d:%02d' % parts[:-1]

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage as a float.

        Returns None when the total length is unknown (None) or zero, since
        no meaningful percentage exists in either case.
        """
        if data_len is None or data_len == 0:
            return None
        return float(byte_counter) / float(data_len) * 100.0

    @staticmethod
    def format_percent(percent):
        """Format a percentage for display; None is shown as '---.-%'."""
        if percent is None:
            return '---.-%'
        elif percent == 100:
            return '100%'
        # Right-align to 6 characters so the column width stays stable.
        return '%6s' % ('%3.1f%%' % percent)

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate the remaining time in whole seconds.

        start/now are timestamps (now=None means "the current time"),
        total/current are byte counts. Returns None when no estimate can be
        made: unknown total, nothing transferred yet, or under one
        millisecond elapsed.
        """
        if total is None:
            return None
        if now is None:
            now = time.time()
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)

    @staticmethod
    def format_eta(eta):
        """Format an ETA in seconds for display; None is shown as '--:--'."""
        if eta is None:
            return '--:--'
        return FileDownloader.format_seconds(eta)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed in bytes/sec, or None if it is unknown.

        now=None means "the current time", matching calc_eta() and
        slow_down(). None is returned when nothing was transferred or when
        less than one millisecond has elapsed.
        """
        if now is None:
            now = time.time()
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Format a speed for display, right-aligned to 10 characters."""
        if speed is None:
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % format_bytes(speed))

    @staticmethod
    def format_retries(retries):
        """Format a retry limit for display; infinity is shown as 'inf'."""
        return 'inf' if retries == float('inf') else '%.0f' % retries

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read block size from the last block's throughput.

        The result is the measured rate (bytes / elapsed_time) clamped to
        [bytes / 2, bytes * 2], with the upper bound capped at 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            # Too fast to measure: grow as much as allowed.
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer.

        Accepts a decimal number with an optional one-letter, case-insensitive
        binary suffix (k, M, G, T, P, E, Z, Y). Returns None if the string
        does not match.
        """
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix is found at index 0, i.e. a multiplier of 1024 ** 0.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def to_screen(self, *args, **kargs):
        """Forward a message to ydl.to_stdout, honouring the 'quiet' option."""
        self.ydl.to_stdout(*args, quiet=self.params.get('quiet'), **kargs)

    def to_stderr(self, message):
        """Forward a message to ydl.to_stderr."""
        self.ydl.to_stderr(message)

    def to_console_title(self, message):
        """Forward a message to ydl.to_console_title."""
        self.ydl.to_console_title(message)

    def trouble(self, *args, **kargs):
        """Forward to ydl.trouble."""
        self.ydl.trouble(*args, **kargs)

    def report_warning(self, *args, **kargs):
        """Forward to ydl.report_warning."""
        self.ydl.report_warning(*args, **kargs)

    def report_error(self, *args, **kargs):
        """Forward to ydl.report_error."""
        self.ydl.report_error(*args, **kargs)

    def write_debug(self, *args, **kargs):
        """Forward to ydl.write_debug."""
        self.ydl.write_debug(*args, **kargs)

    def slow_down(self, start_time, now, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit')
        if rate_limit is None or byte_counter == 0:
            return
        if now is None:
            now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep just long enough for the average speed to drop to the limit.
            sleep_time = float(byte_counter) / rate_limit - elapsed
            if sleep_time > 0:
                time.sleep(sleep_time)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename.

        The name is returned unchanged when 'nopart' is set, when it is '-',
        or when it exists but is not a regular file.
        """
        if self.params.get('nopart', False) or filename == '-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + '.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from filename, if present."""
        if filename.endswith('.part'):
            return filename[:-len('.part')]
        return filename

    def ytdl_filename(self, filename):
        """Return the name of the '.ytdl' companion file for filename."""
        return filename + '.ytdl'

    def sanitize_open(self, filename, open_mode):
        """Call sanitize_open(), retrying on EACCES.

        Up to 'file_access_retries' (default 10) retries are made, 10 ms
        apart. Any other error, or exhausting the retries, re-raises the
        original exception.
        """
        file_access_retries = self.params.get('file_access_retries', 10)
        retry = 0
        while True:
            try:
                return sanitize_open(filename, open_mode)
            except (IOError, OSError) as err:
                retry = retry + 1
                if retry > file_access_retries or err.errno not in (errno.EACCES,):
                    raise
                self.to_screen(
                    '[download] Got file access error. Retrying (attempt %d of %s) ...'
                    % (retry, self.format_retries(file_access_retries)))
                time.sleep(0.01)

    def try_rename(self, old_filename, new_filename):
        """Rename old_filename to new_filename, reporting (not raising) failures."""
        if old_filename == new_filename:
            return
        try:
            os.replace(old_filename, new_filename)
        except (IOError, OSError) as err:
            self.report_error(f'unable to rename file: {err}')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the value produced by timeconvert() for the header when it
        was usable, otherwise None. Failure to actually set the time is
        ignored.
        """
        if last_modified_hdr is None:
            return None
        if not os.path.isfile(encodeFilename(filename)):
            return None
        filetime = timeconvert(last_modified_hdr)
        # None means the header yielded no time; 0 is an obviously invalid date.
        if filetime is None or filetime == 0:
            return None
        try:
            os.utime(filename, (time.time(), filetime))
        except Exception:
            # Deliberately best-effort: a wrong mtime must not fail the download.
            pass
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen('[download] Destination: ' + filename)

    def _prepare_multiline_status(self, lines=1):
        """Create the progress printer (self._multiline) for `lines` status lines.

        Selection order: 'noprogress' -> quiet printer; a configured ydl
        'logger' -> logger-backed printer; 'progress_with_newline' ->
        one line per update; otherwise the in-place multiline printer.
        """
        if self.params.get('noprogress'):
            self._multiline = QuietMultilinePrinter()
        elif self.ydl.params.get('logger'):
            self._multiline = MultilineLogger(self.ydl.params['logger'], lines)
        elif self.params.get('progress_with_newline'):
            self._multiline = BreaklineStatusPrinter(self.ydl._screen_file, lines)
        else:
            self._multiline = MultilinePrinter(self.ydl._screen_file, lines, not self.params.get('quiet'))
        self._multiline.allow_colors = self._multiline._HAVE_FULLCAP and not self.params.get('no_color')

    def _finish_multiline_status(self):
        """Tell the progress printer that output is complete."""
        self._multiline.end()

    # Style applied to each '_<name>_str' progress field before display.
    _progress_styles = {
        'downloaded_bytes': 'light blue',
        'percent': 'light blue',
        'eta': 'yellow',
        'speed': 'green',
        'elapsed': 'bold white',
        'total_bytes': '',
        'total_bytes_estimate': '',
    }

    def _report_progress_status(self, s, default_template):
        """Render the status dict `s` and print it as the progress line and title.

        Note that `s` is modified in place: styled '_*_str' fields and
        '_default_template' are written back into it. `s` must contain
        'info_dict' (set by _hook_progress()).
        """
        for name, style in self._progress_styles.items():
            name = f'_{name}_str'
            if name not in s:
                continue
            s[name] = self._format_progress(s[name], style)
        s['_default_template'] = default_template % s

        # Expose the data to templates as 'info.*' and 'progress.*'.
        progress_dict = s.copy()
        progress_dict.pop('info_dict')
        progress_dict = {'info': s['info_dict'], 'progress': progress_dict}

        progress_template = self.params.get('progress_template', {})
        self._multiline.print_at_line(self.ydl.evaluate_outtmpl(
            progress_template.get('download') or '[download] %(progress._default_template)s',
            progress_dict), s.get('progress_idx') or 0)
        self.to_console_title(self.ydl.evaluate_outtmpl(
            progress_template.get('download-title') or 'yt-dlp %(progress._default_template)s',
            progress_dict))

    def _format_progress(self, *args, **kwargs):
        """Style text for the progress stream via ydl._format_text."""
        return self.ydl._format_text(
            self._multiline.stream, self._multiline.allow_colors, *args, **kwargs)

    def report_progress(self, s):
        """Default progress hook: display the status dict `s`.

        Only the 'finished' and 'downloading' statuses produce output; the
        display strings are stored back into `s` under '_*_str' keys.
        """
        if s['status'] == 'finished':
            if self.params.get('noprogress'):
                self.to_screen('[download] Download completed')
            msg_template = '100%%'
            if s.get('total_bytes') is not None:
                s['_total_bytes_str'] = format_bytes(s['total_bytes'])
                msg_template += ' of %(_total_bytes_str)s'
            if s.get('elapsed') is not None:
                s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                msg_template += ' in %(_elapsed_str)s'
            s['_percent_str'] = self.format_percent(100)
            self._report_progress_status(s, msg_template)
            return

        if s['status'] != 'downloading':
            return

        if s.get('eta') is not None:
            s['_eta_str'] = self.format_eta(s['eta'])
        else:
            s['_eta_str'] = 'Unknown'

        # Prefer the exact total, then the estimate, for the percentage.
        if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
        elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
            s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
        elif s.get('downloaded_bytes') == 0:
            s['_percent_str'] = self.format_percent(0)
        else:
            s['_percent_str'] = 'Unknown %'

        if s.get('speed') is not None:
            s['_speed_str'] = self.format_speed(s['speed'])
        else:
            s['_speed_str'] = 'Unknown speed'

        if s.get('total_bytes') is not None:
            s['_total_bytes_str'] = format_bytes(s['total_bytes'])
            msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
        elif s.get('total_bytes_estimate') is not None:
            s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
            msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
        elif s.get('downloaded_bytes') is not None:
            # Unknown size: show the amount downloaded instead of a percentage.
            s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
            if s.get('elapsed'):
                s['_elapsed_str'] = self.format_seconds(s['elapsed'])
                msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
            else:
                msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
        else:
            msg_template = '%(_percent_str)s at %(_speed_str)s ETA %(_eta_str)s'
        if s.get('fragment_index') and s.get('fragment_count'):
            msg_template += ' (frag %(fragment_index)s/%(fragment_count)s)'
        elif s.get('fragment_index'):
            msg_template += ' (frag %(fragment_index)s)'
        self._report_progress_status(s, msg_template)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen('[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, err, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(
            '[download] Got server HTTP error: %s. Retrying (attempt %d of %s) ...'
            % (error_to_compat_str(err), count, self.format_retries(retries)))

    def report_file_already_downloaded(self, *args, **kwargs):
        """Report file has already been fully downloaded."""
        return self.ydl.report_file_already_downloaded(*args, **kwargs)

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen('[download] Unable to resume')

    @staticmethod
    def supports_manifest(manifest):
        """ Whether the downloader can download the fragments from the manifest.
        Redefine in subclasses if needed. """
        # The base implementation gives no answer (returns None).
        pass

    def download(self, filename, info_dict, subtitle=False):
        """Download to a filename using the info from info_dict.

        `filename` may be a path, '-' or an object with a `write` attribute.

        Returns a 2-tuple (success, real_download):
          * (True, False) when the destination was found already present
            and real_download() was not invoked;
          * (ret, True) otherwise, where ret is the value returned by
            real_download().
        """
        if not hasattr(filename, 'write'):
            # Filesystem checks only make sense for path names, so they are
            # performed only after ruling out file-like objects.
            nooverwrites_and_exists = (
                not self.params.get('overwrites', True)
                and os.path.exists(encodeFilename(filename))
            )
            continuedl_and_exists = (
                self.params.get('continuedl', True)
                and os.path.isfile(encodeFilename(filename))
                and not self.params.get('nopart', False)
            )

            # Check file already present
            if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
                self.report_file_already_downloaded(filename)
                self._hook_progress({
                    'filename': filename,
                    'status': 'finished',
                    'total_bytes': os.path.getsize(encodeFilename(filename)),
                }, info_dict)
                self._finish_multiline_status()
                return True, False

        if subtitle is False:
            min_sleep_interval = self.params.get('sleep_interval')
            if min_sleep_interval:
                max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
                sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
                self.to_screen(
                    '[download] Sleeping %s seconds ...' % (
                        int(sleep_interval) if sleep_interval.is_integer()
                        else '%.2f' % sleep_interval))
                time.sleep(sleep_interval)
        else:
            # Accept any real number (not only int); anything else, including
            # bool and None, means "do not sleep".
            sleep_interval_sub = self.params.get('sleep_interval_subtitles')
            if (isinstance(sleep_interval_sub, (int, float))
                    and not isinstance(sleep_interval_sub, bool)
                    and sleep_interval_sub > 0):
                self.to_screen(
                    '[download] Sleeping %s seconds ...' % (
                        sleep_interval_sub))
                time.sleep(sleep_interval_sub)
        ret = self.real_download(filename, info_dict)
        self._finish_multiline_status()
        return ret, True

    def real_download(self, filename, info_dict):
        """Real download process. Redefine in subclasses."""
        raise NotImplementedError('This method must be implemented by subclasses')

    def _hook_progress(self, status, info_dict):
        """Attach info_dict to `status` and pass it to every progress hook."""
        if not self._progress_hooks:
            return
        status['info_dict'] = info_dict
        # youtube-dl passes the same status object to all the hooks.
        # Some third party scripts seems to be relying on this.
        # So keep this behavior if possible
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """Register a callable to be invoked with each progress status dict."""
        # See YoutubeDl.py (search for progress_hooks) for a description of
        # this interface
        self._progress_hooks.append(ph)

    def _debug_cmd(self, args, exe=None):
        """Write the command line `args` as a debug message when 'verbose' is set.

        `exe` defaults to the base name of the first argument.
        """
        if not self.params.get('verbose', False):
            return

        str_args = [decodeArgument(a) for a in args]

        if exe is None:
            exe = os.path.basename(str_args[0])

        self.write_debug('%s command line: %s' % (exe, shell_quote(str_args)))