]> jfr.im git - yt-dlp.git/blame - youtube_dl/FileDownloader.py
Merge pull request #1997 from rg3/simplify-url_basename
[yt-dlp.git] / youtube_dl / FileDownloader.py
CommitLineData
d77c3dfd
FV
1import os
2import re
d77c3dfd
FV
3import subprocess
4import sys
5import time
d77c3dfd 6
76e67c2c
PH
7from .utils import (
8 compat_urllib_error,
9 compat_urllib_request,
10 ContentTooShortError,
11 determine_ext,
12 encodeFilename,
02dbf93f 13 format_bytes,
76e67c2c
PH
14 sanitize_open,
15 timeconvert,
16)
d77c3dfd
FV
17
18
19class FileDownloader(object):
59ae15a5
PH
20 """File Downloader class.
21
22 File downloader objects are the ones responsible of downloading the
8222d8de 23 actual video file and writing it to disk.
59ae15a5
PH
24
25 File downloaders accept a lot of parameters. In order not to saturate
26 the object constructor with arguments, it receives a dictionary of
8222d8de 27 options instead.
59ae15a5
PH
28
29 Available options:
30
8222d8de 31 verbose: Print additional info to stdout.
59ae15a5 32 quiet: Do not print messages to stdout.
59ae15a5 33 ratelimit: Download speed limit, in bytes/sec.
59ae15a5
PH
34 retries: Number of times to retry for HTTP error 5xx
35 buffersize: Size of download buffer in bytes.
36 noresizebuffer: Do not automatically resize the download buffer.
37 continuedl: Try to continue downloads if possible.
38 noprogress: Do not print the progress bar.
59ae15a5
PH
39 logtostderr: Log messages to stderr instead of stdout.
40 consoletitle: Display progress in console window's titlebar.
41 nopart: Do not use temporary .part files.
42 updatetime: Use the Last-modified header to set output file timestamps.
37c8fd48 43 test: Download only first bytes to test the downloader.
9e982f9e
JC
44 min_filesize: Skip files smaller than this size
45 max_filesize: Skip files larger than this size
59ae15a5
PH
46 """
47
48 params = None
59ae15a5 49
8222d8de 50 def __init__(self, ydl, params):
59ae15a5 51 """Create a FileDownloader object with the given options."""
8222d8de 52 self.ydl = ydl
bffbd5f0 53 self._progress_hooks = []
59ae15a5
PH
54 self.params = params
55
af8bd6a8
JMF
56 @staticmethod
57 def format_seconds(seconds):
58 (mins, secs) = divmod(seconds, 60)
061b2889 59 (hours, mins) = divmod(mins, 60)
af8bd6a8
JMF
60 if hours > 99:
61 return '--:--:--'
62 if hours == 0:
63 return '%02d:%02d' % (mins, secs)
64 else:
65 return '%02d:%02d:%02d' % (hours, mins, secs)
66
59ae15a5
PH
67 @staticmethod
68 def calc_percent(byte_counter, data_len):
69 if data_len is None:
4ae72004
JMF
70 return None
71 return float(byte_counter) / float(data_len) * 100.0
72
73 @staticmethod
74 def format_percent(percent):
75 if percent is None:
59ae15a5 76 return '---.-%'
4ae72004 77 return '%6s' % ('%3.1f%%' % percent)
59ae15a5
PH
78
79 @staticmethod
80 def calc_eta(start, now, total, current):
81 if total is None:
4ae72004 82 return None
59ae15a5
PH
83 dif = now - start
84 if current == 0 or dif < 0.001: # One millisecond
4ae72004 85 return None
59ae15a5 86 rate = float(current) / dif
4ae72004
JMF
87 return int((float(total) - float(current)) / rate)
88
89 @staticmethod
90 def format_eta(eta):
91 if eta is None:
92 return '--:--'
af8bd6a8 93 return FileDownloader.format_seconds(eta)
59ae15a5
PH
94
95 @staticmethod
96 def calc_speed(start, now, bytes):
97 dif = now - start
98 if bytes == 0 or dif < 0.001: # One millisecond
4ae72004
JMF
99 return None
100 return float(bytes) / dif
101
102 @staticmethod
103 def format_speed(speed):
104 if speed is None:
59ae15a5 105 return '%10s' % '---b/s'
02dbf93f 106 return '%10s' % ('%s/s' % format_bytes(speed))
59ae15a5
PH
107
108 @staticmethod
109 def best_block_size(elapsed_time, bytes):
110 new_min = max(bytes / 2.0, 1.0)
111 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
112 if elapsed_time < 0.001:
113 return int(new_max)
114 rate = bytes / elapsed_time
115 if rate > new_max:
116 return int(new_max)
117 if rate < new_min:
118 return int(new_min)
119 return int(rate)
120
121 @staticmethod
122 def parse_bytes(bytestr):
123 """Parse a string indicating a byte quantity into an integer."""
124 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
125 if matchobj is None:
126 return None
127 number = float(matchobj.group(1))
128 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
129 return int(round(number * multiplier))
130
8222d8de
JMF
def to_screen(self, *args, **kwargs):
    """Forward all arguments to the to_screen method of self.ydl."""
    emit = self.ydl.to_screen
    emit(*args, **kwargs)
59ae15a5
PH
133
def to_stderr(self, message):
    """Forward *message* to self.ydl.

    NOTE(review): despite the name, this calls ydl.to_screen, not a
    stderr-specific method -- confirm whether that is intentional.
    """
    emit = self.ydl.to_screen
    emit(message)
59ae15a5 136
1e5b9a95
PH
def to_console_title(self, message):
    """Forward *message* to the to_console_title method of self.ydl."""
    set_title = self.ydl.to_console_title
    set_title(message)
59ae15a5 139
8222d8de
JMF
def trouble(self, *args, **kwargs):
    """Forward all arguments to the trouble method of self.ydl."""
    handler = self.ydl.trouble
    handler(*args, **kwargs)
142
def report_warning(self, *args, **kwargs):
    """Forward all arguments to the report_warning method of self.ydl."""
    handler = self.ydl.report_warning
    handler(*args, **kwargs)
145
def report_error(self, *args, **kwargs):
    """Forward all arguments to the report_error method of self.ydl."""
    handler = self.ydl.report_error
    handler(*args, **kwargs)
4e1582f3 148
59ae15a5
PH
def slow_down(self, start_time, byte_counter):
    """Pause long enough to bring the average speed down to 'ratelimit'.

    Does nothing when no rate limit is configured, nothing has been
    transferred, or no time has elapsed since start_time.
    """
    limit = self.params.get('ratelimit', None)
    if limit is None:
        return
    if byte_counter == 0:
        return
    elapsed = time.time() - start_time
    if elapsed <= 0.0:
        return
    if float(byte_counter) / elapsed > limit:
        # Sleep for the time by which we are ahead of the allowed schedule.
        time.sleep((byte_counter - limit * elapsed) / limit)
161
def temp_name(self, filename):
    """Return the name to write to while downloading *filename*.

    The name is returned unchanged when the 'nopart' option is set, when
    it is u'-', or when it exists but is not a regular file; otherwise
    u'.part' is appended.
    """
    if self.params.get('nopart', False):
        return filename
    if filename == u'-':
        return filename
    exists_but_not_file = (
        os.path.exists(encodeFilename(filename)) and
        not os.path.isfile(encodeFilename(filename)))
    if exists_but_not_file:
        return filename
    return filename + u'.part'
168
def undo_temp_name(self, filename):
    """Strip a trailing u'.part' from *filename*, if there is one."""
    suffix = u'.part'
    if not filename.endswith(suffix):
        return filename
    return filename[:-len(suffix)]
173
def try_rename(self, old_filename, new_filename):
    """Rename old_filename to new_filename, reporting failure as an error.

    Nothing happens when both names are equal.
    """
    if old_filename == new_filename:
        return
    try:
        source = encodeFilename(old_filename)
        target = encodeFilename(new_filename)
        os.rename(source, target)
    except (IOError, OSError):
        self.report_error(u'unable to rename file')
59ae15a5
PH
181
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    last_modified_hdr is the raw value of a Last-modified header, or None.
    Returns the timestamp produced by timeconvert() when one was obtained
    and is non-zero (whether or not the file time could actually be set);
    returns None otherwise.
    """
    if last_modified_hdr is None:
        return None
    # Use the same encoded name for the existence check and for utime().
    encoded_name = encodeFilename(filename)
    if not os.path.isfile(encoded_name):
        return None
    filetime = timeconvert(last_modified_hdr)
    # None means the header could not be converted; 0 is an obviously
    # invalid date.
    if filetime is None or filetime == 0:
        return None
    try:
        os.utime(encoded_name, (time.time(), filetime))
    except Exception:
        # Best effort only: failing to set the timestamp is not an error,
        # but do not swallow KeyboardInterrupt/SystemExit.
        pass
    return filetime
202
59ae15a5
PH
def report_destination(self, filename):
    """Print the name of the file the download is written to."""
    message = u'[download] Destination: ' + filename
    self.to_screen(message)
206
def _report_progress_status(self, msg, is_last_line=False):
    """Print a '[download] ' progress line and mirror it to the console title.

    With the 'progress_with_newline' option each update is printed as its
    own line. Otherwise the current line is overwritten, and the end of
    line is only emitted when is_last_line is true.
    """
    line = u'[download] ' + msg
    if self.params.get('progress_with_newline', False):
        self.to_screen(line)
    elif os.name == 'nt':
        # Pad with spaces so leftovers of a longer previous line are
        # overwritten.
        previous_width = getattr(self, '_report_progress_prev_line_length', 0)
        line = line.ljust(previous_width)
        self._report_progress_prev_line_length = len(line)
        self.to_screen(u'\r' + line, skip_eol=not is_last_line)
    else:
        # Only send the erase-line escape sequence to a terminal.
        if sys.stderr.isatty():
            prefix = u'\r\x1b[K'
        else:
            prefix = u'\r'
        self.to_screen(prefix + line, skip_eol=not is_last_line)
    self.to_console_title(u'youtube-dl ' + msg)
223
def report_progress(self, percent, data_len_str, speed, eta):
    """Print one progress line, unless the 'noprogress' option is set.

    percent and eta may be None when they are unknown.
    """
    if self.params.get('noprogress', False):
        return
    eta_str = 'Unknown ETA' if eta is None else self.format_eta(eta)
    percent_str = ('Unknown %' if percent is None
                   else self.format_percent(percent))
    speed_str = self.format_speed(speed)
    fields = (percent_str, data_len_str, speed_str, eta_str)
    self._report_progress_status(u'%s of %s at %s ETA %s' % fields)
241
def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
    """Print progress for a stream of unknown length.

    Shows the amount downloaded, the speed and the elapsed time. Does
    nothing when the 'noprogress' option is set.
    """
    if self.params.get('noprogress', False):
        return
    fields = (
        format_bytes(downloaded_data_len),
        self.format_speed(speed),
        FileDownloader.format_seconds(elapsed),
    )
    self._report_progress_status(u'%s at %s (%s)' % fields)
250
a213880a
PH
def report_finish(self, data_len_str, tot_time):
    """Print the final status line once a download has completed."""
    if self.params.get('noprogress', False):
        self.to_screen(u'[download] Download completed')
        return
    duration = self.format_seconds(tot_time)
    summary = u'100%% of %s in %s' % (data_len_str, duration)
    self._report_progress_status(summary, is_last_line=True)
59ae15a5
PH
260
def report_resuming_byte(self, resume_len):
    """Print the byte offset at which a resumed download continues."""
    message = u'[download] Resuming download at byte %s' % resume_len
    self.to_screen(message)
264
def report_retry(self, count, retries):
    """Print that attempt *count* of *retries* follows a server HTTP error."""
    attempt = (count, retries)
    self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % attempt)
268
def report_file_already_downloaded(self, file_name):
    """Print that *file_name* is already complete.

    Falls back to a message without the name when printing the name
    raises UnicodeEncodeError.
    """
    try:
        named_message = u'[download] %s has already been downloaded' % file_name
        self.to_screen(named_message)
    except UnicodeEncodeError:
        generic_message = u'[download] The file has already been downloaded'
        self.to_screen(generic_message)
275
def report_unable_to_resume(self):
    """Print that the partial download could not be resumed."""
    message = u'[download] Unable to resume'
    self.to_screen(message)
279
def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live, conn):
    """Download an RTMP stream by running the external "rtmpdump" program.

    The stream is written to the temporary name for *filename* and renamed
    on success. player_url, page_url, play_path, tc_url and conn are passed
    to rtmpdump as --swfVfy, --pageUrl, --playpath, --tcUrl and --conn when
    set; live adds --live.

    Returns True on success, and False when rtmpdump cannot be run or exits
    with an error code.
    """
    def run_rtmpdump(args):
        """Run rtmpdump with *args*, reporting progress parsed from its stderr.

        Returns the exit code of the process.
        """
        start = time.time()
        resume_percent = None
        resume_downloaded_data_len = None
        proc = subprocess.Popen(args, stderr=subprocess.PIPE)
        cursor_in_new_line = True
        proc_stderr_closed = False
        while not proc_stderr_closed:
            # Read one line from stderr. rtmpdump terminates its progress
            # updates with '\r', so the stream is split on both '\r' and '\n'.
            line = u''
            while True:
                char = proc.stderr.read(1)
                if not char:
                    proc_stderr_closed = True
                    break
                if char in [b'\r', b'\n']:
                    break
                line += char.decode('ascii', 'replace')
            if not line:
                # Empty line, or stderr was closed (the loop then ends).
                continue
            mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
            if mobj:
                downloaded_data_len = int(float(mobj.group(1))*1024)
                percent = float(mobj.group(2))
                if not resume_percent:
                    # Remember where this run started, so that ETA and speed
                    # only account for data transferred during this run.
                    resume_percent = percent
                    resume_downloaded_data_len = downloaded_data_len
                eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
                speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
                data_len = None
                if percent > 0:
                    # Extrapolate the total size from the reported percentage.
                    data_len = int(downloaded_data_len * 100 / percent)
                data_len_str = u'~' + format_bytes(data_len)
                self.report_progress(percent, data_len_str, speed, eta)
                cursor_in_new_line = False
                self._hook_progress({
                    'downloaded_bytes': downloaded_data_len,
                    'total_bytes': data_len,
                    'tmpfilename': tmpfilename,
                    'filename': filename,
                    'status': 'downloading',
                    'eta': eta,
                    'speed': speed,
                })
            else:
                # no percent for live streams
                mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec', line)
                if mobj:
                    downloaded_data_len = int(float(mobj.group(1))*1024)
                    time_now = time.time()
                    speed = self.calc_speed(start, time_now, downloaded_data_len)
                    self.report_progress_live_stream(downloaded_data_len, speed, time_now - start)
                    cursor_in_new_line = False
                    self._hook_progress({
                        'downloaded_bytes': downloaded_data_len,
                        'tmpfilename': tmpfilename,
                        'filename': filename,
                        'status': 'downloading',
                        'speed': speed,
                    })
                elif self.params.get('verbose', False):
                    # Any other output is only shown in verbose mode.
                    if not cursor_in_new_line:
                        self.to_screen(u'')
                    cursor_in_new_line = True
                    self.to_screen(u'[rtmpdump] '+line)
        proc.wait()
        if not cursor_in_new_line:
            self.to_screen(u'')
        return proc.returncode

    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)
    test = self.params.get('test', False)

    # Check for rtmpdump first
    try:
        # The handle on the null device is closed again after the probe.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
        return False

    # Download using rtmpdump. rtmpdump returns exit code 2 when
    # the connection was interrupted and resuming appears to be
    # possible. This is part of rtmpdump's normal usage, AFAIK.
    basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
    if player_url is not None:
        basic_args += ['--swfVfy', player_url]
    if page_url is not None:
        basic_args += ['--pageUrl', page_url]
    if play_path is not None:
        basic_args += ['--playpath', play_path]
    if tc_url is not None:
        # Pass the tcUrl that was supplied, not the stream URL.
        basic_args += ['--tcUrl', tc_url]
    if test:
        basic_args += ['--stop', '1']
    if live:
        basic_args += ['--live']
    if conn:
        basic_args += ['--conn', conn]
    if self.params.get('continuedl', False):
        args = basic_args + ['--resume', '--skip', '1']
    else:
        args = basic_args + []

    if sys.platform == 'win32' and sys.version_info < (3, 0):
        # Windows subprocess module does not actually support Unicode
        # on Python 2.x
        # See http://stackoverflow.com/a/9951851/35070
        subprocess_encoding = sys.getfilesystemencoding()
        args = [a.encode(subprocess_encoding, 'ignore') for a in args]
    else:
        subprocess_encoding = None

    if self.params.get('verbose', False):
        if subprocess_encoding:
            str_args = [
                a.decode(subprocess_encoding) if isinstance(a, bytes) else a
                for a in args]
        else:
            str_args = args
        try:
            import pipes
            shell_quote = lambda quoted_args: ' '.join(map(pipes.quote, quoted_args))
        except ImportError:
            shell_quote = repr
        self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(str_args))

    retval = run_rtmpdump(args)

    # NOTE(review): the retries below pass basic_args without the Windows /
    # Python 2 byte encoding applied to the first invocation -- confirm.
    while (retval == 2 or retval == 1) and not test:
        prevsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
        time.sleep(5.0)  # This seems to be needed
        retry_args = basic_args + ['-e']
        if retval == 1:
            retry_args += ['-k', '1']
        retval = run_rtmpdump(retry_args)
        cursize = os.path.getsize(encodeFilename(tmpfilename))
        if prevsize == cursize and retval == 1:
            break
        # Some rtmp streams seem to abort after ~ 99.8%. Don't complain for those
        if prevsize == cursize and retval == 2 and cursize > 1024:
            self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
            retval = 0
            break
    if retval == 0 or (test and retval == 2):
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'[rtmpdump] %s bytes' % fsize)
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'rtmpdump exited with code %d' % retval)
        return False
436
def _download_with_mplayer(self, filename, url):
    """Download an MMS or RTSP stream by running the external "mplayer".

    The stream is dumped to the temporary name for *filename* and renamed
    on success. Returns True on success, and False when mplayer cannot be
    run or exits with a non-zero code.
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
    # Check for mplayer first
    try:
        # The handle on the null device is closed again after the probe.
        with open(os.path.devnull, 'w') as devnull:
            subprocess.call(['mplayer', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
    except (OSError, IOError):
        self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] )
        return False

    # Download using mplayer.
    retval = subprocess.call(args)
    if retval == 0:
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'mplayer exited with code %d' % retval)
        return False
466
b15d4f62
JMF
def _download_m3u8_with_ffmpeg(self, filename, url):
    """Download an m3u8 manifest by running "avconv" or "ffmpeg".

    The first of the two programs that can be run is used. The output is
    written as mp4 to the temporary name for *filename* and renamed on
    success. Returns True on success, and False when neither program is
    available or the chosen one exits with a non-zero code.
    """
    self.report_destination(filename)
    tmpfilename = self.temp_name(filename)

    args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
            '-bsf:a', 'aac_adtstoasc', tmpfilename]

    for program in ['avconv', 'ffmpeg']:
        try:
            # The handle on the null device is closed again after the probe.
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call([program, '-version'], stdout=devnull, stderr=subprocess.STDOUT)
            break
        except (OSError, IOError):
            pass
    else:
        self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
        # Without a converter there is nothing to run.
        return False
    cmd = [program] + args

    retval = subprocess.call(cmd)
    if retval == 0:
        fsize = os.path.getsize(encodeFilename(tmpfilename))
        # Label the message with the program that ran (args[0] is '-y').
        self.to_screen(u'\r[%s] %s bytes' % (program, fsize))
        self.try_rename(tmpfilename, filename)
        self._hook_progress({
            'downloaded_bytes': fsize,
            'total_bytes': fsize,
            'filename': filename,
            'status': 'finished',
        })
        return True
    else:
        self.to_stderr(u"\n")
        self.report_error(u'%s exited with code %d' % (program, retval))
        return False
500
f2cd958c 501
59ae15a5
PH
def _do_download(self, filename, info_dict):
    """Download info_dict['url'] to *filename*.

    rtmp, mms/rtsp and m3u8 URLs are handed to the matching external
    program; everything else is fetched over HTTP with optional resuming,
    retries on HTTP 5xx, file size limits and rate limiting. Progress
    hooks are called while downloading and once when finished.

    Returns True on success (including "already downloaded") and False on
    failure. Raises ContentTooShortError when fewer bytes than announced
    were received; unexpected HTTP errors are re-raised.
    """
    url = info_dict['url']

    # Check file already present
    if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
        self.report_file_already_downloaded(filename)
        self._hook_progress({
            'filename': filename,
            'status': 'finished',
            'total_bytes': os.path.getsize(encodeFilename(filename)),
        })
        return True

    # Attempt to download using rtmpdump
    if url.startswith('rtmp'):
        return self._download_with_rtmpdump(filename, url,
                                            info_dict.get('player_url', None),
                                            info_dict.get('page_url', None),
                                            info_dict.get('play_path', None),
                                            info_dict.get('tc_url', None),
                                            info_dict.get('rtmp_live', False),
                                            info_dict.get('rtmp_conn', None))

    # Attempt to download using mplayer
    if url.startswith('mms') or url.startswith('rtsp'):
        return self._download_with_mplayer(filename, url)

    # m3u8 manifest are downloaded with ffmpeg
    if determine_ext(url) == u'm3u8':
        return self._download_m3u8_with_ffmpeg(filename, url)

    tmpfilename = self.temp_name(filename)
    stream = None

    # Do not include the Accept-Encoding header
    headers = {'Youtubedl-no-compression': 'True'}
    if 'user_agent' in info_dict:
        headers['Youtubedl-user-agent'] = info_dict['user_agent']
    # basic_request never gets a Range header; it is used when the server
    # rejects the requested range.
    basic_request = compat_urllib_request.Request(url, None, headers)
    request = compat_urllib_request.Request(url, None, headers)

    if self.params.get('test', False):
        request.add_header('Range','bytes=0-10240')

    # Establish possible resume length
    if os.path.isfile(encodeFilename(tmpfilename)):
        resume_len = os.path.getsize(encodeFilename(tmpfilename))
    else:
        resume_len = 0

    open_mode = 'wb'
    if resume_len != 0:
        if self.params.get('continuedl', False):
            self.report_resuming_byte(resume_len)
            request.add_header('Range','bytes=%d-' % resume_len)
            open_mode = 'ab'
        else:
            resume_len = 0

    count = 0
    retries = self.params.get('retries', 0)
    while count <= retries:
        # Establish connection
        try:
            # NOTE(review): the 'urlhandle' value is immediately replaced by
            # the urlopen() result below, so it is effectively unused --
            # confirm whether it was meant to be used instead.
            if count == 0 and 'urlhandle' in info_dict:
                data = info_dict['urlhandle']
            data = compat_urllib_request.urlopen(request)
            break
        except (compat_urllib_error.HTTPError, ) as err:
            if (err.code < 500 or err.code >= 600) and err.code != 416:
                # Unexpected HTTP error
                raise
            elif err.code == 416:
                # Unable to resume (requested range not satisfiable)
                try:
                    # Open the connection again without the range header
                    data = compat_urllib_request.urlopen(basic_request)
                    content_length = data.info()['Content-Length']
                except (compat_urllib_error.HTTPError, ) as err:
                    if err.code < 500 or err.code >= 600:
                        raise
                else:
                    # Examine the reported length
                    if (content_length is not None and
                            (resume_len - 100 < int(content_length) < resume_len + 100)):
                        # The file had already been fully downloaded.
                        # Explanation to the above condition: in issue #175 it was revealed that
                        # YouTube sometimes adds or removes a few bytes from the end of the file,
                        # changing the file size slightly and causing problems for some users. So
                        # I decided to implement a suggested change and consider the file
                        # completely downloaded if the file size differs less than 100 bytes from
                        # the one in the hard drive.
                        self.report_file_already_downloaded(filename)
                        self.try_rename(tmpfilename, filename)
                        self._hook_progress({
                            'filename': filename,
                            'status': 'finished',
                        })
                        return True
                    else:
                        # The length does not match, we start the download over.
                        # Nothing is resumed, so the resume offset must not be
                        # counted in the totals, percentage or speed below.
                        self.report_unable_to_resume()
                        resume_len = 0
                        open_mode = 'wb'
                        break
        # Retry
        count += 1
        if count <= retries:
            self.report_retry(count, retries)

    if count > retries:
        self.report_error(u'giving up after %s retries' % retries)
        return False

    data_len = data.info().get('Content-length', None)
    if data_len is not None:
        # Content-length only covers what is still to be sent.
        data_len = int(data_len) + resume_len
        min_data_len = self.params.get("min_filesize", None)
        max_data_len = self.params.get("max_filesize", None)
        if min_data_len is not None and data_len < min_data_len:
            self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
            return False
        if max_data_len is not None and data_len > max_data_len:
            self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
            return False

    data_len_str = format_bytes(data_len)
    byte_counter = 0 + resume_len
    block_size = self.params.get('buffersize', 1024)
    start = time.time()
    while True:
        # Download and write
        before = time.time()
        data_block = data.read(block_size)
        after = time.time()
        if len(data_block) == 0:
            break
        byte_counter += len(data_block)

        # Open file just in time
        if stream is None:
            try:
                (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                assert stream is not None
                filename = self.undo_temp_name(tmpfilename)
                self.report_destination(filename)
            except (OSError, IOError) as err:
                self.report_error(u'unable to open for writing: %s' % str(err))
                return False
        try:
            stream.write(data_block)
        except (IOError, OSError) as err:
            # Do not leave the output file open after a failed write.
            try:
                stream.close()
            except (IOError, OSError):
                pass
            self.to_stderr(u"\n")
            self.report_error(u'unable to write data: %s' % str(err))
            return False
        if not self.params.get('noresizebuffer', False):
            block_size = self.best_block_size(after - before, len(data_block))

        # Progress message; speed and ETA only count bytes of this session.
        speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
        if data_len is None:
            eta = percent = None
        else:
            percent = self.calc_percent(byte_counter, data_len)
            eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
        self.report_progress(percent, data_len_str, speed, eta)

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': data_len,
            'tmpfilename': tmpfilename,
            'filename': filename,
            'status': 'downloading',
            'eta': eta,
            'speed': speed,
        })

        # Apply rate limit
        self.slow_down(start, byte_counter - resume_len)

    if stream is None:
        self.to_stderr(u"\n")
        self.report_error(u'Did not get any data blocks')
        return False
    stream.close()
    self.report_finish(data_len_str, (time.time() - start))
    if data_len is not None and byte_counter != data_len:
        raise ContentTooShortError(byte_counter, int(data_len))
    self.try_rename(tmpfilename, filename)

    # Update file modification time
    if self.params.get('updatetime', True):
        info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

    self._hook_progress({
        'downloaded_bytes': byte_counter,
        'total_bytes': byte_counter,
        'filename': filename,
        'status': 'finished',
    })

    return True
bffbd5f0
PH
703
704 def _hook_progress(self, status):
705 for ph in self._progress_hooks:
706 ph(status)
707
def add_progress_hook(self, ph):
    """Register ph to be called with a status dictionary on download progress.

    The dictionary always contains:
    * filename: The final filename
    * status: One of "downloading" and "finished"

    It may additionally contain:
    * downloaded_bytes: Bytes on disks
    * total_bytes: Total bytes, None if unknown
    * tmpfilename: The filename we're currently writing to
    * eta: The estimated time in seconds, None if unknown
    * speed: The download speed in bytes/second, None if unknown

    Hooks are guaranteed to be called at least once (with status
    "finished") if the download is successful.
    """
    registered = self._progress_hooks
    registered.append(ph)