]> jfr.im git - yt-dlp.git/blame - youtube_dl/FileDownloader.py
release 2013.07.17.1
[yt-dlp.git] / youtube_dl / FileDownloader.py
CommitLineData
d77c3dfd
FV
1import math
2import os
3import re
d77c3dfd
FV
4import subprocess
5import sys
6import time
59ce2019 7import traceback
d77c3dfd
FV
8
9if os.name == 'nt':
59ae15a5 10 import ctypes
3eec021a 11
9e8056d5 12from .utils import *
d77c3dfd
FV
13
14
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:               Print additional info to stdout.
    quiet:                 Do not print messages to stdout.
    ratelimit:             Download speed limit, in bytes/sec.
    retries:               Number of times to retry for HTTP error 5xx
    buffersize:            Size of download buffer in bytes.
    noresizebuffer:        Do not automatically resize the download buffer.
    continuedl:            Try to continue downloads if possible.
    noprogress:            Do not print the progress bar.
    progress_with_newline: Print each progress update on its own line.
    logtostderr:           Log messages to stderr instead of stdout.
    consoletitle:          Display progress in console window's titlebar.
    nopart:                Do not use temporary .part files.
    updatetime:            Use the Last-modified header to set output file timestamps.
    test:                  Download only first bytes to test the downloader.
    min_filesize:          Skip files smaller than this size
    max_filesize:          Skip files larger than this size
    """

    # Options dictionary; replaced per instance in __init__.
    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options.

        ydl is the object all screen output and error reporting is
        delegated to; params is the options dictionary described in the
        class docstring.
        """
        self.ydl = ydl
        self._progress_hooks = []
        self.params = params

    @staticmethod
    def format_bytes(bytes):
        """Format a byte quantity as a string with a binary (1024) suffix.

        Returns 'N/A' for None. A str argument is converted with float().
        The exponent is clamped to the available suffixes, so values below
        one byte are reported in 'B' and values beyond the largest suffix
        are reported in 'YiB'.
        """
        if bytes is None:
            return 'N/A'
        if isinstance(bytes, str):
            bytes = float(bytes)
        suffixes = ['B', 'KiB', 'MiB', 'GiB', 'TiB', 'PiB', 'EiB', 'ZiB', 'YiB']
        if bytes <= 0.0:
            # math.log() is undefined for zero and negative values.
            exponent = 0
        else:
            exponent = int(math.log(bytes, 1024.0))
            # A negative exponent would silently index from the end of the
            # list and an oversized one would raise IndexError.
            exponent = min(max(exponent, 0), len(suffixes) - 1)
        converted = float(bytes) / float(1024 ** exponent)
        return '%.2f%s' % (converted, suffixes[exponent])

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage as a 6-character wide string.

        Returns the placeholder '---.-%' when the total is unknown (None)
        or zero.
        """
        if data_len is None:
            return '---.-%'
        total = float(data_len)
        if total == 0.0:
            # Avoid ZeroDivisionError for an empty resource.
            return '---.-%'
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / total * 100.0))

    @staticmethod
    def calc_eta(start, now, total, current):
        """Return the estimated remaining time as 'MM:SS'.

        Returns '--:--' when the total is unknown, nothing has been
        transferred yet, less than a millisecond has elapsed, or the
        estimate exceeds 99 minutes.
        """
        if total is None:
            return '--:--'
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return '--:--'
        rate = float(current) / dif
        eta = int((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        if eta_mins > 99:
            return '--:--'
        return '%02d:%02d' % (eta_mins, eta_secs)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed as a right-aligned 10-character string."""
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from the duration of the previous read.

        The result is the observed rate (bytes per second) clamped to
        between half and double the previous block, at least 1 byte and
        at most 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer.

        Accepts a decimal number optionally followed by one of the
        case-insensitive suffixes k, M, G, T, P, E, Z, Y (powers of 1024).
        Returns None if the string does not match.
        """
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix yields index 0, i.e. a multiplier of 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    @staticmethod
    def _rtmpdump_basic_args(verbosity_option, url, tmpfilename,
                             player_url, page_url, play_path, tc_url):
        """Build the rtmpdump command line shared by all invocations."""
        basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        if play_path is not None:
            basic_args += ['--playpath', play_path]
        if tc_url is not None:
            # Pass the dedicated tcUrl, not the stream URL.
            basic_args += ['--tcUrl', tc_url]
        return basic_args

    def to_screen(self, *args, **kargs):
        """Forward a screen message to the owning ydl object."""
        self.ydl.to_screen(*args, **kargs)

    def to_stderr(self, message):
        """Forward a message to the owning ydl object."""
        # NOTE(review): this goes through ydl.to_screen, not a stderr
        # method; kept as-is because callers may rely on it.
        self.ydl.to_screen(message)

    def to_cons_title(self, message):
        """Set console/terminal window title to message."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self.to_screen('\033]0;%s\007' % message, skip_eol=True)

    def trouble(self, *args, **kargs):
        """Forward to ydl.trouble."""
        self.ydl.trouble(*args, **kargs)

    def report_warning(self, *args, **kargs):
        """Forward to ydl.report_warning."""
        self.ydl.report_warning(*args, **kargs)

    def report_error(self, *args, **kargs):
        """Forward to ydl.report_error."""
        self.ydl.report_error(*args, **kargs)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep exactly long enough for the average to drop to the limit.
            time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + u'.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' suffix, if present."""
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename

    def try_rename(self, old_filename, new_filename):
        """Rename a file, reporting (not raising) on failure."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError):
            self.report_error(u'unable to rename file')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the converted timestamp, or None when the header is
        missing, the file does not exist, or the date is unusable.
        """
        if last_modified_hdr is None:
            return None
        if not os.path.isfile(encodeFilename(filename)):
            return None
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return None
        # Ignore obviously invalid dates
        if filetime == 0:
            return None
        try:
            os.utime(filename, (time.time(), filetime))
        except Exception:
            # Best effort only, but do not swallow KeyboardInterrupt or
            # SystemExit as a bare except would.
            pass
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        # ANSI "erase to end of line"; only on non-Windows terminals.
        clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
        if self.params.get('progress_with_newline', False):
            self.to_screen(u'[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str))
        else:
            self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
                (clear_line, percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            self.to_screen(u'')

    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url):
        """Download an RTMP stream by running the external rtmpdump tool.

        Returns True on success and False if rtmpdump cannot be run or
        exits with an error.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
            return False
        verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet'

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = self._rtmpdump_basic_args(
            verbosity_option, url, tmpfilename,
            player_url, page_url, play_path, tc_url)
        resume_args = ['--resume', '--skip', '1'] if self.params.get('continuedl', False) else []
        args = basic_args + resume_args
        if self.params.get('verbose', False):
            try:
                import pipes
                shell_quote = lambda args: ' '.join(map(pipes.quote, args))
            except ImportError:
                shell_quote = repr
            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
        retval = subprocess.call(args)
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0)  # This seems to be needed
            # The extra flags depend on the exit code of the PREVIOUS run.
            skip_args = ['-k', '1'] if retval == 1 else []
            retval = subprocess.call(basic_args + ['-e'] + skip_args)
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            if prevsize == cursize and retval == 1:
                break
            # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'rtmpdump exited with code %d' % retval)
            return False

    def _download_with_mplayer(self, filename, url):
        """Download an MMS or RTSP stream by running the external mplayer.

        Returns True on success and False if mplayer cannot be run or
        exits with a non-zero code.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
        # Check for mplayer first
        try:
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['mplayer', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
            return False

        # Download using mplayer.
        retval = subprocess.call(args)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'mplayer exited with code %d' % retval)
            return False

    def _do_download(self, filename, info_dict):
        """Download info_dict['url'] to filename.

        rtmp URLs are delegated to rtmpdump and mms/rtsp URLs to mplayer;
        everything else is fetched over HTTP with optional resuming.

        Returns True on success (including "already downloaded") and
        False on a reported failure. Raises ContentTooShortError when
        fewer bytes than announced were received; unexpected HTTP errors
        are re-raised.
        """
        url = info_dict['url']

        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
            })
            return True

        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url,
                                                info_dict.get('player_url', None),
                                                info_dict.get('page_url', None),
                                                info_dict.get('play_path', None),
                                                info_dict.get('tc_url', None))

        # Attempt to download using mplayer
        if url.startswith('mms') or url.startswith('rtsp'):
            return self._download_with_mplayer(filename, url)

        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        # basic_request never carries a Range header; it is used to
        # restart from scratch when resuming is rejected.
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

        if self.params.get('test', False):
            request.add_header('Range', 'bytes=0-10240')

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                if count == 0 and 'urlhandle' in info_dict:
                    # NOTE(review): this handle is immediately replaced by
                    # the urlopen() call below, so it is effectively
                    # unused. Kept as in the original; using it directly
                    # would bypass the Range header set above.
                    data = info_dict['urlhandle']
                data = compat_urllib_request.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        # .get() so a missing header yields None instead
                        # of raising KeyError.
                        content_length = data.info().get('Content-Length', None)
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            # The file is rewritten from byte 0, so the
                            # old partial size must not be added to the
                            # totals and counters below.
                            resume_len = 0
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.report_error(u'giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            # The server reports only the remaining bytes when resuming.
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len = self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        data_len_str = self.format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error(u'unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr(u"\n")
                self.report_error(u'unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message (speed and ETA only count this session's bytes)
            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
            else:
                percent_str = self.calc_percent(byte_counter, data_len)
                eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
            })

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        if stream is None:
            self.to_stderr(u"\n")
            self.report_error(u'Did not get any data blocks')
            return False
        stream.close()
        self.report_finish()
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True

    def _hook_progress(self, status):
        """Call every registered progress hook with the status dictionary."""
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """ ph gets called on download progress, with a dictionary with the entries
        * filename: The final filename
        * status: One of "downloading" and "finished"

        It can also have some of the following entries:

        * downloaded_bytes: Bytes on disks
        * total_bytes: Total bytes, None if unknown
        * tmpfilename: The filename we're currently writing to

        Hooks are guaranteed to be called at least once (with status "finished")
        if the download is successful.
        """
        self._progress_hooks.append(ph)