]>
Commit | Line | Data |
---|---|---|
d77c3dfd FV |
1 | import os |
2 | import re | |
d77c3dfd FV |
3 | import subprocess |
4 | import sys | |
5 | import time | |
d77c3dfd | 6 | |
76e67c2c PH |
7 | from .utils import ( |
8 | compat_urllib_error, | |
9 | compat_urllib_request, | |
10 | ContentTooShortError, | |
11 | determine_ext, | |
12 | encodeFilename, | |
02dbf93f | 13 | format_bytes, |
76e67c2c PH |
14 | sanitize_open, |
15 | timeconvert, | |
16 | ) | |
d77c3dfd FV |
17 | |
18 | ||
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead.

    Available options:

    verbose:               Print additional info to stdout.
    quiet:                 Do not print messages to stdout.
    ratelimit:             Download speed limit, in bytes/sec.
    retries:               Number of times to retry for HTTP error 5xx
    buffersize:            Size of download buffer in bytes.
    noresizebuffer:        Do not automatically resize the download buffer.
    continuedl:            Try to continue downloads if possible.
    noprogress:            Do not print the progress bar.
    progress_with_newline: Print each progress update on its own line.
    logtostderr:           Log messages to stderr instead of stdout.
    consoletitle:          Display progress in console window's titlebar.
    nopart:                Do not use temporary .part files.
    updatetime:            Use the Last-modified header to set output file timestamps.
    test:                  Download only first bytes to test the downloader.
    min_filesize:          Skip files smaller than this size
    max_filesize:          Skip files larger than this size
    """

    params = None

    def __init__(self, ydl, params):
        """Create a FileDownloader object with the given options."""
        self.ydl = ydl
        self._progress_hooks = []
        self.params = params

    @staticmethod
    def format_seconds(seconds):
        """Format a duration as MM:SS, or HH:MM:SS once it reaches one hour.

        Durations of 100 hours or more are rendered as '--:--:--'.
        """
        (mins, secs) = divmod(seconds, 60)
        (hours, mins) = divmod(mins, 60)
        if hours > 99:
            return '--:--:--'
        if hours == 0:
            return '%02d:%02d' % (mins, secs)
        else:
            return '%02d:%02d:%02d' % (hours, mins, secs)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage, or None if the total is unknown or zero."""
        if data_len is None:
            return None
        total = float(data_len)
        if total == 0:
            # A zero-length total has no meaningful percentage; avoid dividing by zero.
            return None
        return float(byte_counter) / total * 100.0

    @staticmethod
    def format_percent(percent):
        """Render a percentage right-aligned in six columns ('---.-%' if None)."""
        if percent is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % percent)

    @staticmethod
    def calc_eta(start, now, total, current):
        """Estimate the remaining whole seconds from the average rate so far.

        Returns None when the total is unknown, nothing has been transferred
        yet, or less than a millisecond has elapsed.
        """
        if total is None:
            return None
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return None
        rate = float(current) / dif
        return int((float(total) - float(current)) / rate)

    @staticmethod
    def format_eta(eta):
        """Render an ETA in seconds via format_seconds ('--:--' if None)."""
        if eta is None:
            return '--:--'
        return FileDownloader.format_seconds(eta)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed in bytes/second, or None if it cannot be computed."""
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return None
        return float(bytes) / dif

    @staticmethod
    def format_speed(speed):
        """Render a speed right-aligned in ten columns ('---b/s' if None)."""
        if speed is None:
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % format_bytes(speed))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from the throughput of the last read.

        The result is the observed rate clamped to between half and double
        the previous block size, and never above 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix yields index 0, i.e. a multiplier of 1024 ** 0 == 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    @staticmethod
    def _is_runnable(cmd):
        """Return True if the command line `cmd` could be launched.

        The command's output is discarded. Only a failure to start the
        process (OSError/IOError) counts as "not runnable"; the exit code
        is ignored.
        """
        try:
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(cmd, stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            return False
        return True

    def to_screen(self, *args, **kargs):
        """Forward to ydl.to_screen."""
        self.ydl.to_screen(*args, **kargs)

    def to_stderr(self, message):
        """Forward `message` to ydl.to_screen."""
        # NOTE(review): despite the name this goes through to_screen, not a
        # stderr-specific method - confirm whether that is intentional.
        self.ydl.to_screen(message)

    def to_console_title(self, message):
        """Forward to ydl.to_console_title."""
        self.ydl.to_console_title(message)

    def trouble(self, *args, **kargs):
        """Forward to ydl.trouble."""
        self.ydl.trouble(*args, **kargs)

    def report_warning(self, *args, **kargs):
        """Forward to ydl.report_warning."""
        self.ydl.report_warning(*args, **kargs)

    def report_error(self, *args, **kargs):
        """Forward to ydl.report_error."""
        self.ydl.report_error(*args, **kargs)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep just long enough for the average speed to fall back to the limit.
            time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + u'.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from filename, if present."""
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename

    def try_rename(self, old_filename, new_filename):
        """Rename old_filename to new_filename, reporting (not raising) on failure."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError):
            self.report_error(u'unable to rename file')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the converted file time when one was obtained and applied
        (or attempted), and None otherwise.
        """
        if last_modified_hdr is None:
            return
        encoded_name = encodeFilename(filename)
        if not os.path.isfile(encoded_name):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        # Ignore obviously invalid dates
        if filetime == 0:
            return
        try:
            # Use the same encoded name as the existence check above.
            os.utime(encoded_name, (time.time(), filetime))
        except Exception:
            # Best effort only: failing to set the timestamp must not fail the download.
            pass
        return filetime

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)

    def _report_progress_status(self, msg, is_last_line=False):
        """Print a progress line, overwriting the previous one unless newlines are requested."""
        fullmsg = u'[download] ' + msg
        if self.params.get('progress_with_newline', False):
            self.to_screen(fullmsg)
        else:
            if os.name == 'nt':
                # No erase-line escape is used here: pad with spaces to cover
                # whatever the previous, longer line left behind.
                prev_len = getattr(self, '_report_progress_prev_line_length', 0)
                if prev_len > len(fullmsg):
                    fullmsg += u' ' * (prev_len - len(fullmsg))
                self._report_progress_prev_line_length = len(fullmsg)
                clear_line = u'\r'
            else:
                clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
            self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
        self.to_console_title(u'youtube-dl ' + msg)

    def report_progress(self, percent, data_len_str, speed, eta):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        if eta is not None:
            eta_str = self.format_eta(eta)
        else:
            eta_str = 'Unknown ETA'
        if percent is not None:
            percent_str = self.format_percent(percent)
        else:
            percent_str = 'Unknown %'
        speed_str = self.format_speed(speed)

        msg = (u'%s of %s at %s ETA %s' %
               (percent_str, data_len_str, speed_str, eta_str))
        self._report_progress_status(msg)

    def report_finish(self, data_len_str, tot_time):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            self._report_progress_status(
                (u'100%% of %s in %s' %
                 (data_len_str, self.format_seconds(tot_time))),
                is_last_line=True)

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url, live):
        """Download an RTMP stream by driving the external rtmpdump program.

        Returns True on success and False if rtmpdump could not be run or
        exited with an error.
        """
        def run_rtmpdump(args):
            """Run rtmpdump, turning its stderr progress lines into progress reports."""
            start = time.time()
            resume_percent = None
            resume_downloaded_data_len = None
            proc = subprocess.Popen(args, stderr=subprocess.PIPE)
            cursor_in_new_line = True
            proc_stderr_closed = False
            while not proc_stderr_closed:
                # read line from stderr; progress updates are '\r'-terminated,
                # so the stream is split on both '\r' and '\n'
                line = u''
                while True:
                    char = proc.stderr.read(1)
                    if not char:
                        proc_stderr_closed = True
                        break
                    if char in [b'\r', b'\n']:
                        break
                    line += char.decode('ascii', 'replace')
                if not line:
                    # Either an empty line or proc_stderr_closed is True
                    continue
                mobj = re.search(r'([0-9]+\.[0-9]{3}) kB / [0-9]+\.[0-9]{2} sec \(([0-9]{1,2}\.[0-9])%\)', line)
                if mobj:
                    downloaded_data_len = int(float(mobj.group(1))*1024)
                    percent = float(mobj.group(2))
                    # Record the baseline exactly once; 0.0 is a valid baseline.
                    if resume_percent is None:
                        resume_percent = percent
                        resume_downloaded_data_len = downloaded_data_len
                    eta = self.calc_eta(start, time.time(), 100-resume_percent, percent-resume_percent)
                    speed = self.calc_speed(start, time.time(), downloaded_data_len-resume_downloaded_data_len)
                    data_len = None
                    if percent > 0:
                        data_len = int(downloaded_data_len * 100 / percent)
                    data_len_str = u'~' + format_bytes(data_len)
                    self.report_progress(percent, data_len_str, speed, eta)
                    cursor_in_new_line = False
                    self._hook_progress({
                        'downloaded_bytes': downloaded_data_len,
                        'total_bytes': data_len,
                        'tmpfilename': tmpfilename,
                        'filename': filename,
                        'status': 'downloading',
                        'eta': eta,
                        'speed': speed,
                    })
                elif self.params.get('verbose', False):
                    if not cursor_in_new_line:
                        self.to_screen(u'')
                    cursor_in_new_line = True
                    self.to_screen(u'[rtmpdump] '+line)
            proc.wait()
            if not cursor_in_new_line:
                self.to_screen(u'')
            return proc.returncode

        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)
        test = self.params.get('test', False)

        # Check for rtmpdump first
        if not self._is_runnable(['rtmpdump', '-h']):
            self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '--verbose', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['--swfVfy', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        if play_path is not None:
            basic_args += ['--playpath', play_path]
        if tc_url is not None:
            basic_args += ['--tcUrl', tc_url]
        if test:
            basic_args += ['--stop', '1']
        if live:
            basic_args += ['--live']
        resume_args = ['--resume', '--skip', '1'] if self.params.get('continuedl', False) else []
        args = basic_args + resume_args

        if sys.platform == 'win32' and sys.version_info < (3, 0):
            # Windows subprocess module does not actually support Unicode
            # on Python 2.x
            # See http://stackoverflow.com/a/9951851/35070
            subprocess_encoding = sys.getfilesystemencoding()
            args = [a.encode(subprocess_encoding, 'ignore') for a in args]
        else:
            subprocess_encoding = None

        if self.params.get('verbose', False):
            if subprocess_encoding:
                str_args = [
                    a.decode(subprocess_encoding) if isinstance(a, bytes) else a
                    for a in args]
            else:
                str_args = args
            try:
                import pipes
                quoted_args = ' '.join(map(pipes.quote, str_args))
            except ImportError:
                quoted_args = repr(str_args)
            self.to_screen(u'[debug] rtmpdump command line: ' + quoted_args)

        retval = run_rtmpdump(args)

        while (retval == 2 or retval == 1) and not test:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'[rtmpdump] %s bytes' % prevsize)
            time.sleep(5.0)  # This seems to be needed
            skip_args = ['-k', '1'] if retval == 1 else []
            retval = run_rtmpdump(basic_args + ['-e'] + skip_args)
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            if prevsize == cursize and retval == 1:
                break
            # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        if retval == 0 or (test and retval == 2):
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'rtmpdump exited with code %d' % retval)
            return False

    def _download_with_mplayer(self, filename, url):
        """Download an MMS/RTSP stream by dumping it with the external mplayer program.

        Returns True on success and False if mplayer could not be run or
        exited with a non-zero code.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url]
        # Check for mplayer first
        if not self._is_runnable(['mplayer', '-h']):
            self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0])
            return False

        # Download using mplayer.
        retval = subprocess.call(args)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'mplayer exited with code %d' % retval)
            return False

    def _download_m3u8_with_ffmpeg(self, filename, url):
        """Download an m3u8 stream with avconv or ffmpeg, whichever can be run first.

        Returns True on success and False if neither program could be run
        or the chosen one exited with a non-zero code.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        args = ['-y', '-i', url, '-f', 'mp4', '-c', 'copy',
                '-bsf:a', 'aac_adtstoasc', tmpfilename]

        for program in ['avconv', 'ffmpeg']:
            if self._is_runnable([program, '-version']):
                break
        else:
            self.report_error(u'm3u8 download detected but ffmpeg or avconv could not be found')
            # Without a usable program there is nothing left to run.
            return False
        cmd = [program] + args

        retval = subprocess.call(cmd)
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[%s] %s bytes' % (program, fsize))
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'%s exited with code %d' % (program, retval))
            return False

    def _do_download(self, filename, info_dict):
        """Download info_dict['url'] to filename.

        rtmp, mms/rtsp and m3u8 URLs are handed to the matching external
        downloader; everything else is fetched over HTTP with optional
        resuming, retries on HTTP 5xx, rate limiting and progress hooks.

        Returns True on success and False on a reported failure. Raises
        ContentTooShortError if fewer bytes than announced were received,
        and re-raises unexpected HTTP errors.
        """
        url = info_dict['url']

        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
                'total_bytes': os.path.getsize(encodeFilename(filename)),
            })
            return True

        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url,
                                                info_dict.get('player_url', None),
                                                info_dict.get('page_url', None),
                                                info_dict.get('play_path', None),
                                                info_dict.get('tc_url', None),
                                                info_dict.get('rtmp_live', False))

        # Attempt to download using mplayer
        if url.startswith('mms') or url.startswith('rtsp'):
            return self._download_with_mplayer(filename, url)

        # m3u8 manifest are downloaded with ffmpeg
        if determine_ext(url) == u'm3u8':
            return self._download_m3u8_with_ffmpeg(filename, url)

        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        # basic_request never carries a Range header; it is the fallback
        # used when the server rejects the ranged request with a 416.
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

        if self.params.get('test', False):
            request.add_header('Range', 'bytes=0-10240')

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                # NOTE(review): info_dict may carry a pre-opened 'urlhandle';
                # the previous code assigned it and then immediately replaced
                # it with a fresh urlopen(), so it was never actually used.
                # That behaviour is kept - confirm whether reuse was intended.
                data = compat_urllib_request.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as reopen_err:
                        if reopen_err.code < 500 or reopen_err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over.
                            # The partial file is overwritten, so nothing is resumed
                            # and the byte accounting below must start from zero.
                            self.report_unable_to_resume()
                            resume_len = 0
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.report_error(u'giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            # Content-length covers only the remaining part when resuming.
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len = self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        data_len_str = format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error(u'unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr(u"\n")
                self.report_error(u'unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            speed = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                eta = percent = None
            else:
                percent = self.calc_percent(byte_counter, data_len)
                eta = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
            self.report_progress(percent, data_len_str, speed, eta)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
                'eta': eta,
                'speed': speed,
            })

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        if stream is None:
            self.to_stderr(u"\n")
            self.report_error(u'Did not get any data blocks')
            return False
        stream.close()
        self.report_finish(data_len_str, (time.time() - start))
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True

    def _hook_progress(self, status):
        """Call every registered progress hook with the status dictionary."""
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """ ph gets called on download progress, with a dictionary with the entries
        * filename: The final filename
        * status: One of "downloading" and "finished"

        It can also have some of the following entries:

        * downloaded_bytes: Bytes on disks
        * total_bytes: Total bytes, None if unknown
        * tmpfilename: The filename we're currently writing to
        * eta: The estimated time in seconds, None if unknown
        * speed: The download speed in bytes/second, None if unknown

        Hooks are guaranteed to be called at least once (with status "finished")
        if the download is successful.
        """
        self._progress_hooks.append(ph)