]> jfr.im git - yt-dlp.git/blame - youtube_dl/FileDownloader.py
Fix delayed title display in --console-title
[yt-dlp.git] / youtube_dl / FileDownloader.py
CommitLineData
d77c3dfd
FV
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
9e8056d5
PH
4from __future__ import absolute_import
5
d77c3dfd 6import math
ce4be3a9 7import io
d77c3dfd
FV
8import os
9import re
10import socket
11import subprocess
12import sys
13import time
59ce2019 14import traceback
d77c3dfd
FV
15
16if os.name == 'nt':
59ae15a5 17 import ctypes
3eec021a 18
9e8056d5 19from .utils import *
d77c3dfd
FV
20
21
22class FileDownloader(object):
59ae15a5
PH
23 """File Downloader class.
24
25 File downloader objects are the ones responsible of downloading the
26 actual video file and writing it to disk if the user has requested
27 it, among some other tasks. In most cases there should be one per
28 program. As, given a video URL, the downloader doesn't know how to
29 extract all the needed information, task that InfoExtractors do, it
30 has to pass the URL to one of them.
31
32 For this, file downloader objects have a method that allows
33 InfoExtractors to be registered in a given order. When it is passed
34 a URL, the file downloader handles it to the first InfoExtractor it
35 finds that reports being able to handle it. The InfoExtractor extracts
36 all the information about the video or videos the URL refers to, and
37 asks the FileDownloader to process the video information, possibly
38 downloading the video.
39
40 File downloaders accept a lot of parameters. In order not to saturate
41 the object constructor with arguments, it receives a dictionary of
42 options instead. These options are available through the params
43 attribute for the InfoExtractors to use. The FileDownloader also
44 registers itself as the downloader in charge for the InfoExtractors
45 that are added to it, so this is a "mutual registration".
46
47 Available options:
48
49 username: Username for authentication purposes.
50 password: Password for authentication purposes.
51 usenetrc: Use netrc for authentication instead.
52 quiet: Do not print messages to stdout.
53 forceurl: Force printing final URL.
54 forcetitle: Force printing title.
55 forcethumbnail: Force printing thumbnail URL.
56 forcedescription: Force printing description.
57 forcefilename: Force printing final filename.
58 simulate: Do not download the video files.
59 format: Video format code.
60 format_limit: Highest quality format to try.
61 outtmpl: Template for output names.
62 restrictfilenames: Do not allow "&" and spaces in file names
63 ignoreerrors: Do not stop on download errors.
64 ratelimit: Download speed limit, in bytes/sec.
65 nooverwrites: Prevent overwriting files.
66 retries: Number of times to retry for HTTP error 5xx
67 buffersize: Size of download buffer in bytes.
68 noresizebuffer: Do not automatically resize the download buffer.
69 continuedl: Try to continue downloads if possible.
70 noprogress: Do not print the progress bar.
71 playliststart: Playlist item to start at.
72 playlistend: Playlist item to end at.
73 matchtitle: Download only matching titles.
74 rejecttitle: Reject downloads for matching titles.
75 logtostderr: Log messages to stderr instead of stdout.
76 consoletitle: Display progress in console window's titlebar.
77 nopart: Do not use temporary .part files.
78 updatetime: Use the Last-modified header to set output file timestamps.
79 writedescription: Write the video description to a .description file
80 writeinfojson: Write the video description to a .info.json file
81 writesubtitles: Write the video subtitles to a .srt file
82 subtitleslang: Language of the subtitles to download
37c8fd48 83 test: Download only first bytes to test the downloader.
7851b379 84 keepvideo: Keep the video file after post-processing
9e982f9e
JC
85 min_filesize: Skip files smaller than this size
86 max_filesize: Skip files larger than this size
59ae15a5
PH
87 """
88
89 params = None
90 _ies = []
91 _pps = []
92 _download_retcode = None
93 _num_downloads = None
94 _screen_file = None
95
96 def __init__(self, params):
97 """Create a FileDownloader object with the given options."""
98 self._ies = []
99 self._pps = []
bffbd5f0 100 self._progress_hooks = []
59ae15a5
PH
101 self._download_retcode = 0
102 self._num_downloads = 0
103 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
104 self.params = params
105
106 if '%(stitle)s' in self.params['outtmpl']:
107 self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
108
109 @staticmethod
110 def format_bytes(bytes):
111 if bytes is None:
112 return 'N/A'
113 if type(bytes) is str:
114 bytes = float(bytes)
115 if bytes == 0.0:
116 exponent = 0
117 else:
118 exponent = int(math.log(bytes, 1024.0))
119 suffix = 'bkMGTPEZY'[exponent]
120 converted = float(bytes) / float(1024 ** exponent)
121 return '%.2f%s' % (converted, suffix)
122
123 @staticmethod
124 def calc_percent(byte_counter, data_len):
125 if data_len is None:
126 return '---.-%'
127 return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))
128
129 @staticmethod
130 def calc_eta(start, now, total, current):
131 if total is None:
132 return '--:--'
133 dif = now - start
134 if current == 0 or dif < 0.001: # One millisecond
135 return '--:--'
136 rate = float(current) / dif
137 eta = int((float(total) - float(current)) / rate)
138 (eta_mins, eta_secs) = divmod(eta, 60)
139 if eta_mins > 99:
140 return '--:--'
141 return '%02d:%02d' % (eta_mins, eta_secs)
142
143 @staticmethod
144 def calc_speed(start, now, bytes):
145 dif = now - start
146 if bytes == 0 or dif < 0.001: # One millisecond
147 return '%10s' % '---b/s'
148 return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))
149
150 @staticmethod
151 def best_block_size(elapsed_time, bytes):
152 new_min = max(bytes / 2.0, 1.0)
153 new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
154 if elapsed_time < 0.001:
155 return int(new_max)
156 rate = bytes / elapsed_time
157 if rate > new_max:
158 return int(new_max)
159 if rate < new_min:
160 return int(new_min)
161 return int(rate)
162
163 @staticmethod
164 def parse_bytes(bytestr):
165 """Parse a string indicating a byte quantity into an integer."""
166 matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
167 if matchobj is None:
168 return None
169 number = float(matchobj.group(1))
170 multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
171 return int(round(number * multiplier))
172
173 def add_info_extractor(self, ie):
174 """Add an InfoExtractor object to the end of the list."""
175 self._ies.append(ie)
176 ie.set_downloader(self)
177
178 def add_post_processor(self, pp):
179 """Add a PostProcessor object to the end of the chain."""
180 self._pps.append(pp)
181 pp.set_downloader(self)
182
183 def to_screen(self, message, skip_eol=False):
184 """Print message to stdout if not in quiet mode."""
185 assert type(message) == type(u'')
186 if not self.params.get('quiet', False):
187 terminator = [u'\n', u''][skip_eol]
188 output = message + terminator
189 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
190 output = output.encode(preferredencoding(), 'ignore')
191 self._screen_file.write(output)
192 self._screen_file.flush()
193
194 def to_stderr(self, message):
195 """Print message to stderr."""
196 assert type(message) == type(u'')
197 output = message + u'\n'
198 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
199 output = output.encode(preferredencoding())
200 sys.stderr.write(output)
201
202 def to_cons_title(self, message):
203 """Set console/terminal window title to message."""
204 if not self.params.get('consoletitle', False):
205 return
206 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
207 # c_wchar_p() might not be necessary if `message` is
208 # already of type unicode()
209 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
210 elif 'TERM' in os.environ:
906417c7 211 self.to_screen('\033]0;%s\007' % message, skip_eol=True)
59ae15a5
PH
212
213 def fixed_template(self):
214 """Checks if the output template is fixed."""
215 return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
216
1c256f70 217 def trouble(self, message=None, tb=None):
59ae15a5
PH
218 """Determine action to take when a download problem appears.
219
220 Depending on if the downloader has been configured to ignore
221 download errors or not, this method may throw an exception or
222 not when errors are found, after printing the message.
01951dda
PH
223
224 tb, if given, is additional traceback information.
59ae15a5
PH
225 """
226 if message is not None:
227 self.to_stderr(message)
59ce2019 228 if self.params.get('verbose'):
1c256f70 229 if tb is None:
01951dda
PH
230 tb_data = traceback.format_list(traceback.extract_stack())
231 tb = u''.join(tb_data)
1c256f70 232 self.to_stderr(tb)
59ae15a5
PH
233 if not self.params.get('ignoreerrors', False):
234 raise DownloadError(message)
235 self._download_retcode = 1
236
237 def slow_down(self, start_time, byte_counter):
238 """Sleep if the download speed is over the rate limit."""
239 rate_limit = self.params.get('ratelimit', None)
240 if rate_limit is None or byte_counter == 0:
241 return
242 now = time.time()
243 elapsed = now - start_time
244 if elapsed <= 0.0:
245 return
246 speed = float(byte_counter) / elapsed
247 if speed > rate_limit:
248 time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
249
250 def temp_name(self, filename):
251 """Returns a temporary filename for the given filename."""
252 if self.params.get('nopart', False) or filename == u'-' or \
253 (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
254 return filename
255 return filename + u'.part'
256
257 def undo_temp_name(self, filename):
258 if filename.endswith(u'.part'):
259 return filename[:-len(u'.part')]
260 return filename
261
262 def try_rename(self, old_filename, new_filename):
263 try:
264 if old_filename == new_filename:
265 return
266 os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
267 except (IOError, OSError) as err:
268 self.trouble(u'ERROR: unable to rename file')
269
270 def try_utime(self, filename, last_modified_hdr):
271 """Try to set the last-modified time of the given file."""
272 if last_modified_hdr is None:
273 return
274 if not os.path.isfile(encodeFilename(filename)):
275 return
276 timestr = last_modified_hdr
277 if timestr is None:
278 return
279 filetime = timeconvert(timestr)
280 if filetime is None:
281 return filetime
282 try:
283 os.utime(filename, (time.time(), filetime))
284 except:
285 pass
286 return filetime
287
288 def report_writedescription(self, descfn):
289 """ Report that the description file is being written """
290 self.to_screen(u'[info] Writing video description to: ' + descfn)
291
292 def report_writesubtitles(self, srtfn):
293 """ Report that the subtitles file is being written """
294 self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)
295
296 def report_writeinfojson(self, infofn):
297 """ Report that the metadata file has been written """
298 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
299
300 def report_destination(self, filename):
301 """Report destination filename."""
302 self.to_screen(u'[download] Destination: ' + filename)
303
304 def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
305 """Report download progress."""
306 if self.params.get('noprogress', False):
307 return
308 self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
309 (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
310 self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
311 (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))
312
313 def report_resuming_byte(self, resume_len):
314 """Report attempt to resume at given byte."""
315 self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
316
317 def report_retry(self, count, retries):
318 """Report retry in case of HTTP error 5xx"""
319 self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
320
321 def report_file_already_downloaded(self, file_name):
322 """Report file has already been fully downloaded."""
323 try:
324 self.to_screen(u'[download] %s has already been downloaded' % file_name)
325 except (UnicodeEncodeError) as err:
326 self.to_screen(u'[download] The file has already been downloaded')
327
328 def report_unable_to_resume(self):
329 """Report it was impossible to resume download."""
330 self.to_screen(u'[download] Unable to resume')
331
332 def report_finish(self):
333 """Report download finished."""
334 if self.params.get('noprogress', False):
335 self.to_screen(u'[download] Download completed')
336 else:
337 self.to_screen(u'')
338
339 def increment_downloads(self):
340 """Increment the ordinal that assigns a number to each file."""
341 self._num_downloads += 1
342
343 def prepare_filename(self, info_dict):
344 """Generate the output filename."""
345 try:
346 template_dict = dict(info_dict)
347
348 template_dict['epoch'] = int(time.time())
349 template_dict['autonumber'] = u'%05d' % self._num_downloads
350
796173d0
PH
351 sanitize = lambda k,v: sanitize_filename(
352 u'NA' if v is None else compat_str(v),
353 restricted=self.params.get('restrictfilenames'),
354 is_id=(k==u'id'))
355 template_dict = dict((k, sanitize(k, v)) for k,v in template_dict.items())
59ae15a5
PH
356
357 filename = self.params['outtmpl'] % template_dict
358 return filename
359 except (ValueError, KeyError) as err:
360 self.trouble(u'ERROR: invalid system charset or erroneous output template')
361 return None
362
363 def _match_entry(self, info_dict):
364 """ Returns None iff the file should be downloaded """
365
366 title = info_dict['title']
367 matchtitle = self.params.get('matchtitle', False)
368 if matchtitle:
369 matchtitle = matchtitle.decode('utf8')
370 if not re.search(matchtitle, title, re.IGNORECASE):
371 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
372 rejecttitle = self.params.get('rejecttitle', False)
373 if rejecttitle:
374 rejecttitle = rejecttitle.decode('utf8')
375 if re.search(rejecttitle, title, re.IGNORECASE):
376 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
377 return None
378
379 def process_info(self, info_dict):
380 """Process a single dictionary returned by an InfoExtractor."""
381
382 # Keep for backwards compatibility
383 info_dict['stitle'] = info_dict['title']
384
385 if not 'format' in info_dict:
386 info_dict['format'] = info_dict['ext']
387
388 reason = self._match_entry(info_dict)
389 if reason is not None:
390 self.to_screen(u'[download] ' + reason)
391 return
392
393 max_downloads = self.params.get('max_downloads')
394 if max_downloads is not None:
395 if self._num_downloads > int(max_downloads):
396 raise MaxDownloadsReached()
397
398 filename = self.prepare_filename(info_dict)
399
400 # Forced printings
401 if self.params.get('forcetitle', False):
402 compat_print(info_dict['title'])
403 if self.params.get('forceurl', False):
404 compat_print(info_dict['url'])
405 if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
406 compat_print(info_dict['thumbnail'])
407 if self.params.get('forcedescription', False) and 'description' in info_dict:
408 compat_print(info_dict['description'])
409 if self.params.get('forcefilename', False) and filename is not None:
410 compat_print(filename)
411 if self.params.get('forceformat', False):
412 compat_print(info_dict['format'])
413
414 # Do nothing else if in simulate mode
415 if self.params.get('simulate', False):
416 return
417
418 if filename is None:
419 return
420
421 try:
422 dn = os.path.dirname(encodeFilename(filename))
423 if dn != '' and not os.path.exists(dn): # dn is already encoded
424 os.makedirs(dn)
425 except (OSError, IOError) as err:
426 self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
427 return
428
429 if self.params.get('writedescription', False):
430 try:
431 descfn = filename + u'.description'
432 self.report_writedescription(descfn)
bfa6389b
PH
433 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
434 descfile.write(info_dict['description'])
59ae15a5
PH
435 except (OSError, IOError):
436 self.trouble(u'ERROR: Cannot write description file ' + descfn)
437 return
438
439 if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
440 # subtitles download errors are already managed as troubles in relevant IE
441 # that way it will silently go on when used with unsupporting IE
442 try:
443 srtfn = filename.rsplit('.', 1)[0] + u'.srt'
444 self.report_writesubtitles(srtfn)
1a2c3c0f
FV
445 with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
446 srtfile.write(info_dict['subtitles'])
59ae15a5
PH
447 except (OSError, IOError):
448 self.trouble(u'ERROR: Cannot write subtitles file ' + descfn)
449 return
450
451 if self.params.get('writeinfojson', False):
452 infofn = filename + u'.info.json'
453 self.report_writeinfojson(infofn)
59ae15a5 454 try:
f4bfd65f
PH
455 json_info_dict = dict((k, v) for k,v in info_dict.items() if not k in ['urlhandle'])
456 write_json_file(json_info_dict, encodeFilename(infofn))
59ae15a5
PH
457 except (OSError, IOError):
458 self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
459 return
460
461 if not self.params.get('skip_download', False):
462 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
463 success = True
464 else:
465 try:
466 success = self._do_download(filename, info_dict)
467 except (OSError, IOError) as err:
6ad98fb3 468 raise UnavailableVideoError()
59ae15a5
PH
469 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
470 self.trouble(u'ERROR: unable to download video data: %s' % str(err))
471 return
472 except (ContentTooShortError, ) as err:
473 self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
474 return
475
476 if success:
477 try:
478 self.post_process(filename, info_dict)
479 except (PostProcessingError) as err:
480 self.trouble(u'ERROR: postprocessing: %s' % str(err))
481 return
482
483 def download(self, url_list):
484 """Download a given list of URLs."""
485 if len(url_list) > 1 and self.fixed_template():
486 raise SameFileError(self.params['outtmpl'])
487
488 for url in url_list:
489 suitable_found = False
490 for ie in self._ies:
491 # Go to next InfoExtractor if not suitable
492 if not ie.suitable(url):
493 continue
494
495 # Warn if the _WORKING attribute is False
496 if not ie.working():
1c256f70
PH
497 self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
498 u'and will probably not work. If you want to go on, use the -i option.')
59ae15a5
PH
499
500 # Suitable InfoExtractor found
501 suitable_found = True
502
503 # Extract information from URL and process it
1c256f70
PH
504 try:
505 videos = ie.extract(url)
506 except ExtractorError as de: # An error we somewhat expected
01951dda 507 self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
1c256f70
PH
508 break
509 except Exception as e:
510 if self.params.get('ignoreerrors', False):
511 self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
512 break
513 else:
514 raise
95fedbf8 515
0214ce7c 516 if len(videos or []) > 1 and self.fixed_template():
95fedbf8
FV
517 raise SameFileError(self.params['outtmpl'])
518
59ae15a5
PH
519 for video in videos or []:
520 video['extractor'] = ie.IE_NAME
521 try:
522 self.increment_downloads()
523 self.process_info(video)
524 except UnavailableVideoError:
525 self.trouble(u'\nERROR: unable to download video')
526
527 # Suitable InfoExtractor had been found; go to next URL
528 break
529
530 if not suitable_found:
531 self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)
532
533 return self._download_retcode
534
535 def post_process(self, filename, ie_info):
7851b379 536 """Run all the postprocessors on the given file."""
59ae15a5
PH
537 info = dict(ie_info)
538 info['filepath'] = filename
7851b379 539 keep_video = None
59ae15a5 540 for pp in self._pps:
7851b379
PH
541 try:
542 keep_video_wish,new_info = pp.run(info)
543 if keep_video_wish is not None:
544 if keep_video_wish:
545 keep_video = keep_video_wish
546 elif keep_video is None:
547 # No clear decision yet, let IE decide
548 keep_video = keep_video_wish
549 except PostProcessingError as e:
550 self.to_stderr(u'ERROR: ' + e.msg)
1d16b0c3 551 if keep_video is False and not self.params.get('keepvideo', False):
7851b379
PH
552 try:
553 self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
554 os.remove(encodeFilename(filename))
555 except (IOError, OSError):
556 self.to_stderr(u'WARNING: Unable to remove downloaded video file')
59ae15a5 557
f5ebb614 558 def _download_with_rtmpdump(self, filename, url, player_url, page_url):
59ae15a5
PH
559 self.report_destination(filename)
560 tmpfilename = self.temp_name(filename)
561
562 # Check for rtmpdump first
563 try:
564 subprocess.call(['rtmpdump', '-h'], stdout=(file(os.path.devnull, 'w')), stderr=subprocess.STDOUT)
565 except (OSError, IOError):
566 self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
567 return False
568
569 # Download using rtmpdump. rtmpdump returns exit code 2 when
570 # the connection was interrumpted and resuming appears to be
571 # possible. This is part of rtmpdump's normal usage, AFAIK.
f5ebb614
PH
572 basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
573 if player_url is not None:
574 basic_args += ['-W', player_url]
575 if page_url is not None:
576 basic_args += ['--pageUrl', page_url]
59ae15a5
PH
577 args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
578 if self.params.get('verbose', False):
579 try:
580 import pipes
581 shell_quote = lambda args: ' '.join(map(pipes.quote, args))
582 except ImportError:
583 shell_quote = repr
584 self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
585 retval = subprocess.call(args)
586 while retval == 2 or retval == 1:
587 prevsize = os.path.getsize(encodeFilename(tmpfilename))
588 self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
589 time.sleep(5.0) # This seems to be needed
590 retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
591 cursize = os.path.getsize(encodeFilename(tmpfilename))
592 if prevsize == cursize and retval == 1:
593 break
594 # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
595 if prevsize == cursize and retval == 2 and cursize > 1024:
596 self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
597 retval = 0
598 break
599 if retval == 0:
bffbd5f0
PH
600 fsize = os.path.getsize(encodeFilename(tmpfilename))
601 self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
59ae15a5 602 self.try_rename(tmpfilename, filename)
bffbd5f0
PH
603 self._hook_progress({
604 'downloaded_bytes': fsize,
605 'total_bytes': fsize,
606 'filename': filename,
607 'status': 'finished',
608 })
59ae15a5
PH
609 return True
610 else:
611 self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
612 return False
613
614 def _do_download(self, filename, info_dict):
615 url = info_dict['url']
59ae15a5
PH
616
617 # Check file already present
618 if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
619 self.report_file_already_downloaded(filename)
bffbd5f0
PH
620 self._hook_progress({
621 'filename': filename,
622 'status': 'finished',
623 })
59ae15a5
PH
624 return True
625
626 # Attempt to download using rtmpdump
627 if url.startswith('rtmp'):
f5ebb614
PH
628 return self._download_with_rtmpdump(filename, url,
629 info_dict.get('player_url', None),
630 info_dict.get('page_url', None))
59ae15a5
PH
631
632 tmpfilename = self.temp_name(filename)
633 stream = None
634
635 # Do not include the Accept-Encoding header
636 headers = {'Youtubedl-no-compression': 'True'}
3446dfb7
PH
637 if 'user_agent' in info_dict:
638 headers['Youtubedl-user-agent'] = info_dict['user_agent']
59ae15a5
PH
639 basic_request = compat_urllib_request.Request(url, None, headers)
640 request = compat_urllib_request.Request(url, None, headers)
641
37c8fd48
FV
642 if self.params.get('test', False):
643 request.add_header('Range','bytes=0-10240')
644
59ae15a5
PH
645 # Establish possible resume length
646 if os.path.isfile(encodeFilename(tmpfilename)):
647 resume_len = os.path.getsize(encodeFilename(tmpfilename))
648 else:
649 resume_len = 0
650
651 open_mode = 'wb'
652 if resume_len != 0:
653 if self.params.get('continuedl', False):
654 self.report_resuming_byte(resume_len)
655 request.add_header('Range','bytes=%d-' % resume_len)
656 open_mode = 'ab'
657 else:
658 resume_len = 0
659
660 count = 0
661 retries = self.params.get('retries', 0)
662 while count <= retries:
663 # Establish connection
664 try:
665 if count == 0 and 'urlhandle' in info_dict:
666 data = info_dict['urlhandle']
667 data = compat_urllib_request.urlopen(request)
668 break
669 except (compat_urllib_error.HTTPError, ) as err:
670 if (err.code < 500 or err.code >= 600) and err.code != 416:
671 # Unexpected HTTP error
672 raise
673 elif err.code == 416:
674 # Unable to resume (requested range not satisfiable)
675 try:
676 # Open the connection again without the range header
677 data = compat_urllib_request.urlopen(basic_request)
678 content_length = data.info()['Content-Length']
679 except (compat_urllib_error.HTTPError, ) as err:
680 if err.code < 500 or err.code >= 600:
681 raise
682 else:
683 # Examine the reported length
684 if (content_length is not None and
685 (resume_len - 100 < int(content_length) < resume_len + 100)):
686 # The file had already been fully downloaded.
687 # Explanation to the above condition: in issue #175 it was revealed that
688 # YouTube sometimes adds or removes a few bytes from the end of the file,
689 # changing the file size slightly and causing problems for some users. So
690 # I decided to implement a suggested change and consider the file
691 # completely downloaded if the file size differs less than 100 bytes from
692 # the one in the hard drive.
693 self.report_file_already_downloaded(filename)
694 self.try_rename(tmpfilename, filename)
bffbd5f0
PH
695 self._hook_progress({
696 'filename': filename,
697 'status': 'finished',
698 })
59ae15a5
PH
699 return True
700 else:
701 # The length does not match, we start the download over
702 self.report_unable_to_resume()
703 open_mode = 'wb'
704 break
705 # Retry
706 count += 1
707 if count <= retries:
708 self.report_retry(count, retries)
709
710 if count > retries:
711 self.trouble(u'ERROR: giving up after %s retries' % retries)
712 return False
713
714 data_len = data.info().get('Content-length', None)
715 if data_len is not None:
716 data_len = int(data_len) + resume_len
9e982f9e
JC
717 min_data_len = self.params.get("min_filesize", None)
718 max_data_len = self.params.get("max_filesize", None)
719 if min_data_len is not None and data_len < min_data_len:
720 self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
721 return False
722 if max_data_len is not None and data_len > max_data_len:
723 self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
724 return False
725
59ae15a5
PH
726 data_len_str = self.format_bytes(data_len)
727 byte_counter = 0 + resume_len
728 block_size = self.params.get('buffersize', 1024)
729 start = time.time()
730 while True:
731 # Download and write
732 before = time.time()
733 data_block = data.read(block_size)
734 after = time.time()
735 if len(data_block) == 0:
736 break
737 byte_counter += len(data_block)
738
739 # Open file just in time
740 if stream is None:
741 try:
742 (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
743 assert stream is not None
744 filename = self.undo_temp_name(tmpfilename)
745 self.report_destination(filename)
746 except (OSError, IOError) as err:
747 self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
748 return False
749 try:
750 stream.write(data_block)
751 except (IOError, OSError) as err:
752 self.trouble(u'\nERROR: unable to write data: %s' % str(err))
753 return False
754 if not self.params.get('noresizebuffer', False):
755 block_size = self.best_block_size(after - before, len(data_block))
756
757 # Progress message
758 speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
759 if data_len is None:
760 self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
761 else:
762 percent_str = self.calc_percent(byte_counter, data_len)
763 eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
764 self.report_progress(percent_str, data_len_str, speed_str, eta_str)
765
bffbd5f0
PH
766 self._hook_progress({
767 'downloaded_bytes': byte_counter,
768 'total_bytes': data_len,
769 'tmpfilename': tmpfilename,
770 'filename': filename,
771 'status': 'downloading',
772 })
773
59ae15a5
PH
774 # Apply rate limit
775 self.slow_down(start, byte_counter - resume_len)
776
777 if stream is None:
778 self.trouble(u'\nERROR: Did not get any data blocks')
779 return False
780 stream.close()
781 self.report_finish()
782 if data_len is not None and byte_counter != data_len:
783 raise ContentTooShortError(byte_counter, int(data_len))
784 self.try_rename(tmpfilename, filename)
785
786 # Update file modification time
787 if self.params.get('updatetime', True):
788 info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))
789
bffbd5f0
PH
790 self._hook_progress({
791 'downloaded_bytes': byte_counter,
792 'total_bytes': byte_counter,
793 'filename': filename,
794 'status': 'finished',
795 })
796
59ae15a5 797 return True
bffbd5f0
PH
798
799 def _hook_progress(self, status):
800 for ph in self._progress_hooks:
801 ph(status)
802
803 def add_progress_hook(self, ph):
804 """ ph gets called on download progress, with a dictionary with the entries
805 * filename: The final filename
806 * status: One of "downloading" and "finished"
807
808 It can also have some of the following entries:
809
810 * downloaded_bytes: Bytes on disks
811 * total_bytes: Total bytes, None if unknown
812 * tmpfilename: The filename we're currently writing to
813
814 Hooks are guaranteed to be called at least once (with status "finished")
815 if the download is successful.
816 """
817 self._progress_hooks.append(ph)