]> jfr.im git - yt-dlp.git/blob - youtube_dl/FileDownloader.py
More trouble calls changed in InfoExtractors.py
[yt-dlp.git] / youtube_dl / FileDownloader.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import math
7 import io
8 import os
9 import re
10 import socket
11 import subprocess
12 import sys
13 import time
14 import traceback
15
16 if os.name == 'nt':
17 import ctypes
18
19 from .utils import *
20
21
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    usenetrc:          Use netrc for authentication instead.
    quiet:             Do not print messages to stdout.
    verbose:           Print traceback and debugging information on errors.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing format.
    simulate:          Do not download the video files.
    skip_download:     Write the side files but skip the video download.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    ratelimit:         Download speed limit, in bytes/sec.
    nooverwrites:      Prevent overwriting files.
    retries:           Number of times to retry for HTTP error 5xx
    buffersize:        Size of download buffer in bytes.
    noresizebuffer:    Do not automatically resize the download buffer.
    continuedl:        Try to continue downloads if possible.
    noprogress:        Do not print the progress bar.
    progress_with_newline: Print each progress report on its own line.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    max_downloads:     Stop once this many files have been processed.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    nopart:            Do not use temporary .part files.
    updatetime:        Use the Last-modified header to set output file timestamps.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writesubtitles:    Write the video subtitles to a .srt file
    subtitleslang:     Language of the subtitles to download
    test:              Download only first bytes to test the downloader.
    keepvideo:         Keep the video file after post-processing
    min_filesize:      Skip files smaller than this size
    max_filesize:      Skip files larger than this size
    """

    # Class-level placeholders; __init__ rebinds every one of them per instance.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # Index the pair with the boolean: False -> stdout, True -> stderr.
        self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
        self.params = params

        if '%(stitle)s' in self.params['outtmpl']:
            self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    @staticmethod
    def format_bytes(bytes):
        """Format a byte quantity as a string such as '1.50k'.

        Returns 'N/A' for None. Strings are converted with float() first.
        """
        if bytes is None:
            return 'N/A'
        if type(bytes) is str:
            bytes = float(bytes)
        suffixes = 'bkMGTPEZY'
        if bytes == 0.0:
            exponent = 0
        else:
            exponent = int(math.log(bytes, 1024.0))
            # Clamp: values below 1/1024 would otherwise yield a negative
            # exponent (indexing the suffix string from the end), and huge
            # values would run past the last suffix.
            exponent = max(0, min(exponent, len(suffixes) - 1))
        suffix = suffixes[exponent]
        converted = float(bytes) / float(1024 ** exponent)
        return '%.2f%s' % (converted, suffix)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage as a 6-character string."""
        if data_len is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

    @staticmethod
    def calc_eta(start, now, total, current):
        """Return the estimated remaining time as 'MM:SS', or '--:--' if unknown."""
        if total is None:
            return '--:--'
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return '--:--'
        rate = float(current) / dif
        eta = int((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        if eta_mins > 99:
            return '--:--'
        return '%02d:%02d' % (eta_mins, eta_secs)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed as a right-aligned 10-character string."""
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from the last block's size and read time.

        The result is the observed rate clamped between half and double the
        previous block size, and never more than 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix gives index 0, i.e. a multiplier of 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        assert type(message) == type(u'')
        if not self.params.get('quiet', False):
            terminator = [u'\n', u''][skip_eol]
            output = message + terminator
            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
                output = output.encode(preferredencoding(), 'ignore')
            self._screen_file.write(output)
            self._screen_file.flush()

    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        output = message + u'\n'
        # Inspect the stream that is actually written to (sys.stderr), not the
        # screen file, when deciding whether the text must be encoded.
        if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
            output = output.encode(preferredencoding())
        sys.stderr.write(output)

    def to_cons_title(self, message):
        """Set console/terminal window title to message."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self.to_screen('\033]0;%s\007' % message, skip_eol=True)

    def fixed_template(self):
        """Checks if the output template is fixed."""
        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            raise DownloadError(message)
        self._download_retcode = 1

    def report_warning(self, message):
        '''
        Print the message to stderr, it will be prefixed with 'WARNING:'
        If stderr is a tty file the 'WARNING:' will be colored
        '''
        if sys.stderr.isatty():
            _msg_header = u'\033[0;33mWARNING:\033[0m'
        else:
            _msg_header = u'WARNING:'
        warning_message = u'%s %s' % (_msg_header, message)
        self.to_stderr(warning_message)

    def report_error(self, message, tb=None):
        '''
        Do the same as trouble, but prefixes the message with 'ERROR:', colored
        in red if stderr is a tty file.
        '''
        if sys.stderr.isatty():
            _msg_header = u'\033[0;31mERROR:\033[0m'
        else:
            _msg_header = u'ERROR:'
        error_message = u'%s %s' % (_msg_header, message)
        self.trouble(error_message, tb)

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep just long enough for the average to fall back to the limit.
            time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + u'.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from filename, if present."""
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename

    def try_rename(self, old_filename, new_filename):
        """Rename old_filename to new_filename, reporting an error on failure."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError):
            self.report_error(u'unable to rename file')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the converted timestamp, or None when the header is missing,
        the file does not exist, or the header cannot be converted.
        """
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        try:
            # Use the encoded name, like every other filesystem call here.
            os.utime(encodeFilename(filename), (time.time(), filetime))
        except Exception:
            # Best effort only: failing to set the timestamp is not an error.
            pass
        return filetime

    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)

    def report_writesubtitles(self, srtfn):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file has been written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        if self.params.get('progress_with_newline', False):
            self.to_screen(u'[download] %s of %s at %s ETA %s' %
                           (percent_str, data_len_str, speed_str, eta_str))
        else:
            # Leading carriage return overwrites the previous progress line.
            self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                           (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                           (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            # Terminate the in-place progress line.
            self.to_screen(u'')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Returns None (after reporting an error) if the template cannot be
        applied.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            template_dict['autonumber'] = u'%05d' % self._num_downloads

            sanitize = lambda k, v: sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == u'id'))
            template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

            filename = self.params['outtmpl'] % template_dict
            return filename
        except (ValueError, KeyError):
            self.report_error(u'invalid system charset or erroneous output template')
            return None

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded

        Otherwise returns the reason for skipping it; the caller adds the
        '[download] ' prefix.
        """

        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        return None

    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor."""

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            return

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
        if self.params.get('forceurl', False):
            compat_print(info_dict['url'])
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            compat_print(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            compat_print(filename)
        if self.params.get('forceformat', False):
            compat_print(info_dict['format'])

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):  # dn is already encoded
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.report_error(u'unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            try:
                descfn = filename + u'.description'
                self.report_writedescription(descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error(u'Cannot write description file ' + descfn)
                return

        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            try:
                srtfn = filename.rsplit('.', 1)[0] + u'.srt'
                self.report_writesubtitles(srtfn)
                with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
                    srtfile.write(info_dict['subtitles'])
            except (OSError, IOError):
                # Report the subtitles path (not the description path).
                self.report_error(u'Cannot write subtitles file ' + srtfn)
                return

        if self.params.get('writeinfojson', False):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
                # The open URL handle is not serializable, so leave it out.
                json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error(u'Cannot write metadata to JSON file ' + infofn)
                return

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    success = self._do_download(filename, info_dict)
                except (OSError, IOError):
                    raise UnavailableVideoError()
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_error(u'unable to download video data: %s' % str(err))
                    return
                except (ContentTooShortError, ) as err:
                    self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.report_error(u'postprocessing: %s' % str(err))
                    return

    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated return code (non-zero if an ignored error
        occurred).
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            suitable_found = False
            for ie in self._ies:
                # Go to next InfoExtractor if not suitable
                if not ie.suitable(url):
                    continue

                # Warn if the _WORKING attribute is False
                if not ie.working():
                    self.report_warning(u'the program functionality for this site has been marked as broken, '
                                        u'and will probably not work. If you want to go on, use the -i option.')

                # Suitable InfoExtractor found
                suitable_found = True

                # Extract information from URL and process it
                try:
                    videos = ie.extract(url)
                except ExtractorError as de:  # An error we somewhat expected
                    self.report_error(compat_str(de), de.format_traceback())
                    break
                except Exception as e:
                    if self.params.get('ignoreerrors', False):
                        self.report_error(u'' + compat_str(e), tb=compat_str(traceback.format_exc()))
                        break
                    else:
                        raise

                if len(videos or []) > 1 and self.fixed_template():
                    raise SameFileError(self.params['outtmpl'])

                for video in videos or []:
                    video['extractor'] = ie.IE_NAME
                    try:
                        self.increment_downloads()
                        self.process_info(video)
                    except UnavailableVideoError:
                        self.to_stderr(u"\n")
                        self.report_error(u'unable to download video')

                # Suitable InfoExtractor had been found; go to next URL
                break

            if not suitable_found:
                self.report_error(u'no suitable InfoExtractor: %s' % url)

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        keep_video = None
        for pp in self._pps:
            try:
                # NOTE(review): new_info is never fed to the next
                # postprocessor; confirm whether that is intended.
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.to_stderr(u'ERROR: ' + e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.report_warning(u'Unable to remove downloaded video file')

    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
        """Download an RTMP stream with the external rtmpdump program.

        Returns True on success and False on failure.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            with open(os.path.devnull, 'w') as devnull:
                subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.report_error(u'RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['-W', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        args = basic_args + [[], ['-e', '-k', '1']][self.params.get('continuedl', False)]
        if self.params.get('verbose', False):
            try:
                import pipes
                shell_quote = lambda args: ' '.join(map(pipes.quote, args))
            except ImportError:
                shell_quote = repr
            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
        retval = subprocess.call(args)
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0)  # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            if prevsize == cursize and retval == 1:
                break
            # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.to_stderr(u"\n")
            self.report_error(u'rtmpdump exited with code %d' % retval)
            return False

    def _do_download(self, filename, info_dict):
        """Download info_dict['url'] to filename.

        Returns True on success (including "already downloaded") and False
        on a reported failure. Raises ContentTooShortError if fewer bytes
        than announced were received; unexpected HTTP errors propagate.
        """
        url = info_dict['url']

        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
            })
            return True

        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url,
                                                info_dict.get('player_url', None),
                                                info_dict.get('page_url', None))

        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        # basic_request never gets a Range header; it is the fallback used
        # when the server rejects the ranged request.
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

        if self.params.get('test', False):
            request.add_header('Range', 'bytes=0-10240')

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                # NOTE(review): the pre-opened handle is immediately replaced
                # by a fresh urlopen() below; kept as in the original.
                if count == 0 and 'urlhandle' in info_dict:
                    data = info_dict['urlhandle']
                data = compat_urllib_request.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as retry_err:
                        if retry_err.code < 500 or retry_err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            # Nothing is being resumed any more, so the byte
                            # counters below must start from zero.
                            resume_len = 0
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.report_error(u'giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            # The server announces only the remaining bytes when resuming.
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len = self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        data_len_str = self.format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.report_error(u'unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.to_stderr(u"\n")
                self.report_error(u'unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
            else:
                percent_str = self.calc_percent(byte_counter, data_len)
                eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
            })

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        if stream is None:
            self.to_stderr(u"\n")
            self.report_error(u'Did not get any data blocks')
            return False
        stream.close()
        self.report_finish()
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True

    def _hook_progress(self, status):
        """Call every registered progress hook with the status dictionary."""
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """ ph gets called on download progress, with a dictionary with the entries
        * filename: The final filename
        * status: One of "downloading" and "finished"

        It can also have some of the following entries:

        * downloaded_bytes: Bytes on disks
        * total_bytes: Total bytes, None if unknown
        * tmpfilename: The filename we're currently writing to

        Hooks are guaranteed to be called at least once (with status "finished")
        if the download is successful.
        """
        self._progress_hooks.append(ph)