]>
Commit | Line | Data |
---|---|---|
d77c3dfd | 1 | import math |
ce4be3a9 | 2 | import io |
d77c3dfd FV |
3 | import os |
4 | import re | |
11d9224e | 5 | import shutil |
d77c3dfd FV |
6 | import socket |
7 | import subprocess | |
8 | import sys | |
9 | import time | |
59ce2019 | 10 | import traceback |
d77c3dfd FV |
11 | |
12 | if os.name == 'nt': | |
59ae15a5 | 13 | import ctypes |
3eec021a | 14 | |
9e8056d5 | 15 | from .utils import * |
6de8f1af | 16 | from .InfoExtractors import get_info_extractor |
d77c3dfd FV |
17 | |
18 | ||
19 | class FileDownloader(object): | |
59ae15a5 PH |
20 | """File Downloader class. |
21 | ||
22 | File downloader objects are the ones responsible for downloading the | |
23 | actual video file and writing it to disk if the user has requested | |
24 | it, among some other tasks. In most cases there should be one per | |
25 | program. As, given a video URL, the downloader doesn't know how to | |
26 | extract all the needed information, task that InfoExtractors do, it | |
27 | has to pass the URL to one of them. | |
28 | ||
29 | For this, file downloader objects have a method that allows | |
30 | InfoExtractors to be registered in a given order. When it is passed | |
31 | a URL, the file downloader hands it to the first InfoExtractor it | |
32 | finds that reports being able to handle it. The InfoExtractor extracts | |
33 | all the information about the video or videos the URL refers to, and | |
34 | asks the FileDownloader to process the video information, possibly | |
35 | downloading the video. | |
36 | ||
37 | File downloaders accept a lot of parameters. In order not to saturate | |
38 | the object constructor with arguments, it receives a dictionary of | |
39 | options instead. These options are available through the params | |
40 | attribute for the InfoExtractors to use. The FileDownloader also | |
41 | registers itself as the downloader in charge for the InfoExtractors | |
42 | that are added to it, so this is a "mutual registration". | |
43 | ||
44 | Available options: | |
45 | ||
46 | username: Username for authentication purposes. | |
47 | password: Password for authentication purposes. | |
48 | usenetrc: Use netrc for authentication instead. | |
49 | quiet: Do not print messages to stdout. | |
50 | forceurl: Force printing final URL. | |
51 | forcetitle: Force printing title. | |
1a2adf3f | 52 | forceid: Force printing ID. |
59ae15a5 PH |
53 | forcethumbnail: Force printing thumbnail URL. |
54 | forcedescription: Force printing description. | |
55 | forcefilename: Force printing final filename. | |
56 | simulate: Do not download the video files. | |
57 | format: Video format code. | |
58 | format_limit: Highest quality format to try. | |
59 | outtmpl: Template for output names. | |
60 | restrictfilenames: Do not allow "&" and spaces in file names | |
61 | ignoreerrors: Do not stop on download errors. | |
62 | ratelimit: Download speed limit, in bytes/sec. | |
63 | nooverwrites: Prevent overwriting files. | |
64 | retries: Number of times to retry for HTTP error 5xx | |
65 | buffersize: Size of download buffer in bytes. | |
66 | noresizebuffer: Do not automatically resize the download buffer. | |
67 | continuedl: Try to continue downloads if possible. | |
68 | noprogress: Do not print the progress bar. | |
69 | playliststart: Playlist item to start at. | |
70 | playlistend: Playlist item to end at. | |
71 | matchtitle: Download only matching titles. | |
72 | rejecttitle: Reject downloads for matching titles. | |
73 | logtostderr: Log messages to stderr instead of stdout. | |
74 | consoletitle: Display progress in console window's titlebar. | |
75 | nopart: Do not use temporary .part files. | |
76 | updatetime: Use the Last-modified header to set output file timestamps. | |
77 | writedescription: Write the video description to a .description file | |
78 | writeinfojson: Write the video metadata to a .info.json file | |
11d9224e | 79 | writethumbnail: Write the thumbnail image to a file |
9e62bc44 | 80 | writesubtitles: Write the video subtitles to a file |
ae608b80 | 81 | allsubtitles: Downloads all the subtitles of the video |
2a4093ea | 82 | listsubtitles: Lists all available subtitles for the video |
9e62bc44 | 83 | subtitlesformat: Subtitle format [sbv/srt] (default=srt) |
59ae15a5 | 84 | subtitleslang: Language of the subtitles to download |
37c8fd48 | 85 | test: Download only first bytes to test the downloader. |
7851b379 | 86 | keepvideo: Keep the video file after post-processing |
9e982f9e JC |
87 | min_filesize: Skip files smaller than this size |
88 | max_filesize: Skip files larger than this size | |
bd558525 | 89 | daterange: A DateRange object, download only if the upload_date is in the range. |
1bd96c3a | 90 | skip_download: Skip the actual download of the video file |
59ae15a5 PH |
91 | """ |
92 | ||
93 | params = None | |
94 | _ies = [] | |
95 | _pps = [] | |
96 | _download_retcode = None | |
97 | _num_downloads = None | |
98 | _screen_file = None | |
99 | ||
def __init__(self, params):
    """Set up a FileDownloader configured by the params dictionary.

    The extractor, post-processor and progress-hook lists start empty,
    the return code and the download counter start at zero, and screen
    output goes to stderr when the 'logtostderr' option is set.
    """
    output_streams = [sys.stdout, sys.stderr]
    self.params = params
    self._screen_file = output_streams[params.get('logtostderr', False)]
    self._num_downloads = 0
    self._download_retcode = 0
    self._progress_hooks = []
    self._pps = []
    self._ies = []

    # The legacy %(stitle)s field still works, but users are told to migrate.
    uses_stitle = '%(stitle)s' in self.params['outtmpl']
    if uses_stitle:
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
59ae15a5 PH |
112 | |
113 | @staticmethod | |
114 | def format_bytes(bytes): | |
115 | if bytes is None: | |
116 | return 'N/A' | |
117 | if type(bytes) is str: | |
118 | bytes = float(bytes) | |
119 | if bytes == 0.0: | |
120 | exponent = 0 | |
121 | else: | |
122 | exponent = int(math.log(bytes, 1024.0)) | |
b0936ef4 | 123 | suffix = ['B','KiB','MiB','GiB','TiB','PiB','EiB','ZiB','YiB'][exponent] |
59ae15a5 PH |
124 | converted = float(bytes) / float(1024 ** exponent) |
125 | return '%.2f%s' % (converted, suffix) | |
126 | ||
127 | @staticmethod | |
128 | def calc_percent(byte_counter, data_len): | |
129 | if data_len is None: | |
130 | return '---.-%' | |
131 | return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0)) | |
132 | ||
133 | @staticmethod | |
134 | def calc_eta(start, now, total, current): | |
135 | if total is None: | |
136 | return '--:--' | |
137 | dif = now - start | |
138 | if current == 0 or dif < 0.001: # One millisecond | |
139 | return '--:--' | |
140 | rate = float(current) / dif | |
141 | eta = int((float(total) - float(current)) / rate) | |
142 | (eta_mins, eta_secs) = divmod(eta, 60) | |
143 | if eta_mins > 99: | |
144 | return '--:--' | |
145 | return '%02d:%02d' % (eta_mins, eta_secs) | |
146 | ||
147 | @staticmethod | |
148 | def calc_speed(start, now, bytes): | |
149 | dif = now - start | |
150 | if bytes == 0 or dif < 0.001: # One millisecond | |
151 | return '%10s' % '---b/s' | |
152 | return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif)) | |
153 | ||
154 | @staticmethod | |
155 | def best_block_size(elapsed_time, bytes): | |
156 | new_min = max(bytes / 2.0, 1.0) | |
157 | new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB | |
158 | if elapsed_time < 0.001: | |
159 | return int(new_max) | |
160 | rate = bytes / elapsed_time | |
161 | if rate > new_max: | |
162 | return int(new_max) | |
163 | if rate < new_min: | |
164 | return int(new_min) | |
165 | return int(rate) | |
166 | ||
167 | @staticmethod | |
168 | def parse_bytes(bytestr): | |
169 | """Parse a string indicating a byte quantity into an integer.""" | |
170 | matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr) | |
171 | if matchobj is None: | |
172 | return None | |
173 | number = float(matchobj.group(1)) | |
174 | multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower()) | |
175 | return int(round(number * multiplier)) | |
176 | ||
def add_info_extractor(self, ie):
    """Register ie as the last InfoExtractor and make this its downloader."""
    extractors = self._ies
    extractors.append(ie)
    ie.set_downloader(self)
181 | ||
def add_post_processor(self, pp):
    """Append pp to the post-processing chain and make this its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
186 | ||
def to_screen(self, message, skip_eol=False):
    """Write message to the screen file unless the 'quiet' option is set.

    A newline is appended unless skip_eol is true. The stream is flushed
    after every write.
    """
    assert type(message) == type(u'')
    if self.params.get('quiet', False):
        return
    line_end = (u'\n', u'')[skip_eol]
    output = message + line_end
    stream = self._screen_file
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    needs_bytes = 'b' in getattr(stream, 'mode', '') or sys.version_info[0] < 3
    if needs_bytes:
        output = output.encode(preferredencoding(), 'ignore')
    stream.write(output)
    stream.flush()
197 | ||
def to_stderr(self, message):
    """Write message followed by a newline to sys.stderr.

    Whether the text has to be encoded is decided from sys.stderr itself,
    the stream that is actually written to, and not from the screen file,
    which may be a different stream opened in a different mode.
    """
    assert type(message) == type(u'')
    output = message + u'\n'
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
205 | ||
def to_cons_title(self, message):
    """Show message as the console/terminal window title.

    Does nothing unless the 'consoletitle' option is set.
    """
    if not self.params.get('consoletitle', False):
        return
    has_windows_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if has_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        return
    if 'TERM' in os.environ:
        # Terminal escape sequence written through the normal screen output.
        self.to_screen('\033]0;%s\007' % message, skip_eol=True)
59ae15a5 PH |
216 | |
def fixed_template(self):
    """Tell whether the output template has no %(field)s placeholders."""
    field_pattern = u'(?u)%\\(.+?\\)s'
    found = re.search(field_pattern, self.params['outtmpl'])
    return found is None
220 | ||
def trouble(self, message=None, tb=None):
    """Report a download problem and decide whether to abort.

    The message, if any, is printed to stderr. With the 'verbose' option
    a traceback is printed as well: the given tb, or one built from the
    exception being handled, or the current call stack when no exception
    is active.

    Unless the 'ignoreerrors' option is set, DownloadError is raised;
    otherwise the downloader's return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    def wrapped_exc_info():
        # exc_info carried by the exception being handled, if it has one
        current = sys.exc_info()
        if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
            return current[1].exc_info
        return None

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                pieces = [u'']
                inner = wrapped_exc_info()
                if inner is not None:
                    pieces.append(u''.join(traceback.format_exception(*inner)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = u''.join(pieces)
            else:
                stack_lines = traceback.format_list(traceback.extract_stack())
                tb = u''.join(stack_lines)
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        inner = wrapped_exc_info()
        exc_info = inner if inner is not None else sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
250 | ||
8207626b JMF |
def report_warning(self, message):
    '''
    Send the message to stderr behind a 'WARNING:' prefix.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    use_color = sys.stderr.isatty() and os.name != 'nt'
    prefix = u'\033[0;33mWARNING:\033[0m' if use_color else u'WARNING:'
    self.to_stderr(u'%s %s' % (prefix, message))
262 | ||
4e1582f3 JMF |
def report_error(self, message, tb=None):
    '''
    Hand the message to trouble() behind an 'ERROR:' prefix.
    The prefix is colored in red when stderr is a tty and the OS is not
    Windows.
    '''
    use_color = sys.stderr.isatty() and os.name != 'nt'
    prefix = u'\033[0;31mERROR:\033[0m' if use_color else u'ERROR:'
    self.trouble(u'%s %s' % (prefix, message), tb)
274 | ||
59ae15a5 PH |
def slow_down(self, start_time, byte_counter):
    """Pause long enough to bring the average speed down to 'ratelimit'.

    Nothing happens when no rate limit is configured, when no bytes have
    been counted, or when no time has elapsed since start_time.
    """
    limit = self.params.get('ratelimit', None)
    if limit is None or byte_counter == 0:
        return
    now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    if float(byte_counter) / elapsed > limit:
        # Sleep for the time the transfer is ahead of the allowed schedule.
        time.sleep((byte_counter - limit * (now - start_time)) / limit)
287 | ||
def temp_name(self, filename):
    """Return the name to write to while filename is being downloaded.

    The name is returned unchanged when the 'nopart' option is set, when
    it is u'-', or when it exists but is not a regular file; otherwise
    u'.part' is appended.
    """
    if self.params.get('nopart', False) or filename == u'-':
        return filename
    encoded = encodeFilename(filename)
    if os.path.exists(encoded) and not os.path.isfile(encoded):
        return filename
    return filename + u'.part'
294 | ||
def undo_temp_name(self, filename):
    """Strip a trailing u'.part' from filename, if there is one."""
    suffix = u'.part'
    if not filename.endswith(suffix):
        return filename
    return filename[:-len(suffix)]
299 | ||
def try_rename(self, old_filename, new_filename):
    """Rename old_filename to new_filename, reporting failures.

    Nothing is done when both names are equal. An IOError/OSError raised
    by the rename is reported through report_error together with the
    error's own description, so the user can see why it failed.
    """
    if old_filename == new_filename:
        return
    try:
        os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
    except (IOError, OSError) as err:
        self.report_error(u'unable to rename file: %s' % compat_str(err))
59ae15a5 PH |
307 | |
def try_utime(self, filename, last_modified_hdr):
    """Try to set the last-modified time of the given file.

    last_modified_hdr is the textual timestamp to apply; it is converted
    with timeconvert(). Returns the converted timestamp, or None when the
    header is missing, the file does not exist, or the timestamp cannot
    be converted or is zero. Failing to apply the time is not an error.
    """
    if last_modified_hdr is None:
        return None
    if not os.path.isfile(encodeFilename(filename)):
        return None
    filetime = timeconvert(last_modified_hdr)
    if filetime is None:
        return None
    # Ignore obviously invalid dates
    if filetime == 0:
        return None
    try:
        os.utime(filename, (time.time(), filetime))
    except Exception:
        # Best effort only; KeyboardInterrupt/SystemExit still propagate.
        pass
    return filetime
328 | ||
def report_writedescription(self, descfn):
    """ Announce the path the description file is written to """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
332 | ||
def report_writesubtitles(self, sub_filename):
    """ Announce the path the subtitles file is written to """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
59ae15a5 PH |
336 | |
def report_writeinfojson(self, infofn):
    """ Announce the path the JSON metadata file is written to """
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
340 | ||
def report_destination(self, filename):
    """Announce the file the download is saved to."""
    notice = u'[download] Destination: ' + filename
    self.to_screen(notice)
344 | ||
def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
    """Print the progress line and mirror it in the console title.

    Nothing is printed when the 'noprogress' option is set. With
    'progress_with_newline' every update goes on its own line; otherwise
    the current line is rewritten in place.
    """
    if self.params.get('noprogress', False):
        return
    fields = (percent_str, data_len_str, speed_str, eta_str)
    # Erase-to-end-of-line sequence, only when stderr is a non-Windows tty.
    clear_line = (u'\x1b[K' if sys.stderr.isatty() and os.name != 'nt' else u'')
    if self.params.get('progress_with_newline', False):
        self.to_screen(u'[download] %s of %s at %s ETA %s' % fields)
    else:
        self.to_screen(u'\r%s[download] %s of %s at %s ETA %s' %
            ((clear_line,) + fields), skip_eol=True)
    stripped = tuple(field.strip() for field in fields)
    self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' % stripped)
358 | ||
def report_resuming_byte(self, resume_len):
    """Announce the byte offset the download is resumed from."""
    notice = u'[download] Resuming download at byte %s' % resume_len
    self.to_screen(notice)
362 | ||
def report_retry(self, count, retries):
    """Announce a retry after an HTTP 5xx error from the server."""
    attempt = (count, retries)
    self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % attempt)
366 | ||
def report_file_already_downloaded(self, file_name):
    """Announce that file_name was fully downloaded earlier.

    Falls back to a message without the name when printing the name
    raises UnicodeEncodeError.
    """
    try:
        notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
373 | ||
def report_unable_to_resume(self):
    """Announce that the download could not be resumed."""
    notice = u'[download] Unable to resume'
    self.to_screen(notice)
377 | ||
def report_finish(self):
    """Announce the end of a download.

    With the 'noprogress' option a completion message is printed;
    otherwise an empty message ends the progress line.
    """
    progress_hidden = self.params.get('noprogress', False)
    notice = u'[download] Download completed' if progress_hidden else u''
    self.to_screen(notice)
384 | ||
def increment_downloads(self):
    """Advance the counter that numbers the downloaded files."""
    self._num_downloads = self._num_downloads + 1
388 | ||
def prepare_filename(self, info_dict):
    """Generate the output filename from the 'outtmpl' option.

    A copy of info_dict is extended with 'epoch' and 'autonumber', the
    playlist index (when present) is zero-padded, and every value is
    passed through sanitize_filename before the template is applied.

    Returns the filename, or None after reporting an error when the
    template refers to an unknown field (KeyError) or formatting raises
    ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # .get(): an info_dict without a playlist index is not a template
        # error and must not be reported as one.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def sanitize(key, value):
            # Missing values are rendered as NA; ids get the id-specific rules.
            return sanitize_filename(
                u'NA' if value is None else compat_str(value),
                restricted=restricted,
                is_id=(key == u'id'))
        template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

        return self.params['outtmpl'] % template_dict
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError:
        self.report_error(u'Insufficient system charset ' + repr(preferredencoding()))
        return None
417 | ||
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns the reason for skipping it, based on the
    'matchtitle', 'rejecttitle' and 'daterange' options. The reason carries
    no '[download] ' prefix: the caller adds it when printing.
    """

    title = info_dict['title']
    matchtitle = self.params.get('matchtitle', False)
    if matchtitle:
        if not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
    rejecttitle = self.params.get('rejecttitle', False)
    if rejecttitle:
        if re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    return None
df8db1aa | 436 | |
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    '''
    Extract the information for url with the first suitable InfoExtractor
    and return the result of process_ie_result for it.
    If 'download', also downloads the videos.
    ie_key, if given, selects the extractor instead of the registered ones.
    extra_info is a dict containing the extra values to add to each result.
    Returns None when no extractor is suitable, when the extractor returns
    None, or when extraction fails and the error is not re-raised.
    '''
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ie = get_info_extractor(ie_key)()
        ie.set_downloader(self)
        ies = [ie]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                for result in ie_result:
                    result.update(extra_info)
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            else:
                ie_result.update(extra_info)
            if 'extractor' not in ie_result:
                ie_result['extractor'] = ie.IE_NAME
            return self.process_ie_result(ie_result, download=download)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
6ac7f082 | 487 | |
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    extra_info holds extra values (such as the playlist title and index)
    that are merged into video results and forwarded for URL results.

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        # Merge first, so playlist information handed down by a playlist
        # is not replaced by the "not part of a playlist" defaults below.
        ie_result.update(extra_info)
        if 'playlist' not in ie_result:
            # It isn't part of a playlist
            ie_result['playlist'] = None
            ie_result['playlist_index'] = None
        if download:
            self.process_info(ie_result)
        return ie_result
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
            }
            if 'extractor' not in entry:
                # We set the extractor, if it's an url it will be set then to
                # the new extractor, but if it's already a video we must make
                # sure it's present: see issue #877
                entry['extractor'] = ie_result['extractor']
            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            r.setdefault('extractor', ie_result['extractor'])
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download=download)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
59ae15a5 PH |
562 | |
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Completes info_dict ('fulltitle', a title truncated to 200 characters,
    'stitle', 'format'), applies the title/date filters and the
    'max_downloads' limit, performs the forced printings and then, unless
    simulating, writes the requested side files (description, subtitles,
    info JSON, thumbnail), downloads the video and runs the
    postprocessors.

    Raises MaxDownloadsReached when the download limit is exceeded and
    UnavailableVideoError when the download fails with an OS/IO error.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # The download count is incremented here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        compat_print(info_dict['url'])
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        if not self._write_subtitle(filename, info_dict['subtitles'][0]):
            return

    if self.params.get('allsubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
        for subtitle in info_dict['subtitles']:
            if not self._write_subtitle(filename, subtitle):
                return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # The URL handle is left out of the JSON dump.
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if 'thumbnail' in info_dict:
            # The extension is whatever follows the last dot of the URL's last path part.
            thumb_format = info_dict['thumbnail'].rpartition(u'/')[2].rpartition(u'.')[2]
            if not thumb_format:
                thumb_format = 'jpg'
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
            with open(thumb_filename, 'wb') as thumbf:
                shutil.copyfileobj(uf, thumbf)
            self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                           (info_dict['extractor'], info_dict['id'], thumb_filename))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self._do_download(filename, info_dict)
            except (OSError, IOError):
                raise UnavailableVideoError()
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

def _write_subtitle(self, filename, subtitle):
    """Write one (error, language, data) subtitle entry next to filename.

    An entry flagged with an error only produces a warning. Returns False
    when writing the file failed (after reporting the error with the
    subtitle file's name), True otherwise.
    """
    (sub_error, sub_lang, sub) = subtitle
    sub_format = self.params.get('subtitlesformat')
    if sub_error:
        self.report_warning("Some error while getting the subtitles")
        return True
    sub_filename = filename.rsplit('.', 1)[0] + u'.' + sub_lang + u'.' + sub_format
    try:
        self.report_writesubtitles(sub_filename)
        with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
            subfile.write(sub)
    except (OSError, IOError):
        self.report_error(u'Cannot write subtitles file ' + sub_filename)
        return False
    return True
713 | ||
def download(self, url_list):
    """Process every URL of url_list in turn and return the retcode.

    Raises SameFileError (built from the 'outtmpl' parameter) when more
    than one URL is given while self.fixed_template() is true.
    A MaxDownloadsReached raised while handling a URL is announced on
    screen and re-raised; an UnavailableVideoError is reported as an
    error and the remaining URLs are still processed.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for current_url in url_list:
        try:
            # extract_info() also takes care of downloading the videos
            self.extract_info(current_url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
730 | ||
def post_process(self, filename, ie_info):
    """Feed the downloaded file through every registered postprocessor.

    Each postprocessor in self._pps receives a copy of ie_info extended
    with a 'filepath' entry and answers with a (keep_video_wish, info)
    pair.  A true wish always wins; a false wish only counts while no
    postprocessor has expressed a wish yet.  If the final verdict is
    False and the 'keepvideo' parameter is not set, the original file
    is removed.
    """
    pp_info = dict(ie_info)
    pp_info['filepath'] = filename

    keep_original = None
    for postprocessor in self._pps:
        try:
            wish, _ignored_info = postprocessor.run(pp_info)
        except PostProcessingError as pp_err:
            self.to_stderr(u'ERROR: ' + pp_err.msg)
            continue
        if wish is None:
            # This postprocessor has no opinion
            continue
        if wish or keep_original is None:
            # Either an explicit "keep", or the first decision made so far
            keep_original = wish

    if keep_original is not False or self.params.get('keepvideo', False):
        return

    try:
        self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
        os.remove(encodeFilename(filename))
    except (IOError, OSError):
        self.report_warning(u'Unable to remove downloaded video file')
59ae15a5 | 753 | |
de5d66d4 | 754 | def _download_with_rtmpdump(self, filename, url, player_url, page_url, play_path, tc_url): |
59ae15a5 PH |
755 | self.report_destination(filename) |
756 | tmpfilename = self.temp_name(filename) | |
757 | ||
758 | # Check for rtmpdump first | |
759 | try: | |
967897fd | 760 | subprocess.call(['rtmpdump', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) |
59ae15a5 | 761 | except (OSError, IOError): |
6622d22c | 762 | self.report_error(u'RTMP download detected but "rtmpdump" could not be run') |
59ae15a5 | 763 | return False |
8cd252f1 | 764 | verbosity_option = '--verbose' if self.params.get('verbose', False) else '--quiet' |
59ae15a5 PH |
765 | |
766 | # Download using rtmpdump. rtmpdump returns exit code 2 when | |
767 | # the connection was interrumpted and resuming appears to be | |
768 | # possible. This is part of rtmpdump's normal usage, AFAIK. | |
8cd252f1 | 769 | basic_args = ['rtmpdump', verbosity_option, '-r', url, '-o', tmpfilename] |
f5ebb614 | 770 | if player_url is not None: |
8cd252f1 | 771 | basic_args += ['--swfVfy', player_url] |
f5ebb614 PH |
772 | if page_url is not None: |
773 | basic_args += ['--pageUrl', page_url] | |
adb029ed | 774 | if play_path is not None: |
8cd252f1 | 775 | basic_args += ['--playpath', play_path] |
de5d66d4 | 776 | if tc_url is not None: |
777 | basic_args += ['--tcUrl', url] | |
8cd252f1 | 778 | args = basic_args + [[], ['--resume', '--skip', '1']][self.params.get('continuedl', False)] |
59ae15a5 PH |
779 | if self.params.get('verbose', False): |
780 | try: | |
781 | import pipes | |
782 | shell_quote = lambda args: ' '.join(map(pipes.quote, args)) | |
783 | except ImportError: | |
784 | shell_quote = repr | |
785 | self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args)) | |
786 | retval = subprocess.call(args) | |
787 | while retval == 2 or retval == 1: | |
788 | prevsize = os.path.getsize(encodeFilename(tmpfilename)) | |
789 | self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True) | |
790 | time.sleep(5.0) # This seems to be needed | |
791 | retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1]) | |
792 | cursize = os.path.getsize(encodeFilename(tmpfilename)) | |
793 | if prevsize == cursize and retval == 1: | |
794 | break | |
795 | # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those | |
796 | if prevsize == cursize and retval == 2 and cursize > 1024: | |
797 | self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.') | |
798 | retval = 0 | |
799 | break | |
800 | if retval == 0: | |
bffbd5f0 PH |
801 | fsize = os.path.getsize(encodeFilename(tmpfilename)) |
802 | self.to_screen(u'\r[rtmpdump] %s bytes' % fsize) | |
59ae15a5 | 803 | self.try_rename(tmpfilename, filename) |
bffbd5f0 PH |
804 | self._hook_progress({ |
805 | 'downloaded_bytes': fsize, | |
806 | 'total_bytes': fsize, | |
807 | 'filename': filename, | |
808 | 'status': 'finished', | |
809 | }) | |
59ae15a5 PH |
810 | return True |
811 | else: | |
6622d22c JMF |
812 | self.to_stderr(u"\n") |
813 | self.report_error(u'rtmpdump exited with code %d' % retval) | |
59ae15a5 PH |
814 | return False |
815 | ||
f2cd958c | 816 | def _download_with_mplayer(self, filename, url): |
817 | self.report_destination(filename) | |
818 | tmpfilename = self.temp_name(filename) | |
819 | ||
f2cd958c | 820 | args = ['mplayer', '-really-quiet', '-vo', 'null', '-vc', 'dummy', '-dumpstream', '-dumpfile', tmpfilename, url] |
821 | # Check for mplayer first | |
822 | try: | |
3054ff0c | 823 | subprocess.call(['mplayer', '-h'], stdout=(open(os.path.devnull, 'w')), stderr=subprocess.STDOUT) |
f2cd958c | 824 | except (OSError, IOError): |
825 | self.report_error(u'MMS or RTSP download detected but "%s" could not be run' % args[0] ) | |
826 | return False | |
827 | ||
828 | # Download using mplayer. | |
829 | retval = subprocess.call(args) | |
830 | if retval == 0: | |
831 | fsize = os.path.getsize(encodeFilename(tmpfilename)) | |
832 | self.to_screen(u'\r[%s] %s bytes' % (args[0], fsize)) | |
833 | self.try_rename(tmpfilename, filename) | |
834 | self._hook_progress({ | |
835 | 'downloaded_bytes': fsize, | |
836 | 'total_bytes': fsize, | |
837 | 'filename': filename, | |
838 | 'status': 'finished', | |
839 | }) | |
840 | return True | |
841 | else: | |
842 | self.to_stderr(u"\n") | |
3054ff0c | 843 | self.report_error(u'mplayer exited with code %d' % retval) |
f2cd958c | 844 | return False |
845 | ||
846 | ||
59ae15a5 PH |
847 | def _do_download(self, filename, info_dict): |
848 | url = info_dict['url'] | |
59ae15a5 PH |
849 | |
850 | # Check file already present | |
851 | if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False): | |
852 | self.report_file_already_downloaded(filename) | |
bffbd5f0 PH |
853 | self._hook_progress({ |
854 | 'filename': filename, | |
855 | 'status': 'finished', | |
856 | }) | |
59ae15a5 PH |
857 | return True |
858 | ||
859 | # Attempt to download using rtmpdump | |
860 | if url.startswith('rtmp'): | |
f5ebb614 PH |
861 | return self._download_with_rtmpdump(filename, url, |
862 | info_dict.get('player_url', None), | |
adb029ed | 863 | info_dict.get('page_url', None), |
de5d66d4 | 864 | info_dict.get('play_path', None), |
865 | info_dict.get('tc_url', None)) | |
59ae15a5 | 866 | |
f2cd958c | 867 | # Attempt to download using mplayer |
868 | if url.startswith('mms') or url.startswith('rtsp'): | |
869 | return self._download_with_mplayer(filename, url) | |
870 | ||
59ae15a5 PH |
871 | tmpfilename = self.temp_name(filename) |
872 | stream = None | |
873 | ||
874 | # Do not include the Accept-Encoding header | |
875 | headers = {'Youtubedl-no-compression': 'True'} | |
3446dfb7 PH |
876 | if 'user_agent' in info_dict: |
877 | headers['Youtubedl-user-agent'] = info_dict['user_agent'] | |
59ae15a5 PH |
878 | basic_request = compat_urllib_request.Request(url, None, headers) |
879 | request = compat_urllib_request.Request(url, None, headers) | |
880 | ||
37c8fd48 FV |
881 | if self.params.get('test', False): |
882 | request.add_header('Range','bytes=0-10240') | |
883 | ||
59ae15a5 PH |
884 | # Establish possible resume length |
885 | if os.path.isfile(encodeFilename(tmpfilename)): | |
886 | resume_len = os.path.getsize(encodeFilename(tmpfilename)) | |
887 | else: | |
888 | resume_len = 0 | |
889 | ||
890 | open_mode = 'wb' | |
891 | if resume_len != 0: | |
892 | if self.params.get('continuedl', False): | |
893 | self.report_resuming_byte(resume_len) | |
894 | request.add_header('Range','bytes=%d-' % resume_len) | |
895 | open_mode = 'ab' | |
896 | else: | |
897 | resume_len = 0 | |
898 | ||
899 | count = 0 | |
900 | retries = self.params.get('retries', 0) | |
901 | while count <= retries: | |
902 | # Establish connection | |
903 | try: | |
904 | if count == 0 and 'urlhandle' in info_dict: | |
905 | data = info_dict['urlhandle'] | |
906 | data = compat_urllib_request.urlopen(request) | |
907 | break | |
908 | except (compat_urllib_error.HTTPError, ) as err: | |
909 | if (err.code < 500 or err.code >= 600) and err.code != 416: | |
910 | # Unexpected HTTP error | |
911 | raise | |
912 | elif err.code == 416: | |
913 | # Unable to resume (requested range not satisfiable) | |
914 | try: | |
915 | # Open the connection again without the range header | |
916 | data = compat_urllib_request.urlopen(basic_request) | |
917 | content_length = data.info()['Content-Length'] | |
918 | except (compat_urllib_error.HTTPError, ) as err: | |
919 | if err.code < 500 or err.code >= 600: | |
920 | raise | |
921 | else: | |
922 | # Examine the reported length | |
923 | if (content_length is not None and | |
924 | (resume_len - 100 < int(content_length) < resume_len + 100)): | |
925 | # The file had already been fully downloaded. | |
926 | # Explanation to the above condition: in issue #175 it was revealed that | |
927 | # YouTube sometimes adds or removes a few bytes from the end of the file, | |
928 | # changing the file size slightly and causing problems for some users. So | |
929 | # I decided to implement a suggested change and consider the file | |
930 | # completely downloaded if the file size differs less than 100 bytes from | |
931 | # the one in the hard drive. | |
932 | self.report_file_already_downloaded(filename) | |
933 | self.try_rename(tmpfilename, filename) | |
bffbd5f0 PH |
934 | self._hook_progress({ |
935 | 'filename': filename, | |
936 | 'status': 'finished', | |
937 | }) | |
59ae15a5 PH |
938 | return True |
939 | else: | |
940 | # The length does not match, we start the download over | |
941 | self.report_unable_to_resume() | |
942 | open_mode = 'wb' | |
943 | break | |
944 | # Retry | |
945 | count += 1 | |
946 | if count <= retries: | |
947 | self.report_retry(count, retries) | |
948 | ||
949 | if count > retries: | |
6622d22c | 950 | self.report_error(u'giving up after %s retries' % retries) |
59ae15a5 PH |
951 | return False |
952 | ||
953 | data_len = data.info().get('Content-length', None) | |
954 | if data_len is not None: | |
955 | data_len = int(data_len) + resume_len | |
9e982f9e JC |
956 | min_data_len = self.params.get("min_filesize", None) |
957 | max_data_len = self.params.get("max_filesize", None) | |
958 | if min_data_len is not None and data_len < min_data_len: | |
959 | self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len)) | |
960 | return False | |
961 | if max_data_len is not None and data_len > max_data_len: | |
962 | self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len)) | |
963 | return False | |
964 | ||
59ae15a5 PH |
965 | data_len_str = self.format_bytes(data_len) |
966 | byte_counter = 0 + resume_len | |
967 | block_size = self.params.get('buffersize', 1024) | |
968 | start = time.time() | |
969 | while True: | |
970 | # Download and write | |
971 | before = time.time() | |
972 | data_block = data.read(block_size) | |
973 | after = time.time() | |
974 | if len(data_block) == 0: | |
975 | break | |
976 | byte_counter += len(data_block) | |
977 | ||
978 | # Open file just in time | |
979 | if stream is None: | |
980 | try: | |
981 | (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode) | |
982 | assert stream is not None | |
983 | filename = self.undo_temp_name(tmpfilename) | |
984 | self.report_destination(filename) | |
985 | except (OSError, IOError) as err: | |
6622d22c | 986 | self.report_error(u'unable to open for writing: %s' % str(err)) |
59ae15a5 PH |
987 | return False |
988 | try: | |
989 | stream.write(data_block) | |
990 | except (IOError, OSError) as err: | |
6622d22c JMF |
991 | self.to_stderr(u"\n") |
992 | self.report_error(u'unable to write data: %s' % str(err)) | |
59ae15a5 PH |
993 | return False |
994 | if not self.params.get('noresizebuffer', False): | |
995 | block_size = self.best_block_size(after - before, len(data_block)) | |
996 | ||
997 | # Progress message | |
998 | speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len) | |
999 | if data_len is None: | |
1000 | self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA') | |
1001 | else: | |
1002 | percent_str = self.calc_percent(byte_counter, data_len) | |
1003 | eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len) | |
1004 | self.report_progress(percent_str, data_len_str, speed_str, eta_str) | |
1005 | ||
bffbd5f0 PH |
1006 | self._hook_progress({ |
1007 | 'downloaded_bytes': byte_counter, | |
1008 | 'total_bytes': data_len, | |
1009 | 'tmpfilename': tmpfilename, | |
1010 | 'filename': filename, | |
1011 | 'status': 'downloading', | |
1012 | }) | |
1013 | ||
59ae15a5 PH |
1014 | # Apply rate limit |
1015 | self.slow_down(start, byte_counter - resume_len) | |
1016 | ||
1017 | if stream is None: | |
6622d22c JMF |
1018 | self.to_stderr(u"\n") |
1019 | self.report_error(u'Did not get any data blocks') | |
59ae15a5 PH |
1020 | return False |
1021 | stream.close() | |
1022 | self.report_finish() | |
1023 | if data_len is not None and byte_counter != data_len: | |
1024 | raise ContentTooShortError(byte_counter, int(data_len)) | |
1025 | self.try_rename(tmpfilename, filename) | |
1026 | ||
1027 | # Update file modification time | |
1028 | if self.params.get('updatetime', True): | |
1029 | info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None)) | |
1030 | ||
bffbd5f0 PH |
1031 | self._hook_progress({ |
1032 | 'downloaded_bytes': byte_counter, | |
1033 | 'total_bytes': byte_counter, | |
1034 | 'filename': filename, | |
1035 | 'status': 'finished', | |
1036 | }) | |
1037 | ||
59ae15a5 | 1038 | return True |
bffbd5f0 PH |
1039 | |
1040 | def _hook_progress(self, status): | |
1041 | for ph in self._progress_hooks: | |
1042 | ph(status) | |
1043 | ||
def add_progress_hook(self, ph):
    """Register ph as a callback for download progress.

    ph is called with a single dictionary that always holds:
    * filename: The final filename
    * status: One of "downloading" and "finished"

    and may additionally hold some of:
    * downloaded_bytes: Bytes on disk
    * total_bytes: Total bytes, None if unknown
    * tmpfilename: The filename we're currently writing to

    Hooks are guaranteed to be called at least once (with status
    "finished") if the download is successful.
    """
    registered_hooks = self._progress_hooks
    registered_hooks.append(ph)