]>
Commit | Line | Data |
---|---|---|
d77c3dfd FV |
1 | #!/usr/bin/env python |
2 | # -*- coding: utf-8 -*- | |
3 | ||
9e8056d5 PH |
4 | from __future__ import absolute_import |
5 | ||
d77c3dfd | 6 | import math |
ce4be3a9 | 7 | import io |
d77c3dfd FV |
8 | import os |
9 | import re | |
10 | import socket | |
11 | import subprocess | |
12 | import sys | |
13 | import time | |
59ce2019 | 14 | import traceback |
d77c3dfd FV |
15 | |
16 | if os.name == 'nt': | |
59ae15a5 | 17 | import ctypes |
3eec021a | 18 | |
9e8056d5 | 19 | from .utils import * |
d77c3dfd FV |
20 | |
21 | ||
class FileDownloader(object):
    """File Downloader class.

    File downloader objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, file downloader objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the file downloader hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    asks the FileDownloader to process the video information, possibly
    downloading the video.

    File downloaders accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The FileDownloader also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    usenetrc:          Use netrc for authentication instead.
    quiet:             Do not print messages to stdout.
    verbose:           Print traceback information on errors and the
                       rtmpdump command line.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing format.
    simulate:          Do not download the video files.
    skip_download:     Write the requested side files but skip the
                       download of the video itself.
    max_downloads:     Raise MaxDownloadsReached once more than this
                       many downloads have been started.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    outtmpl:           Template for output names.
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    ratelimit:         Download speed limit, in bytes/sec.
    nooverwrites:      Prevent overwriting files.
    retries:           Number of times to retry for HTTP error 5xx
    buffersize:        Size of download buffer in bytes.
    noresizebuffer:    Do not automatically resize the download buffer.
    continuedl:        Try to continue downloads if possible.
    noprogress:        Do not print the progress bar.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logtostderr:       Log messages to stderr instead of stdout.
    consoletitle:      Display progress in console window's titlebar.
    nopart:            Do not use temporary .part files.
    updatetime:        Use the Last-modified header to set output file timestamps.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    writesubtitles:    Write the video subtitles to a .srt file
    subtitleslang:     Language of the subtitles to download
    test:              Download only first bytes to test the downloader.
    keepvideo:         Keep the video file after post-processing
    min_filesize:      Skip files smaller than this size
    max_filesize:      Skip files larger than this size
    """

    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None

    def __init__(self, params):
        """Create a FileDownloader object with the given options."""
        self._ies = []
        self._pps = []
        self._progress_hooks = []
        self._download_retcode = 0
        self._num_downloads = 0
        # A conditional expression (rather than list indexing) also copes
        # with a 'logtostderr' value of None.
        self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
        self.params = params

        if '%(stitle)s' in self.params['outtmpl']:
            self.to_stderr(u'WARNING: %(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    @staticmethod
    def format_bytes(bytes):
        """Format a byte count as a short string such as '1.50k'.

        Returns 'N/A' for None. A str argument is converted with float().
        The unit is chosen in powers of 1024 from the suffixes
        'bkMGTPEZY'.
        """
        if bytes is None:
            return 'N/A'
        if isinstance(bytes, str):
            bytes = float(bytes)
        suffixes = 'bkMGTPEZY'
        if bytes == 0.0:
            exponent = 0
        else:
            exponent = int(math.log(bytes, 1024.0))
            # Clamp to the suffix table: values below one byte would
            # otherwise index from the end of the table, and values
            # beyond the last suffix would raise IndexError.
            exponent = max(0, min(exponent, len(suffixes) - 1))
        suffix = suffixes[exponent]
        converted = float(bytes) / float(1024 ** exponent)
        return '%.2f%s' % (converted, suffix)

    @staticmethod
    def calc_percent(byte_counter, data_len):
        """Return the completed percentage as a 6-character string."""
        if data_len is None:
            return '---.-%'
        return '%6s' % ('%3.1f%%' % (float(byte_counter) / float(data_len) * 100.0))

    @staticmethod
    def calc_eta(start, now, total, current):
        """Return the estimated remaining time as 'MM:SS'.

        '--:--' is returned when the total is unknown, when nothing has
        been transferred yet, when less than a millisecond has elapsed,
        or when the estimate exceeds 99 minutes.
        """
        if total is None:
            return '--:--'
        dif = now - start
        if current == 0 or dif < 0.001:  # One millisecond
            return '--:--'
        rate = float(current) / dif
        eta = int((float(total) - float(current)) / rate)
        (eta_mins, eta_secs) = divmod(eta, 60)
        if eta_mins > 99:
            return '--:--'
        return '%02d:%02d' % (eta_mins, eta_secs)

    @staticmethod
    def calc_speed(start, now, bytes):
        """Return the average speed as a right-aligned 10-character string."""
        dif = now - start
        if bytes == 0 or dif < 0.001:  # One millisecond
            return '%10s' % '---b/s'
        return '%10s' % ('%s/s' % FileDownloader.format_bytes(float(bytes) / dif))

    @staticmethod
    def best_block_size(elapsed_time, bytes):
        """Pick the next read size from the throughput of the last read.

        The result is kept between half and double the size of the last
        block, and never exceeds 4 MB.
        """
        new_min = max(bytes / 2.0, 1.0)
        new_max = min(max(bytes * 2.0, 1.0), 4194304)  # Do not surpass 4 MB
        if elapsed_time < 0.001:
            return int(new_max)
        rate = bytes / elapsed_time
        if rate > new_max:
            return int(new_max)
        if rate < new_min:
            return int(new_min)
        return int(rate)

    @staticmethod
    def parse_bytes(bytestr):
        """Parse a string indicating a byte quantity into an integer."""
        matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
        if matchobj is None:
            return None
        number = float(matchobj.group(1))
        # An empty suffix yields index 0, i.e. a multiplier of 1.
        multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
        return int(round(number * multiplier))

    def add_info_extractor(self, ie):
        """Add an InfoExtractor object to the end of the list."""
        self._ies.append(ie)
        ie.set_downloader(self)

    def add_post_processor(self, pp):
        """Add a PostProcessor object to the end of the chain."""
        self._pps.append(pp)
        pp.set_downloader(self)

    def to_screen(self, message, skip_eol=False):
        """Print message to stdout if not in quiet mode."""
        assert type(message) == type(u'')
        if not self.params.get('quiet', False):
            terminator = [u'\n', u''][skip_eol]
            output = message + terminator
            if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
                output = output.encode(preferredencoding(), 'ignore')
            self._screen_file.write(output)
        self._screen_file.flush()

    def to_stderr(self, message):
        """Print message to stderr."""
        assert type(message) == type(u'')
        output = message + u'\n'
        # Inspect the stream that is actually written to (sys.stderr), not
        # the screen file, when deciding whether bytes are needed.
        if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
            output = output.encode(preferredencoding())
        sys.stderr.write(output)

    def to_cons_title(self, message):
        """Set console/terminal window title to message."""
        if not self.params.get('consoletitle', False):
            return
        if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
        elif 'TERM' in os.environ:
            self.to_screen('\033]0;%s\007' % message, skip_eol=True)

    def fixed_template(self):
        """Checks if the output template is fixed."""
        return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)

    def trouble(self, message=None, tb=None):
        """Determine action to take when a download problem appears.

        Depending on if the downloader has been configured to ignore
        download errors or not, this method may throw an exception or
        not when errors are found, after printing the message.

        tb, if given, is additional traceback information.
        """
        if message is not None:
            self.to_stderr(message)
        if self.params.get('verbose'):
            if tb is None:
                tb_data = traceback.format_list(traceback.extract_stack())
                tb = u''.join(tb_data)
            self.to_stderr(tb)
        if not self.params.get('ignoreerrors', False):
            raise DownloadError(message)
        self._download_retcode = 1

    def slow_down(self, start_time, byte_counter):
        """Sleep if the download speed is over the rate limit."""
        rate_limit = self.params.get('ratelimit', None)
        if rate_limit is None or byte_counter == 0:
            return
        now = time.time()
        elapsed = now - start_time
        if elapsed <= 0.0:
            return
        speed = float(byte_counter) / elapsed
        if speed > rate_limit:
            # Sleep exactly long enough for the average to drop to the limit.
            time.sleep((byte_counter - rate_limit * elapsed) / rate_limit)

    def temp_name(self, filename):
        """Returns a temporary filename for the given filename."""
        if self.params.get('nopart', False) or filename == u'-' or \
                (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
            return filename
        return filename + u'.part'

    def undo_temp_name(self, filename):
        """Strip a trailing '.part' from filename, if present."""
        if filename.endswith(u'.part'):
            return filename[:-len(u'.part')]
        return filename

    def try_rename(self, old_filename, new_filename):
        """Rename old_filename to new_filename, reporting failure via trouble()."""
        try:
            if old_filename == new_filename:
                return
            os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
        except (IOError, OSError):
            self.trouble(u'ERROR: unable to rename file')

    def try_utime(self, filename, last_modified_hdr):
        """Try to set the last-modified time of the given file.

        Returns the converted timestamp, or None when the header is
        missing, the file does not exist, or the header cannot be
        converted. A failure to apply the timestamp is ignored.
        """
        if last_modified_hdr is None:
            return
        if not os.path.isfile(encodeFilename(filename)):
            return
        filetime = timeconvert(last_modified_hdr)
        if filetime is None:
            return filetime
        try:
            # Encode the name like every other filesystem call in this class.
            os.utime(encodeFilename(filename), (time.time(), filetime))
        except Exception:
            # Best effort only; unlike a bare except this does not swallow
            # KeyboardInterrupt or SystemExit.
            pass
        return filetime

    def report_writedescription(self, descfn):
        """ Report that the description file is being written """
        self.to_screen(u'[info] Writing video description to: ' + descfn)

    def report_writesubtitles(self, srtfn):
        """ Report that the subtitles file is being written """
        self.to_screen(u'[info] Writing video subtitles to: ' + srtfn)

    def report_writeinfojson(self, infofn):
        """ Report that the metadata file is being written """
        self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)

    def report_destination(self, filename):
        """Report destination filename."""
        self.to_screen(u'[download] Destination: ' + filename)

    def report_progress(self, percent_str, data_len_str, speed_str, eta_str):
        """Report download progress."""
        if self.params.get('noprogress', False):
            return
        self.to_screen(u'\r[download] %s of %s at %s ETA %s' %
                (percent_str, data_len_str, speed_str, eta_str), skip_eol=True)
        self.to_cons_title(u'youtube-dl - %s of %s at %s ETA %s' %
                (percent_str.strip(), data_len_str.strip(), speed_str.strip(), eta_str.strip()))

    def report_resuming_byte(self, resume_len):
        """Report attempt to resume at given byte."""
        self.to_screen(u'[download] Resuming download at byte %s' % resume_len)

    def report_retry(self, count, retries):
        """Report retry in case of HTTP error 5xx"""
        self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))

    def report_file_already_downloaded(self, file_name):
        """Report file has already been fully downloaded."""
        try:
            self.to_screen(u'[download] %s has already been downloaded' % file_name)
        except UnicodeEncodeError:
            self.to_screen(u'[download] The file has already been downloaded')

    def report_unable_to_resume(self):
        """Report it was impossible to resume download."""
        self.to_screen(u'[download] Unable to resume')

    def report_finish(self):
        """Report download finished."""
        if self.params.get('noprogress', False):
            self.to_screen(u'[download] Download completed')
        else:
            self.to_screen(u'')

    def increment_downloads(self):
        """Increment the ordinal that assigns a number to each file."""
        self._num_downloads += 1

    def prepare_filename(self, info_dict):
        """Generate the output filename.

        Returns None (after reporting via trouble()) when the output
        template cannot be expanded.
        """
        try:
            template_dict = dict(info_dict)

            template_dict['epoch'] = int(time.time())
            template_dict['autonumber'] = u'%05d' % self._num_downloads

            sanitize = lambda k, v: sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == u'id'))
            template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

            filename = self.params['outtmpl'] % template_dict
            return filename
        except (ValueError, KeyError):
            self.trouble(u'ERROR: invalid system charset or erroneous output template')
            return None

    def _match_entry(self, info_dict):
        """ Returns None iff the file should be downloaded.

        Otherwise returns the reason for skipping it, without the
        '[download] ' prefix (process_info adds that).
        """

        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            # Only byte strings need decoding; text has no .decode() on
            # Python 3.
            if isinstance(matchtitle, bytes):
                matchtitle = matchtitle.decode('utf8')
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if isinstance(rejecttitle, bytes):
                rejecttitle = rejecttitle.decode('utf8')
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
        return None

    def process_info(self, info_dict):
        """Process a single dictionary returned by an InfoExtractor."""

        # Keep for backwards compatibility
        info_dict['stitle'] = info_dict['title']

        if 'format' not in info_dict:
            info_dict['format'] = info_dict['ext']

        reason = self._match_entry(info_dict)
        if reason is not None:
            self.to_screen(u'[download] ' + reason)
            return

        max_downloads = self.params.get('max_downloads')
        if max_downloads is not None:
            if self._num_downloads > int(max_downloads):
                raise MaxDownloadsReached()

        filename = self.prepare_filename(info_dict)

        # Forced printings
        if self.params.get('forcetitle', False):
            compat_print(info_dict['title'])
        if self.params.get('forceurl', False):
            compat_print(info_dict['url'])
        if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
            compat_print(info_dict['thumbnail'])
        if self.params.get('forcedescription', False) and 'description' in info_dict:
            compat_print(info_dict['description'])
        if self.params.get('forcefilename', False) and filename is not None:
            compat_print(filename)
        if self.params.get('forceformat', False):
            compat_print(info_dict['format'])

        # Do nothing else if in simulate mode
        if self.params.get('simulate', False):
            return

        if filename is None:
            return

        try:
            dn = os.path.dirname(encodeFilename(filename))
            if dn != '' and not os.path.exists(dn):  # dn is already encoded
                os.makedirs(dn)
        except (OSError, IOError) as err:
            self.trouble(u'ERROR: unable to create directory ' + compat_str(err))
            return

        if self.params.get('writedescription', False):
            try:
                descfn = filename + u'.description'
                self.report_writedescription(descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write description file ' + descfn)
                return

        if self.params.get('writesubtitles', False) and 'subtitles' in info_dict and info_dict['subtitles']:
            # subtitles download errors are already managed as troubles in relevant IE
            # that way it will silently go on when used with unsupporting IE
            try:
                srtfn = filename.rsplit('.', 1)[0] + u'.srt'
                self.report_writesubtitles(srtfn)
                with io.open(encodeFilename(srtfn), 'w', encoding='utf-8') as srtfile:
                    srtfile.write(info_dict['subtitles'])
            except (OSError, IOError):
                # Report the subtitles file name; descfn may not even be
                # defined when no description file was requested.
                self.trouble(u'ERROR: Cannot write subtitles file ' + srtfn)
                return

        if self.params.get('writeinfojson', False):
            infofn = filename + u'.info.json'
            self.report_writeinfojson(infofn)
            try:
                # The open URL handle is excluded from the JSON dump.
                json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
                write_json_file(json_info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.trouble(u'ERROR: Cannot write metadata to JSON file ' + infofn)
                return

        if not self.params.get('skip_download', False):
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
                success = True
            else:
                try:
                    success = self._do_download(filename, info_dict)
                except (OSError, IOError):
                    raise UnavailableVideoError()
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.trouble(u'ERROR: unable to download video data: %s' % str(err))
                    return
                except (ContentTooShortError, ) as err:
                    self.trouble(u'ERROR: content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                    return

            if success:
                try:
                    self.post_process(filename, info_dict)
                except (PostProcessingError) as err:
                    self.trouble(u'ERROR: postprocessing: %s' % str(err))
                    return

    def download(self, url_list):
        """Download a given list of URLs.

        Returns the accumulated return code (non-zero if any error was
        reported while errors are being ignored).
        """
        if len(url_list) > 1 and self.fixed_template():
            raise SameFileError(self.params['outtmpl'])

        for url in url_list:
            suitable_found = False
            for ie in self._ies:
                # Go to next InfoExtractor if not suitable
                if not ie.suitable(url):
                    continue

                # Warn if the _WORKING attribute is False
                if not ie.working():
                    self.to_stderr(u'WARNING: the program functionality for this site has been marked as broken, '
                                   u'and will probably not work. If you want to go on, use the -i option.')

                # Suitable InfoExtractor found
                suitable_found = True

                # Extract information from URL and process it
                try:
                    videos = ie.extract(url)
                except ExtractorError as de:  # An error we somewhat expected
                    self.trouble(u'ERROR: ' + compat_str(de), de.format_traceback())
                    break
                except Exception as e:
                    if self.params.get('ignoreerrors', False):
                        self.trouble(u'ERROR: ' + compat_str(e), tb=compat_str(traceback.format_exc()))
                        break
                    else:
                        raise

                if len(videos or []) > 1 and self.fixed_template():
                    raise SameFileError(self.params['outtmpl'])

                for video in videos or []:
                    video['extractor'] = ie.IE_NAME
                    try:
                        self.increment_downloads()
                        self.process_info(video)
                    except UnavailableVideoError:
                        self.trouble(u'\nERROR: unable to download video')

                # Suitable InfoExtractor had been found; go to next URL
                break

            if not suitable_found:
                self.trouble(u'ERROR: no suitable InfoExtractor: %s' % url)

        return self._download_retcode

    def post_process(self, filename, ie_info):
        """Run all the postprocessors on the given file."""
        info = dict(ie_info)
        info['filepath'] = filename
        keep_video = None
        for pp in self._pps:
            try:
                # NOTE(review): new_info is not fed to the next
                # postprocessor; every one receives the same `info`.
                keep_video_wish, new_info = pp.run(info)
                if keep_video_wish is not None:
                    if keep_video_wish:
                        keep_video = keep_video_wish
                    elif keep_video is None:
                        # No clear decision yet, let IE decide
                        keep_video = keep_video_wish
            except PostProcessingError as e:
                self.to_stderr(u'ERROR: ' + e.msg)
        if keep_video is False and not self.params.get('keepvideo', False):
            try:
                self.to_stderr(u'Deleting original file %s (pass -k to keep)' % filename)
                os.remove(encodeFilename(filename))
            except (IOError, OSError):
                self.to_stderr(u'WARNING: Unable to remove downloaded video file')

    def _download_with_rtmpdump(self, filename, url, player_url, page_url):
        """Download an RTMP stream with the external rtmpdump program.

        Returns True on success and False otherwise.
        """
        self.report_destination(filename)
        tmpfilename = self.temp_name(filename)

        # Check for rtmpdump first
        try:
            # open() exists on both Python 2 and 3 (file() does not), and
            # the with-block closes the handle again.
            with open(os.devnull, 'w') as devnull:
                subprocess.call(['rtmpdump', '-h'], stdout=devnull, stderr=subprocess.STDOUT)
        except (OSError, IOError):
            self.trouble(u'ERROR: RTMP download detected but "rtmpdump" could not be run')
            return False

        # Download using rtmpdump. rtmpdump returns exit code 2 when
        # the connection was interrupted and resuming appears to be
        # possible. This is part of rtmpdump's normal usage, AFAIK.
        basic_args = ['rtmpdump', '-q', '-r', url, '-o', tmpfilename]
        if player_url is not None:
            basic_args += ['-W', player_url]
        if page_url is not None:
            basic_args += ['--pageUrl', page_url]
        args = basic_args + (['-e', '-k', '1'] if self.params.get('continuedl', False) else [])
        if self.params.get('verbose', False):
            try:
                import pipes
                shell_quote = lambda args: ' '.join(map(pipes.quote, args))
            except ImportError:
                shell_quote = repr
            self.to_screen(u'[debug] rtmpdump command line: ' + shell_quote(args))
        retval = subprocess.call(args)
        while retval == 2 or retval == 1:
            prevsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[rtmpdump] %s bytes' % prevsize, skip_eol=True)
            time.sleep(5.0)  # This seems to be needed
            retval = subprocess.call(basic_args + ['-e'] + [[], ['-k', '1']][retval == 1])
            cursize = os.path.getsize(encodeFilename(tmpfilename))
            if prevsize == cursize and retval == 1:
                break
            # Some rtmp streams seem abort after ~ 99.8%. Don't complain for those
            if prevsize == cursize and retval == 2 and cursize > 1024:
                self.to_screen(u'\r[rtmpdump] Could not download the whole video. This can happen for some advertisements.')
                retval = 0
                break
        if retval == 0:
            fsize = os.path.getsize(encodeFilename(tmpfilename))
            self.to_screen(u'\r[rtmpdump] %s bytes' % fsize)
            self.try_rename(tmpfilename, filename)
            self._hook_progress({
                'downloaded_bytes': fsize,
                'total_bytes': fsize,
                'filename': filename,
                'status': 'finished',
            })
            return True
        else:
            self.trouble(u'\nERROR: rtmpdump exited with code %d' % retval)
            return False

    def _do_download(self, filename, info_dict):
        """Download info_dict['url'] to filename.

        Returns True on success and False on a reported failure. Raises
        ContentTooShortError when fewer bytes than announced arrive;
        unexpected HTTP errors propagate to the caller.
        """
        url = info_dict['url']

        # Check file already present
        if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
            self.report_file_already_downloaded(filename)
            self._hook_progress({
                'filename': filename,
                'status': 'finished',
            })
            return True

        # Attempt to download using rtmpdump
        if url.startswith('rtmp'):
            return self._download_with_rtmpdump(filename, url,
                                                info_dict.get('player_url', None),
                                                info_dict.get('page_url', None))

        tmpfilename = self.temp_name(filename)
        stream = None

        # Do not include the Accept-Encoding header
        headers = {'Youtubedl-no-compression': 'True'}
        if 'user_agent' in info_dict:
            headers['Youtubedl-user-agent'] = info_dict['user_agent']
        # basic_request never gets a Range header; it is used to start over
        # when resuming turns out to be impossible.
        basic_request = compat_urllib_request.Request(url, None, headers)
        request = compat_urllib_request.Request(url, None, headers)

        if self.params.get('test', False):
            request.add_header('Range', 'bytes=0-10240')

        # Establish possible resume length
        if os.path.isfile(encodeFilename(tmpfilename)):
            resume_len = os.path.getsize(encodeFilename(tmpfilename))
        else:
            resume_len = 0

        open_mode = 'wb'
        if resume_len != 0:
            if self.params.get('continuedl', False):
                self.report_resuming_byte(resume_len)
                request.add_header('Range', 'bytes=%d-' % resume_len)
                open_mode = 'ab'
            else:
                resume_len = 0

        count = 0
        retries = self.params.get('retries', 0)
        while count <= retries:
            # Establish connection
            try:
                # NOTE(review): a pre-opened 'urlhandle' is picked up here
                # but immediately replaced by a fresh connection below, so
                # it is effectively unused. Kept as-is because using it
                # would bypass the Range header set above - confirm intent.
                if count == 0 and 'urlhandle' in info_dict:
                    data = info_dict['urlhandle']
                data = compat_urllib_request.urlopen(request)
                break
            except (compat_urllib_error.HTTPError, ) as err:
                if (err.code < 500 or err.code >= 600) and err.code != 416:
                    # Unexpected HTTP error
                    raise
                elif err.code == 416:
                    # Unable to resume (requested range not satisfiable)
                    try:
                        # Open the connection again without the range header
                        data = compat_urllib_request.urlopen(basic_request)
                        content_length = data.info()['Content-Length']
                    except (compat_urllib_error.HTTPError, ) as err:
                        if err.code < 500 or err.code >= 600:
                            raise
                    else:
                        # Examine the reported length
                        if (content_length is not None and
                                (resume_len - 100 < int(content_length) < resume_len + 100)):
                            # The file had already been fully downloaded.
                            # Explanation to the above condition: in issue #175 it was revealed that
                            # YouTube sometimes adds or removes a few bytes from the end of the file,
                            # changing the file size slightly and causing problems for some users. So
                            # I decided to implement a suggested change and consider the file
                            # completely downloaded if the file size differs less than 100 bytes from
                            # the one in the hard drive.
                            self.report_file_already_downloaded(filename)
                            self.try_rename(tmpfilename, filename)
                            self._hook_progress({
                                'filename': filename,
                                'status': 'finished',
                            })
                            return True
                        else:
                            # The length does not match, we start the download over
                            self.report_unable_to_resume()
                            # Nothing is being resumed any more: forget the
                            # partial length so sizes, percentages and hook
                            # byte counts describe the fresh download.
                            resume_len = 0
                            open_mode = 'wb'
                            break
            # Retry
            count += 1
            if count <= retries:
                self.report_retry(count, retries)

        if count > retries:
            self.trouble(u'ERROR: giving up after %s retries' % retries)
            return False

        data_len = data.info().get('Content-length', None)
        if data_len is not None:
            data_len = int(data_len) + resume_len
            min_data_len = self.params.get("min_filesize", None)
            max_data_len = self.params.get("max_filesize", None)
            if min_data_len is not None and data_len < min_data_len:
                self.to_screen(u'\r[download] File is smaller than min-filesize (%s bytes < %s bytes). Aborting.' % (data_len, min_data_len))
                return False
            if max_data_len is not None and data_len > max_data_len:
                self.to_screen(u'\r[download] File is larger than max-filesize (%s bytes > %s bytes). Aborting.' % (data_len, max_data_len))
                return False

        data_len_str = self.format_bytes(data_len)
        byte_counter = 0 + resume_len
        block_size = self.params.get('buffersize', 1024)
        start = time.time()
        while True:
            # Download and write
            before = time.time()
            data_block = data.read(block_size)
            after = time.time()
            if len(data_block) == 0:
                break
            byte_counter += len(data_block)

            # Open file just in time
            if stream is None:
                try:
                    (stream, tmpfilename) = sanitize_open(tmpfilename, open_mode)
                    assert stream is not None
                    filename = self.undo_temp_name(tmpfilename)
                    self.report_destination(filename)
                except (OSError, IOError) as err:
                    self.trouble(u'ERROR: unable to open for writing: %s' % str(err))
                    return False
            try:
                stream.write(data_block)
            except (IOError, OSError) as err:
                self.trouble(u'\nERROR: unable to write data: %s' % str(err))
                return False
            if not self.params.get('noresizebuffer', False):
                block_size = self.best_block_size(after - before, len(data_block))

            # Progress message
            speed_str = self.calc_speed(start, time.time(), byte_counter - resume_len)
            if data_len is None:
                self.report_progress('Unknown %', data_len_str, speed_str, 'Unknown ETA')
            else:
                percent_str = self.calc_percent(byte_counter, data_len)
                eta_str = self.calc_eta(start, time.time(), data_len - resume_len, byte_counter - resume_len)
                self.report_progress(percent_str, data_len_str, speed_str, eta_str)

            self._hook_progress({
                'downloaded_bytes': byte_counter,
                'total_bytes': data_len,
                'tmpfilename': tmpfilename,
                'filename': filename,
                'status': 'downloading',
            })

            # Apply rate limit
            self.slow_down(start, byte_counter - resume_len)

        if stream is None:
            self.trouble(u'\nERROR: Did not get any data blocks')
            return False
        stream.close()
        self.report_finish()
        if data_len is not None and byte_counter != data_len:
            raise ContentTooShortError(byte_counter, int(data_len))
        self.try_rename(tmpfilename, filename)

        # Update file modification time
        if self.params.get('updatetime', True):
            info_dict['filetime'] = self.try_utime(filename, data.info().get('last-modified', None))

        self._hook_progress({
            'downloaded_bytes': byte_counter,
            'total_bytes': byte_counter,
            'filename': filename,
            'status': 'finished',
        })

        return True

    def _hook_progress(self, status):
        """Call every registered progress hook with the status dictionary."""
        for ph in self._progress_hooks:
            ph(status)

    def add_progress_hook(self, ph):
        """ ph gets called on download progress, with a dictionary with the entries
        * filename: The final filename
        * status: One of "downloading" and "finished"

        It can also have some of the following entries:

        * downloaded_bytes: Bytes on disks
        * total_bytes: Total bytes, None if unknown
        * tmpfilename: The filename we're currently writing to

        Hooks are guaranteed to be called at least once (with status "finished")
        if the download is successful.
        """
        self._progress_hooks.append(ph)