]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
release 2013.11.22.2
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import absolute_import
5
c1c9a79c 6import errno
8222d8de 7import io
8694c600 8import json
8222d8de
JMF
9import os
10import re
11import shutil
12import socket
13import sys
14import time
15import traceback
16
1e5b9a95
PH
17if os.name == 'nt':
18 import ctypes
19
ce02ed60
PH
20from .utils import (
21 compat_http_client,
22 compat_print,
23 compat_str,
24 compat_urllib_error,
25 compat_urllib_request,
26 ContentTooShortError,
27 date_from_str,
28 DateRange,
29 determine_ext,
30 DownloadError,
31 encodeFilename,
32 ExtractorError,
33 locked_file,
34 MaxDownloadsReached,
35 PostProcessingError,
36 preferredencoding,
37 SameFileError,
38 sanitize_filename,
39 subtitles_filename,
40 takewhile_inclusive,
41 UnavailableVideoError,
42 write_json_file,
43 write_string,
44)
023fa8c4 45from .extractor import get_info_extractor, gen_extractors
8222d8de
JMF
46from .FileDownloader import FileDownloader
47
48
49class YoutubeDL(object):
50 """YoutubeDL class.
51
52 YoutubeDL objects are the ones responsible of downloading the
53 actual video file and writing it to disk if the user has requested
54 it, among some other tasks. In most cases there should be one per
55 program. As, given a video URL, the downloader doesn't know how to
56 extract all the needed information, task that InfoExtractors do, it
57 has to pass the URL to one of them.
58
59 For this, YoutubeDL objects have a method that allows
60 InfoExtractors to be registered in a given order. When it is passed
61 a URL, the YoutubeDL object handles it to the first InfoExtractor it
62 finds that reports being able to handle it. The InfoExtractor extracts
63 all the information about the video or videos the URL refers to, and
64 YoutubeDL process the extracted information, possibly using a File
65 Downloader to download the video.
66
67 YoutubeDL objects accept a lot of parameters. In order not to saturate
68 the object constructor with arguments, it receives a dictionary of
69 options instead. These options are available through the params
70 attribute for the InfoExtractors to use. The YoutubeDL also
71 registers itself as the downloader in charge for the InfoExtractors
72 that are added to it, so this is a "mutual registration".
73
74 Available options:
75
76 username: Username for authentication purposes.
77 password: Password for authentication purposes.
c6c19746 78 videopassword: Password for acces a video.
8222d8de
JMF
79 usenetrc: Use netrc for authentication instead.
80 verbose: Print additional info to stdout.
81 quiet: Do not print messages to stdout.
82 forceurl: Force printing final URL.
83 forcetitle: Force printing title.
84 forceid: Force printing ID.
85 forcethumbnail: Force printing thumbnail URL.
86 forcedescription: Force printing description.
87 forcefilename: Force printing final filename.
8694c600 88 forcejson: Force printing info_dict as JSON.
8222d8de
JMF
89 simulate: Do not download the video files.
90 format: Video format code.
91 format_limit: Highest quality format to try.
92 outtmpl: Template for output names.
93 restrictfilenames: Do not allow "&" and spaces in file names
94 ignoreerrors: Do not stop on download errors.
95 nooverwrites: Prevent overwriting files.
96 playliststart: Playlist item to start at.
97 playlistend: Playlist item to end at.
98 matchtitle: Download only matching titles.
99 rejecttitle: Reject downloads for matching titles.
100 logtostderr: Log messages to stderr instead of stdout.
101 writedescription: Write the video description to a .description file
102 writeinfojson: Write the video description to a .info.json file
1fb07d10 103 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
104 writethumbnail: Write the thumbnail image to a file
105 writesubtitles: Write the video subtitles to a file
b004821f 106 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 107 allsubtitles: Downloads all the subtitles of the video
0b7f3118 108 (requires writesubtitles or writeautomaticsub)
8222d8de 109 listsubtitles: Lists all available subtitles for the video
b98a6b2f 110 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 111 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
112 keepvideo: Keep the video file after post-processing
113 daterange: A DateRange object, download only if the upload_date is in the range.
114 skip_download: Skip the actual download of the video file
c35f9e72 115 cachedir: Location of the cache files in the filesystem.
c3c88a26 116 None to disable filesystem cache.
47192f92 117 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
118 age_limit: An integer representing the user's age in years.
119 Unsuitable videos for the given age are skipped.
c1c9a79c
PH
120 downloadarchive: File name of a file where all downloads are recorded.
121 Videos already present in the file are not downloaded
122 again.
fe7e0c98 123
8222d8de
JMF
124 The following parameters are not used by YoutubeDL itself, they are used by
125 the FileDownloader:
126 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
127 noresizebuffer, retries, continuedl, noprogress, consoletitle
128 """
129
130 params = None
131 _ies = []
132 _pps = []
133 _download_retcode = None
134 _num_downloads = None
135 _screen_file = None
136
137 def __init__(self, params):
138 """Create a FileDownloader object with the given options."""
139 self._ies = []
56c73665 140 self._ies_instances = {}
8222d8de
JMF
141 self._pps = []
142 self._progress_hooks = []
143 self._download_retcode = 0
144 self._num_downloads = 0
145 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
34308b30
PH
146
147 if (sys.version_info >= (3,) and sys.platform != 'win32' and
148 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
149 and not params['restrictfilenames']):
150 # On Python 3, the Unicode filesystem API will throw errors (#1474)
151 self.report_warning(
1d368c75 152 u'Assuming --restrict-filenames since file system encoding '
34308b30
PH
153 u'cannot encode all charactes. '
154 u'Set the LC_ALL environment variable to fix this.')
155 params['restrictfilenames'] = True
156
8222d8de
JMF
157 self.params = params
158 self.fd = FileDownloader(self, self.params)
159
160 if '%(stitle)s' in self.params['outtmpl']:
161 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
162
163 def add_info_extractor(self, ie):
164 """Add an InfoExtractor object to the end of the list."""
165 self._ies.append(ie)
56c73665 166 self._ies_instances[ie.ie_key()] = ie
8222d8de
JMF
167 ie.set_downloader(self)
168
56c73665
JMF
169 def get_info_extractor(self, ie_key):
170 """
171 Get an instance of an IE with name ie_key, it will try to get one from
172 the _ies list, if there's no instance it will create a new one and add
173 it to the extractor list.
174 """
175 ie = self._ies_instances.get(ie_key)
176 if ie is None:
177 ie = get_info_extractor(ie_key)()
178 self.add_info_extractor(ie)
179 return ie
180
023fa8c4
JMF
181 def add_default_info_extractors(self):
182 """
183 Add the InfoExtractors returned by gen_extractors to the end of the list
184 """
185 for ie in gen_extractors():
186 self.add_info_extractor(ie)
187
8222d8de
JMF
188 def add_post_processor(self, pp):
189 """Add a PostProcessor object to the end of the chain."""
190 self._pps.append(pp)
191 pp.set_downloader(self)
192
193 def to_screen(self, message, skip_eol=False):
194 """Print message to stdout if not in quiet mode."""
8222d8de
JMF
195 if not self.params.get('quiet', False):
196 terminator = [u'\n', u''][skip_eol]
197 output = message + terminator
7459e3a2 198 write_string(output, self._screen_file)
8222d8de
JMF
199
200 def to_stderr(self, message):
201 """Print message to stderr."""
202 assert type(message) == type(u'')
203 output = message + u'\n'
204 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
205 output = output.encode(preferredencoding())
206 sys.stderr.write(output)
207
1e5b9a95
PH
208 def to_console_title(self, message):
209 if not self.params.get('consoletitle', False):
210 return
211 if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
212 # c_wchar_p() might not be necessary if `message` is
213 # already of type unicode()
214 ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
215 elif 'TERM' in os.environ:
749febf4 216 write_string(u'\033]0;%s\007' % message, self._screen_file)
1e5b9a95 217
bdde425c
PH
218 def save_console_title(self):
219 if not self.params.get('consoletitle', False):
220 return
221 if 'TERM' in os.environ:
efd6c574
JMF
222 # Save the title on stack
223 write_string(u'\033[22;0t', self._screen_file)
bdde425c
PH
224
225 def restore_console_title(self):
226 if not self.params.get('consoletitle', False):
227 return
228 if 'TERM' in os.environ:
efd6c574
JMF
229 # Restore the title from stack
230 write_string(u'\033[23;0t', self._screen_file)
bdde425c
PH
231
232 def __enter__(self):
233 self.save_console_title()
234 return self
235
236 def __exit__(self, *args):
237 self.restore_console_title()
238
8222d8de
JMF
239 def fixed_template(self):
240 """Checks if the output template is fixed."""
241 return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
242
243 def trouble(self, message=None, tb=None):
244 """Determine action to take when a download problem appears.
245
246 Depending on if the downloader has been configured to ignore
247 download errors or not, this method may throw an exception or
248 not when errors are found, after printing the message.
249
250 tb, if given, is additional traceback information.
251 """
252 if message is not None:
253 self.to_stderr(message)
254 if self.params.get('verbose'):
255 if tb is None:
256 if sys.exc_info()[0]: # if .trouble has been called from an except block
257 tb = u''
258 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
259 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
260 tb += compat_str(traceback.format_exc())
261 else:
262 tb_data = traceback.format_list(traceback.extract_stack())
263 tb = u''.join(tb_data)
264 self.to_stderr(tb)
265 if not self.params.get('ignoreerrors', False):
266 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
267 exc_info = sys.exc_info()[1].exc_info
268 else:
269 exc_info = sys.exc_info()
270 raise DownloadError(message, exc_info)
271 self._download_retcode = 1
272
273 def report_warning(self, message):
274 '''
275 Print the message to stderr, it will be prefixed with 'WARNING:'
276 If stderr is a tty file the 'WARNING:' will be colored
277 '''
278 if sys.stderr.isatty() and os.name != 'nt':
fe7e0c98 279 _msg_header = u'\033[0;33mWARNING:\033[0m'
8222d8de 280 else:
fe7e0c98
JMF
281 _msg_header = u'WARNING:'
282 warning_message = u'%s %s' % (_msg_header, message)
8222d8de
JMF
283 self.to_stderr(warning_message)
284
285 def report_error(self, message, tb=None):
286 '''
287 Do the same as trouble, but prefixes the message with 'ERROR:', colored
288 in red if stderr is a tty file.
289 '''
290 if sys.stderr.isatty() and os.name != 'nt':
291 _msg_header = u'\033[0;31mERROR:\033[0m'
292 else:
293 _msg_header = u'ERROR:'
294 error_message = u'%s %s' % (_msg_header, message)
295 self.trouble(error_message, tb)
296
8222d8de
JMF
297 def report_writedescription(self, descfn):
298 """ Report that the description file is being written """
299 self.to_screen(u'[info] Writing video description to: ' + descfn)
300
301 def report_writesubtitles(self, sub_filename):
302 """ Report that the subtitles file is being written """
303 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
304
305 def report_writeinfojson(self, infofn):
306 """ Report that the metadata file has been written """
307 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
308
1fb07d10
JG
309 def report_writeannotations(self, annofn):
310 """ Report that the annotations file has been written. """
311 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
312
8222d8de
JMF
313 def report_file_already_downloaded(self, file_name):
314 """Report file has already been fully downloaded."""
315 try:
316 self.to_screen(u'[download] %s has already been downloaded' % file_name)
ce02ed60 317 except UnicodeEncodeError:
8222d8de
JMF
318 self.to_screen(u'[download] The file has already been downloaded')
319
320 def increment_downloads(self):
321 """Increment the ordinal that assigns a number to each file."""
322 self._num_downloads += 1
323
324 def prepare_filename(self, info_dict):
325 """Generate the output filename."""
326 try:
327 template_dict = dict(info_dict)
328
329 template_dict['epoch'] = int(time.time())
330 autonumber_size = self.params.get('autonumber_size')
331 if autonumber_size is None:
332 autonumber_size = 5
333 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
334 template_dict['autonumber'] = autonumber_templ % self._num_downloads
702665c0 335 if template_dict.get('playlist_index') is not None:
8222d8de
JMF
336 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
337
586a91b6 338 sanitize = lambda k, v: sanitize_filename(
8222d8de
JMF
339 u'NA' if v is None else compat_str(v),
340 restricted=self.params.get('restrictfilenames'),
586a91b6
PH
341 is_id=(k == u'id'))
342 template_dict = dict((k, sanitize(k, v))
343 for k, v in template_dict.items())
8222d8de 344
586a91b6
PH
345 tmpl = os.path.expanduser(self.params['outtmpl'])
346 filename = tmpl % template_dict
8222d8de
JMF
347 return filename
348 except KeyError as err:
349 self.report_error(u'Erroneous output template')
350 return None
351 except ValueError as err:
4efba05c 352 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
353 return None
354
355 def _match_entry(self, info_dict):
356 """ Returns None iff the file should be downloaded """
357
7012b23c
PH
358 if 'title' in info_dict:
359 # This can happen when we're just evaluating the playlist
360 title = info_dict['title']
361 matchtitle = self.params.get('matchtitle', False)
362 if matchtitle:
363 if not re.search(matchtitle, title, re.IGNORECASE):
364 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
365 rejecttitle = self.params.get('rejecttitle', False)
366 if rejecttitle:
367 if re.search(rejecttitle, title, re.IGNORECASE):
368 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
369 date = info_dict.get('upload_date', None)
370 if date is not None:
371 dateRange = self.params.get('daterange', DateRange())
372 if date not in dateRange:
373 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
8dbe9899
PH
374 age_limit = self.params.get('age_limit')
375 if age_limit is not None:
cfadd183 376 if age_limit < info_dict.get('age_limit', 0):
8dbe9899 377 return u'Skipping "' + title + '" because it is age restricted'
c1c9a79c 378 if self.in_download_archive(info_dict):
7012b23c
PH
379 return (u'%s has already been recorded in archive'
380 % info_dict.get('title', info_dict.get('id', u'video')))
8222d8de 381 return None
fe7e0c98 382
b6c45014
JMF
383 @staticmethod
384 def add_extra_info(info_dict, extra_info):
385 '''Set the keys from extra_info in info dict if they are missing'''
386 for key, value in extra_info.items():
387 info_dict.setdefault(key, value)
388
8222d8de
JMF
389 def extract_info(self, url, download=True, ie_key=None, extra_info={}):
390 '''
391 Returns a list with a dictionary for each video we find.
392 If 'download', also downloads the videos.
393 extra_info is a dict containing the extra values to add to each result
394 '''
fe7e0c98 395
8222d8de 396 if ie_key:
56c73665 397 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
398 else:
399 ies = self._ies
400
401 for ie in ies:
402 if not ie.suitable(url):
403 continue
404
405 if not ie.working():
406 self.report_warning(u'The program functionality for this site has been marked as broken, '
407 u'and will probably not work.')
408
409 try:
410 ie_result = ie.extract(url)
411 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
412 break
413 if isinstance(ie_result, list):
414 # Backwards compatibility: old IE result format
8222d8de
JMF
415 ie_result = {
416 '_type': 'compat_list',
417 'entries': ie_result,
418 }
9103bbc5
JMF
419 self.add_extra_info(ie_result,
420 {
421 'extractor': ie.IE_NAME,
be97abc2
JMF
422 'webpage_url': url,
423 'extractor_key': ie.ie_key(),
9103bbc5 424 })
b6c45014 425 return self.process_ie_result(ie_result, download, extra_info)
8222d8de
JMF
426 except ExtractorError as de: # An error we somewhat expected
427 self.report_error(compat_str(de), de.format_traceback())
428 break
429 except Exception as e:
430 if self.params.get('ignoreerrors', False):
431 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
432 break
433 else:
434 raise
435 else:
436 self.report_error(u'no suitable InfoExtractor: %s' % url)
fe7e0c98 437
8222d8de
JMF
438 def process_ie_result(self, ie_result, download=True, extra_info={}):
439 """
440 Take the result of the ie(may be modified) and resolve all unresolved
441 references (URLs, playlist items).
442
443 It will also download the videos if 'download'.
444 Returns the resolved ie_result.
445 """
446
447 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
448 if result_type == 'video':
b6c45014 449 self.add_extra_info(ie_result, extra_info)
feee2ecf 450 return self.process_video_result(ie_result, download=download)
8222d8de
JMF
451 elif result_type == 'url':
452 # We have to add extra_info to the results because it may be
453 # contained in a playlist
454 return self.extract_info(ie_result['url'],
455 download,
456 ie_key=ie_result.get('ie_key'),
457 extra_info=extra_info)
458 elif result_type == 'playlist':
7012b23c 459
8222d8de
JMF
460 # We process each entry in the playlist
461 playlist = ie_result.get('title', None) or ie_result.get('id', None)
fe7e0c98 462 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
8222d8de
JMF
463
464 playlist_results = []
465
466 n_all_entries = len(ie_result['entries'])
467 playliststart = self.params.get('playliststart', 1) - 1
468 playlistend = self.params.get('playlistend', -1)
469
470 if playlistend == -1:
471 entries = ie_result['entries'][playliststart:]
472 else:
473 entries = ie_result['entries'][playliststart:playlistend]
474
475 n_entries = len(entries)
476
477 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
478 (ie_result['extractor'], playlist, n_all_entries, n_entries))
479
fe7e0c98
JMF
480 for i, entry in enumerate(entries, 1):
481 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
8222d8de 482 extra = {
fe7e0c98
JMF
483 'playlist': playlist,
484 'playlist_index': i + playliststart,
b6c45014 485 'extractor': ie_result['extractor'],
9103bbc5 486 'webpage_url': ie_result['webpage_url'],
be97abc2 487 'extractor_key': ie_result['extractor_key'],
fe7e0c98 488 }
7012b23c
PH
489
490 reason = self._match_entry(entry)
491 if reason is not None:
492 self.to_screen(u'[download] ' + reason)
493 continue
494
8222d8de
JMF
495 entry_result = self.process_ie_result(entry,
496 download=download,
497 extra_info=extra)
498 playlist_results.append(entry_result)
499 ie_result['entries'] = playlist_results
500 return ie_result
501 elif result_type == 'compat_list':
502 def _fixup(r):
b6c45014 503 self.add_extra_info(r,
9103bbc5
JMF
504 {
505 'extractor': ie_result['extractor'],
506 'webpage_url': ie_result['webpage_url'],
be97abc2 507 'extractor_key': ie_result['extractor_key'],
9103bbc5 508 })
8222d8de
JMF
509 return r
510 ie_result['entries'] = [
b6c45014 511 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
512 for r in ie_result['entries']
513 ]
514 return ie_result
515 else:
516 raise Exception('Invalid result type: %s' % result_type)
517
a9c58ad9
JMF
518 def select_format(self, format_spec, available_formats):
519 if format_spec == 'best' or format_spec is None:
520 return available_formats[-1]
521 elif format_spec == 'worst':
522 return available_formats[0]
523 else:
49e86983
JMF
524 extensions = [u'mp4', u'flv', u'webm', u'3gp']
525 if format_spec in extensions:
526 filter_f = lambda f: f['ext'] == format_spec
527 else:
528 filter_f = lambda f: f['format_id'] == format_spec
fe7e0c98 529 matches = list(filter(filter_f, available_formats))
a9c58ad9
JMF
530 if matches:
531 return matches[-1]
532 return None
533
dd82ffea
JMF
534 def process_video_result(self, info_dict, download=True):
535 assert info_dict.get('_type', 'video') == 'video'
536
537 if 'playlist' not in info_dict:
538 # It isn't part of a playlist
539 info_dict['playlist'] = None
540 info_dict['playlist_index'] = None
541
6ff000b8 542 # This extractors handle format selection themselves
a7685f3b 543 if info_dict['extractor'] in [u'youtube', u'Youku']:
12893efe
JMF
544 if download:
545 self.process_info(info_dict)
6ff000b8
JMF
546 return info_dict
547
dd82ffea
JMF
548 # We now pick which formats have to be downloaded
549 if info_dict.get('formats') is None:
550 # There's only one format available
551 formats = [info_dict]
552 else:
553 formats = info_dict['formats']
554
555 # We check that all the formats have the format and format_id fields
556 for (i, format) in enumerate(formats):
dd82ffea 557 if format.get('format_id') is None:
8016c922 558 format['format_id'] = compat_str(i)
8c51aa65
JMF
559 if format.get('format') is None:
560 format['format'] = u'{id} - {res}{note}'.format(
561 id=format['format_id'],
562 res=self.format_resolution(format),
71934988 563 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 564 )
c1002e96
PH
565 # Automatically determine file extension if missing
566 if 'ext' not in format:
567 format['ext'] = determine_ext(format['url'])
dd82ffea
JMF
568
569 if self.params.get('listformats', None):
570 self.list_formats(info_dict)
571 return
572
99e206d5
JMF
573 format_limit = self.params.get('format_limit', None)
574 if format_limit:
f4d96df0
PH
575 formats = list(takewhile_inclusive(
576 lambda f: f['format_id'] != format_limit, formats
577 ))
e028d0d1
JMF
578 if self.params.get('prefer_free_formats'):
579 def _free_formats_key(f):
580 try:
581 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
582 except ValueError:
583 ext_ord = -1
584 # We only compare the extension if they have the same height and width
585 return (f.get('height'), f.get('width'), ext_ord)
586 formats = sorted(formats, key=_free_formats_key)
99e206d5 587
dd82ffea 588 req_format = self.params.get('format', 'best')
a9c58ad9
JMF
589 if req_format is None:
590 req_format = 'best'
dd82ffea 591 formats_to_download = []
dd82ffea 592 # The -1 is for supporting YoutubeIE
a9c58ad9 593 if req_format in ('-1', 'all'):
dd82ffea
JMF
594 formats_to_download = formats
595 else:
a9c58ad9 596 # We can accept formats requestd in the format: 34/5/best, we pick
416a5efc 597 # the first that is available, starting from left
dd82ffea
JMF
598 req_formats = req_format.split('/')
599 for rf in req_formats:
a9c58ad9
JMF
600 selected_format = self.select_format(rf, formats)
601 if selected_format is not None:
602 formats_to_download = [selected_format]
dd82ffea
JMF
603 break
604 if not formats_to_download:
78a3a9f8
PH
605 raise ExtractorError(u'requested format not available',
606 expected=True)
dd82ffea
JMF
607
608 if download:
609 if len(formats_to_download) > 1:
610 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
611 for format in formats_to_download:
612 new_info = dict(info_dict)
613 new_info.update(format)
614 self.process_info(new_info)
615 # We update the info dict with the best quality format (backwards compatibility)
616 info_dict.update(formats_to_download[-1])
617 return info_dict
618
8222d8de
JMF
619 def process_info(self, info_dict):
620 """Process a single resolved IE result."""
621
622 assert info_dict.get('_type', 'video') == 'video'
623 #We increment the download the download count here to match the previous behaviour.
624 self.increment_downloads()
625
626 info_dict['fulltitle'] = info_dict['title']
627 if len(info_dict['title']) > 200:
628 info_dict['title'] = info_dict['title'][:197] + u'...'
629
630 # Keep for backwards compatibility
631 info_dict['stitle'] = info_dict['title']
632
633 if not 'format' in info_dict:
634 info_dict['format'] = info_dict['ext']
635
636 reason = self._match_entry(info_dict)
637 if reason is not None:
638 self.to_screen(u'[download] ' + reason)
639 return
640
641 max_downloads = self.params.get('max_downloads')
642 if max_downloads is not None:
643 if self._num_downloads > int(max_downloads):
644 raise MaxDownloadsReached()
645
646 filename = self.prepare_filename(info_dict)
647
648 # Forced printings
649 if self.params.get('forcetitle', False):
00ea0f11 650 compat_print(info_dict['fulltitle'])
8222d8de
JMF
651 if self.params.get('forceid', False):
652 compat_print(info_dict['id'])
653 if self.params.get('forceurl', False):
edde6c56
PH
654 # For RTMP URLs, also include the playpath
655 compat_print(info_dict['url'] + info_dict.get('play_path', u''))
216d71d0 656 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
8222d8de 657 compat_print(info_dict['thumbnail'])
216d71d0 658 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
8222d8de
JMF
659 compat_print(info_dict['description'])
660 if self.params.get('forcefilename', False) and filename is not None:
661 compat_print(filename)
662 if self.params.get('forceformat', False):
663 compat_print(info_dict['format'])
9d153818
MF
664 if self.params.get('forcejson', False):
665 compat_print(json.dumps(info_dict))
8222d8de
JMF
666
667 # Do nothing else if in simulate mode
668 if self.params.get('simulate', False):
669 return
670
671 if filename is None:
672 return
673
674 try:
675 dn = os.path.dirname(encodeFilename(filename))
676 if dn != '' and not os.path.exists(dn):
677 os.makedirs(dn)
678 except (OSError, IOError) as err:
679 self.report_error(u'unable to create directory ' + compat_str(err))
680 return
681
682 if self.params.get('writedescription', False):
683 try:
684 descfn = filename + u'.description'
685 self.report_writedescription(descfn)
686 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
687 descfile.write(info_dict['description'])
b3f0e530 688 except (KeyError, TypeError):
535f59bb 689 self.report_warning(u'There\'s no description to write.')
8222d8de
JMF
690 except (OSError, IOError):
691 self.report_error(u'Cannot write description file ' + descfn)
692 return
693
1fb07d10
JG
694 if self.params.get('writeannotations', False):
695 try:
fe7e0c98
JMF
696 annofn = filename + u'.annotations.xml'
697 self.report_writeannotations(annofn)
698 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
699 annofile.write(info_dict['annotations'])
1fb07d10
JG
700 except (KeyError, TypeError):
701 self.report_warning(u'There are no annotations to write.')
702 except (OSError, IOError):
fe7e0c98
JMF
703 self.report_error(u'Cannot write annotations file: ' + annofn)
704 return
1fb07d10 705
c4a91be7 706 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 707 self.params.get('writeautomaticsub')])
c4a91be7 708
fe7e0c98 709 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
8222d8de
JMF
710 # subtitles download errors are already managed as troubles in relevant IE
711 # that way it will silently go on when used with unsupporting IE
8222d8de 712 subtitles = info_dict['subtitles']
ca715127 713 sub_format = self.params.get('subtitlesformat', 'srt')
5d51a883
JMF
714 for sub_lang in subtitles.keys():
715 sub = subtitles[sub_lang]
6804038d
JMF
716 if sub is None:
717 continue
8222d8de 718 try:
d4051a8e 719 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
8222d8de
JMF
720 self.report_writesubtitles(sub_filename)
721 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
5d51a883 722 subfile.write(sub)
8222d8de
JMF
723 except (OSError, IOError):
724 self.report_error(u'Cannot write subtitles file ' + descfn)
725 return
726
8222d8de 727 if self.params.get('writeinfojson', False):
9771cceb 728 infofn = os.path.splitext(filename)[0] + u'.info.json'
8222d8de
JMF
729 self.report_writeinfojson(infofn)
730 try:
fe7e0c98 731 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
8222d8de
JMF
732 write_json_file(json_info_dict, encodeFilename(infofn))
733 except (OSError, IOError):
734 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
735 return
736
737 if self.params.get('writethumbnail', False):
d8269e1d 738 if info_dict.get('thumbnail') is not None:
cbdbb766 739 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
8222d8de
JMF
740 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
741 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
742 (info_dict['extractor'], info_dict['id']))
0a60edcf
JMF
743 try:
744 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
745 with open(thumb_filename, 'wb') as thumbf:
746 shutil.copyfileobj(uf, thumbf)
747 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
748 (info_dict['extractor'], info_dict['id'], thumb_filename))
749 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
750 self.report_warning(u'Unable to download thumbnail "%s": %s' %
751 (info_dict['thumbnail'], compat_str(err)))
8222d8de
JMF
752
753 if not self.params.get('skip_download', False):
754 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
755 success = True
756 else:
757 try:
758 success = self.fd._do_download(filename, info_dict)
8222d8de
JMF
759 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
760 self.report_error(u'unable to download video data: %s' % str(err))
761 return
c40c6aaa
JMF
762 except (OSError, IOError) as err:
763 raise UnavailableVideoError(err)
8222d8de
JMF
764 except (ContentTooShortError, ) as err:
765 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
766 return
767
768 if success:
769 try:
770 self.post_process(filename, info_dict)
771 except (PostProcessingError) as err:
772 self.report_error(u'postprocessing: %s' % str(err))
773 return
774
c1c9a79c
PH
775 self.record_download_archive(info_dict)
776
8222d8de
JMF
777 def download(self, url_list):
778 """Download a given list of URLs."""
779 if len(url_list) > 1 and self.fixed_template():
780 raise SameFileError(self.params['outtmpl'])
781
782 for url in url_list:
783 try:
784 #It also downloads the videos
785 videos = self.extract_info(url)
786 except UnavailableVideoError:
787 self.report_error(u'unable to download video')
788 except MaxDownloadsReached:
789 self.to_screen(u'[info] Maximum number of downloaded files reached.')
790 raise
791
792 return self._download_retcode
793
794 def post_process(self, filename, ie_info):
795 """Run all the postprocessors on the given file."""
796 info = dict(ie_info)
797 info['filepath'] = filename
798 keep_video = None
799 for pp in self._pps:
800 try:
fe7e0c98 801 keep_video_wish, new_info = pp.run(info)
8222d8de
JMF
802 if keep_video_wish is not None:
803 if keep_video_wish:
804 keep_video = keep_video_wish
805 elif keep_video is None:
806 # No clear decision yet, let IE decide
807 keep_video = keep_video_wish
808 except PostProcessingError as e:
bbcbf4d4 809 self.report_error(e.msg)
8222d8de
JMF
810 if keep_video is False and not self.params.get('keepvideo', False):
811 try:
812 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
813 os.remove(encodeFilename(filename))
814 except (IOError, OSError):
815 self.report_warning(u'Unable to remove downloaded video file')
c1c9a79c
PH
816
817 def in_download_archive(self, info_dict):
818 fn = self.params.get('download_archive')
819 if fn is None:
820 return False
7012b23c
PH
821 extractor = info_dict.get('extractor_id')
822 if extractor is None:
823 if 'id' in info_dict:
824 extractor = info_dict.get('ie_key') # key in a playlist
825 if extractor is None:
826 return False # Incomplete video information
827 # Future-proof against any change in case
828 # and backwards compatibility with prior versions
829 extractor = extractor.lower()
830 vid_id = extractor + u' ' + info_dict['id']
c1c9a79c
PH
831 try:
832 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
833 for line in archive_file:
834 if line.strip() == vid_id:
835 return True
836 except IOError as ioe:
837 if ioe.errno != errno.ENOENT:
838 raise
839 return False
840
841 def record_download_archive(self, info_dict):
842 fn = self.params.get('download_archive')
843 if fn is None:
844 return
845 vid_id = info_dict['extractor'] + u' ' + info_dict['id']
846 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
847 archive_file.write(vid_id + u'\n')
dd82ffea 848
8c51aa65 849 @staticmethod
8abeeb94 850 def format_resolution(format, default='unknown'):
57dd9a8f
PH
851 if format.get('_resolution') is not None:
852 return format['_resolution']
8c51aa65
JMF
853 if format.get('height') is not None:
854 if format.get('width') is not None:
855 res = u'%sx%s' % (format['width'], format['height'])
856 else:
857 res = u'%sp' % format['height']
858 else:
8abeeb94 859 res = default
8c51aa65
JMF
860 return res
861
dd82ffea 862 def list_formats(self, info_dict):
91c7271a
PH
863 def format_note(fdict):
864 if fdict.get('format_note') is not None:
865 return fdict['format_note']
866 res = u''
867 if fdict.get('vcodec') is not None:
7150858d
PH
868 res += u'%-5s' % fdict['vcodec']
869 elif fdict.get('vbr') is not None:
870 res += u'video'
91c7271a
PH
871 if fdict.get('vbr') is not None:
872 res += u'@%4dk' % fdict['vbr']
873 if fdict.get('acodec') is not None:
874 if res:
875 res += u', '
7150858d
PH
876 res += u'%-5s' % fdict['acodec']
877 elif fdict.get('abr') is not None:
878 if res:
879 res += u', '
880 res += 'audio'
91c7271a
PH
881 if fdict.get('abr') is not None:
882 res += u'@%3dk' % fdict['abr']
883 return res
884
57dd9a8f 885 def line(format):
897d6cc4 886 return (u'%-20s%-10s%-12s%s' % (
8c51aa65
JMF
887 format['format_id'],
888 format['ext'],
8c51aa65 889 self.format_resolution(format),
91c7271a 890 format_note(format),
8c51aa65
JMF
891 )
892 )
57dd9a8f 893
94badb25
PH
894 formats = info_dict.get('formats', [info_dict])
895 formats_s = list(map(line, formats))
896 if len(formats) > 1:
b5349e87
PH
897 formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
898 formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'
57dd9a8f
PH
899
900 header_line = line({
901 'format_id': u'format code', 'ext': u'extension',
902 '_resolution': u'resolution', 'format_note': u'note'})
903 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
904 (info_dict['id'], header_line, u"\n".join(formats_s)))