]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
release 2013.11.17
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import errno
7 import io
8 import os
9 import re
10 import shutil
11 import socket
12 import sys
13 import time
14 import traceback
15
16 if os.name == 'nt':
17 import ctypes
18
19 from .utils import (
20 compat_http_client,
21 compat_print,
22 compat_str,
23 compat_urllib_error,
24 compat_urllib_request,
25 ContentTooShortError,
26 date_from_str,
27 DateRange,
28 determine_ext,
29 DownloadError,
30 encodeFilename,
31 ExtractorError,
32 locked_file,
33 MaxDownloadsReached,
34 PostProcessingError,
35 preferredencoding,
36 SameFileError,
37 sanitize_filename,
38 subtitles_filename,
39 takewhile_inclusive,
40 UnavailableVideoError,
41 write_json_file,
42 write_string,
43 )
44 from .extractor import get_info_extractor, gen_extractors
45 from .FileDownloader import FileDownloader
46
47
48 class YoutubeDL(object):
49 """YoutubeDL class.
50
51 YoutubeDL objects are the ones responsible of downloading the
52 actual video file and writing it to disk if the user has requested
53 it, among some other tasks. In most cases there should be one per
54 program. As, given a video URL, the downloader doesn't know how to
55 extract all the needed information, task that InfoExtractors do, it
56 has to pass the URL to one of them.
57
58 For this, YoutubeDL objects have a method that allows
59 InfoExtractors to be registered in a given order. When it is passed
60 a URL, the YoutubeDL object hands it to the first InfoExtractor it
61 finds that reports being able to handle it. The InfoExtractor extracts
62 all the information about the video or videos the URL refers to, and
63 YoutubeDL processes the extracted information, possibly using a File
64 Downloader to download the video.
65
66 YoutubeDL objects accept a lot of parameters. In order not to saturate
67 the object constructor with arguments, it receives a dictionary of
68 options instead. These options are available through the params
69 attribute for the InfoExtractors to use. The YoutubeDL also
70 registers itself as the downloader in charge for the InfoExtractors
71 that are added to it, so this is a "mutual registration".
72
73 Available options:
74
75 username: Username for authentication purposes.
76 password: Password for authentication purposes.
77 videopassword: Password for accessing a video.
78 usenetrc: Use netrc for authentication instead.
79 verbose: Print additional info to stdout.
80 quiet: Do not print messages to stdout.
81 forceurl: Force printing final URL.
82 forcetitle: Force printing title.
83 forceid: Force printing ID.
84 forcethumbnail: Force printing thumbnail URL.
85 forcedescription: Force printing description.
86 forcefilename: Force printing final filename.
87 simulate: Do not download the video files.
88 format: Video format code.
89 format_limit: Highest quality format to try.
90 outtmpl: Template for output names.
91 restrictfilenames: Do not allow "&" and spaces in file names
92 ignoreerrors: Do not stop on download errors.
93 nooverwrites: Prevent overwriting files.
94 playliststart: Playlist item to start at.
95 playlistend: Playlist item to end at.
96 matchtitle: Download only matching titles.
97 rejecttitle: Reject downloads for matching titles.
98 logtostderr: Log messages to stderr instead of stdout.
99 writedescription: Write the video description to a .description file
100 writeinfojson: Write the video metadata to a .info.json file
101 writeannotations: Write the video annotations to a .annotations.xml file
102 writethumbnail: Write the thumbnail image to a file
103 writesubtitles: Write the video subtitles to a file
104 writeautomaticsub: Write the automatic subtitles to a file
105 allsubtitles: Downloads all the subtitles of the video
106 (requires writesubtitles or writeautomaticsub)
107 listsubtitles: Lists all available subtitles for the video
108 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
109 subtitleslangs: List of languages of the subtitles to download
110 keepvideo: Keep the video file after post-processing
111 daterange: A DateRange object, download only if the upload_date is in the range.
112 skip_download: Skip the actual download of the video file
113 cachedir: Location of the cache files in the filesystem.
114 None to disable filesystem cache.
115 noplaylist: Download single video instead of a playlist if in doubt.
116 age_limit: An integer representing the user's age in years.
117 Unsuitable videos for the given age are skipped.
118 download_archive: File name of a file where all downloads are recorded.
119 Videos already present in the file are not downloaded
120 again.
121
122 The following parameters are not used by YoutubeDL itself, they are used by
123 the FileDownloader:
124 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
125 noresizebuffer, retries, continuedl, noprogress, consoletitle
126 """
127
# Class-level defaults; __init__ rebinds every one of them on the instance.
params = None  # options dictionary handed to the constructor
_ies = []  # registered InfoExtractor instances, in registration order
_pps = []  # registered PostProcessor instances, in registration order
_download_retcode = None  # 0 after __init__, set to 1 by trouble() for ignored errors
_num_downloads = None  # counter incremented by increment_downloads()
_screen_file = None  # stream used for regular messages (stdout or stderr)
134
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    It is stored as self.params (not copied) and may be modified in
    place: 'restrictfilenames' is forced to True when running on
    Python 3 with an ASCII-only file system encoding.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # A conditional expression (instead of indexing a list with the flag)
    # also copes with a None/absent 'logtostderr' value.
    if params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout

    # .get() is used so that a params dict without the key does not raise
    # KeyError before the object is even constructed.
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.params = params
    self.fd = FileDownloader(self, self.params)

    # Tolerate a missing/None template here; only the deprecation check
    # is affected, the template is still required when naming files.
    if '%(stitle)s' in (self.params.get('outtmpl') or u''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
160
def add_info_extractor(self, ie):
    """Register ie as the last InfoExtractor and make self its downloader.

    The instance is also indexed by its ie_key() so it can be looked up
    later by key.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
166
def get_info_extractor(self, ie_key):
    """
    Return the InfoExtractor instance registered under ie_key.

    When no instance is registered for that key, the extractor class is
    looked up, instantiated and registered via add_info_extractor first.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
178
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), keeping
    the order in which they are produced.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
185
def add_post_processor(self, pp):
    """Append pp to the postprocessor chain and make self its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
190
def to_screen(self, message, skip_eol=False):
    """Write message to the screen file unless the 'quiet' option is set.

    A newline is appended unless skip_eol is true.
    """
    if self.params.get('quiet', False):
        return
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
197
def to_stderr(self, message):
    """Print message (a unicode string) to stderr, followed by a newline.

    The text is encoded with the preferred encoding first when stderr
    is a binary stream or when running on Python 2.
    """
    assert isinstance(message, type(u''))
    output = message + u'\n'
    # The stream that is inspected must be the one that is written to;
    # self._screen_file may be stdout.
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
205
def to_console_title(self, message):
    """Set the console/terminal title to message.

    Does nothing unless the 'consoletitle' option is enabled.
    """
    if not self.params.get('consoletitle', False):
        return
    has_win_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if has_win_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title)
    elif 'TERM' in os.environ:
        write_string(u'\033]0;%s\007' % message, self._screen_file)
215
def save_console_title(self):
    """Write the terminal sequence '\\033[22t' when 'consoletitle' is set.

    Only emitted when the TERM environment variable exists.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        write_string(u'\033[22t', self._screen_file)
221
def restore_console_title(self):
    """Write the terminal sequence '\\033[23t' when 'consoletitle' is set.

    Only emitted when the TERM environment variable exists.
    """
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        write_string(u'\033[23t', self._screen_file)
227
def __enter__(self):
    """Context-manager entry: save the console title, then yield self."""
    manager = self
    manager.save_console_title()
    return manager
231
def __exit__(self, *args):
    """Context-manager exit: restore the console title.

    Returns None, so an exception raised in the with-body propagates.
    """
    self.restore_console_title()
    return None
234
def fixed_template(self):
    """Return True when the output template contains no %(name)s field."""
    field = re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl'])
    return field is None
238
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr first. With the 'verbose'
    option a traceback is printed as well: tb when given, otherwise one
    built from the exception being handled or, outside of an except
    block, from the current call stack.

    Afterwards, if 'ignoreerrors' is not set, DownloadError is raised;
    otherwise the return code is set to 1 and the method returns.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                handled = sys.exc_info()[1]
                pieces = []
                # Some exceptions carry the exc_info of their original cause
                if hasattr(handled, 'exc_info') and handled.exc_info[0]:
                    pieces.append(u''.join(traceback.format_exception(*handled.exc_info)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = u''.join(pieces)
            else:
                stack_lines = traceback.format_list(traceback.extract_stack())
                tb = u''.join(stack_lines)
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = sys.exc_info()
    handled = exc_info[1]
    if exc_info[0] and hasattr(handled, 'exc_info') and handled.exc_info[0]:
        exc_info = handled.exc_info
    raise DownloadError(message, exc_info)
268
def report_warning(self, message):
    '''
    Print message to stderr prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;33mWARNING:\033[0m' if colorize else u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
280
def report_error(self, message, tb=None):
    '''
    Prefix message with 'ERROR:' and hand it, together with tb, to trouble.
    The prefix is red when stderr is a tty and the OS is not Windows.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;31mERROR:\033[0m' if colorize else u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
292
def report_writedescription(self, descfn):
    """ Tell the user that the description is being written to descfn """
    prefix = u'[info] Writing video description to: '
    self.to_screen(prefix + descfn)
296
def report_writesubtitles(self, sub_filename):
    """ Tell the user that the subtitles are being written to sub_filename """
    prefix = u'[info] Writing video subtitles to: '
    self.to_screen(prefix + sub_filename)
300
def report_writeinfojson(self, infofn):
    """ Report that the metadata file is being written to infofn """
    # The message used to lack its verb ("Video description metadata as
    # JSON to: ..."); it now reads like the other report_write* messages.
    self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
304
def report_writeannotations(self, annofn):
    """ Tell the user that the annotations are being written to annofn """
    prefix = u'[info] Writing video annotations to: '
    self.to_screen(prefix + annofn)
308
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded.

    Falls back to a message without the file name when printing it
    raises UnicodeEncodeError.
    """
    try:
        notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
315
def increment_downloads(self):
    """Advance the counter that numbers each processed file."""
    self._num_downloads = self._num_downloads + 1
319
def prepare_filename(self, info_dict):
    """Build the output filename for info_dict from the 'outtmpl' option.

    The template receives a copy of info_dict extended with 'epoch' and
    'autonumber'; every value is converted to a sanitized string first
    (None becomes 'NA'). Returns None, after reporting an error, when
    the template cannot be expanded.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        # Zero-padded download counter; the width defaults to 5 digits
        width = self.params.get('autonumber_size')
        if width is None:
            width = 5
        fields['autonumber'] = (u'%0' + str(width) + u'd') % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        sanitized = {}
        for key, value in fields.items():
            text = u'NA' if value is None else compat_str(value)
            sanitized[key] = sanitize_filename(
                text,
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == u'id'))

        template = os.path.expanduser(self.params['outtmpl'])
        return template % sanitized
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
350
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a human-readable reason for skipping the entry.
    The reason carries no '[download] ' prefix: the caller adds it when
    printing, so prefixing here would print it twice.
    """

    title = info_dict['title']
    matchtitle = self.params.get('matchtitle', False)
    if matchtitle and not re.search(matchtitle, title, re.IGNORECASE):
        return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
    rejecttitle = self.params.get('rejecttitle', False)
    if rejecttitle and re.search(rejecttitle, title, re.IGNORECASE):
        return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        # An explicit None in info_dict counts as "no restriction"; it
        # cannot be ordered against an int on Python 3.
        if age_limit < (info_dict.get('age_limit') or 0):
            return u'Skipping "' + title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return (u'%(title)s has already been recorded in archive'
                % info_dict)
    return None
376
@staticmethod
def add_extra_info(info_dict, extra_info):
    '''Copy into info_dict every key of extra_info it does not have yet'''
    for name, value in extra_info.items():
        if name not in info_dict:
            info_dict[name] = value
382
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
    '''
    Run the first suitable InfoExtractor on url and return the processed
    result (whatever process_ie_result returns).
    If 'download', also downloads the videos.
    ie_key restricts the lookup to the extractor with that key.
    extra_info is a dict containing the extra values to add to each result
    Returns None when nothing was extracted or an error was reported.
    '''

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {'_type': 'compat_list', 'entries': ie_result}
            origin = {
                'extractor': ie.IE_NAME,
                'webpage_url': url,
                'extractor_key': ie.ie_key(),
            }
            self.add_extra_info(ie_result, origin)
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            break
    else:
        # The loop ran out of extractors without any of them being suitable
        self.report_error(u'no suitable InfoExtractor: %s' % url)
431
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Resolve every unresolved reference (URLs, playlist items) in the
    result of an InfoExtractor; ie_result may be modified in place.

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """

    # If not given we suppose it's a video, support the default old system
    result_type = ie_result.get('_type', 'video')

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)

    if result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)

    if result_type == 'playlist':
        self.add_extra_info(ie_result, extra_info)
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        all_entries = ie_result['entries']
        n_all_entries = len(all_entries)
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        # -1 means "up to the last entry"
        stop = None if playlistend == -1 else playlistend
        entries = all_entries[playliststart:stop]
        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        playlist_results = []
        for position, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (position, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': position + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }
            resolved = self.process_ie_result(entry,
                                              download=download,
                                              extra_info=extra)
            playlist_results.append(resolved)
        ie_result['entries'] = playlist_results
        return ie_result

    if result_type == 'compat_list':
        resolved_entries = []
        for r in ie_result['entries']:
            # Old-style entries inherit the origin of the enclosing result
            self.add_extra_info(r, {
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            })
            resolved_entries.append(self.process_ie_result(r, download, extra_info))
        ie_result['entries'] = resolved_entries
        return ie_result

    raise Exception('Invalid result type: %s' % result_type)
505
def select_format(self, format_spec, available_formats):
    """Pick the format designated by format_spec.

    available_formats is a list of format dicts ordered from worst to
    best. format_spec may be 'best' (or None), 'worst', one of the file
    extensions mp4/flv/webm/3gp, or a format_id; for the last two the
    best matching entry wins.

    Returns the chosen format dict, or None when nothing matches (which
    includes an empty available_formats).
    """
    # Without this guard 'best'/'worst' raise IndexError on an empty list
    if not available_formats:
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
521
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a single video result and process them.

    info_dict is modified in place: playlist fields are defaulted,
    missing 'format_id'/'format'/'ext' fields of every format are filled
    in and, finally, the fields of the best selected format are merged
    into it. With 'download', process_info is called once per selected
    format.

    Returns info_dict, or None when the 'listformats' option is set (the
    formats are only listed then).
    Raises ExtractorError when none of the requested formats exists.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # This extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    # (the loop variable is not called `format` to keep the builtin usable)
    for i, fmt in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=u' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # Unknown dimensions sort first. They are mapped to -1
            # because None cannot be ordered against an int on Python 3.
            height = f.get('height')
            width = f.get('width')
            # We only compare the extension if they have the same height and width
            return (
                -1 if height is None else height,
                -1 if width is None else width,
                ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        for rf in req_format.split('/'):
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
606
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Steps, in order: bump the download counter, normalise the title
    fields, apply the match filters, enforce 'max_downloads', print the
    forced fields, then (unless simulating) write the requested side
    files (description, annotations, subtitles, info JSON, thumbnail),
    download the video, run the postprocessors and record the video in
    the download archive.

    Returns None. Raises MaxDownloadsReached when the limit is exceeded
    and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    # prepare_filename has already reported the template error
    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Name the subtitles file itself; the description file
                # name may not even be bound at this point.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the dumped metadata
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    # Encode the name like every other file written here
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Do not leak the HTTP response
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The existing file is treated as a finished download
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % compat_str(err))
                return

    # NOTE(review): nesting reconstructed from a view without indentation;
    # assumed to sit at function level (i.e. it also runs with
    # 'skip_download') - confirm against the original file.
    self.record_download_archive(info_dict)
762
def download(self, url_list):
    """Extract (and thereby download) every URL of url_list.

    Raises SameFileError when several URLs would be written to one fixed
    output file; MaxDownloadsReached is announced and re-raised.
    Returns the accumulated return code.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
779
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Every postprocessor receives the info dict (a copy of ie_info with
    'filepath' set to filename) as updated by its predecessors. Each one
    returns a (keep_video_wish, new_info) pair; the original file is
    deleted afterwards only when no postprocessor asked to keep it, at
    least one asked to drop it, and the 'keepvideo' option is not set.
    A PostProcessingError is reported and the chain continues.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        # Hand the updated information to the next postprocessor; it used
        # to be discarded, so the chain only saw in-place modifications.
        if new_info is not None:
            info = new_info
        if keep_video_wish is not None:
            if keep_video_wish:
                keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
802
def in_download_archive(self, info_dict):
    """Tell whether the video of info_dict is listed in the archive file.

    Returns False when the 'download_archive' option is unset or the
    file does not exist; other IOErrors are propagated.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False
    # One archive line per video: "<extractor> <id>"
    vid_id = info_dict['extractor'] + u' ' + info_dict['id']
    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == vid_id for entry in archive_file):
                return True
    except IOError as ioe:
        if ioe.errno != errno.ENOENT:
            raise
    return False
817
def record_download_archive(self, info_dict):
    """Append the video of info_dict to the archive file.

    Does nothing when the 'download_archive' option is unset.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    # One archive line per video: "<extractor> <id>"
    entry = info_dict['extractor'] + u' ' + info_dict['id'] + u'\n'
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry)
825
@staticmethod
def format_resolution(format, default='unknown'):
    """Describe the resolution of a format dict as text.

    An explicit '_resolution' wins; otherwise the result is
    'WIDTHxHEIGHT', 'HEIGHTp' when only the height is known, or default
    when the height is missing.
    """
    if format.get('_resolution') is not None:
        return format['_resolution']
    if format.get('height') is None:
        return default
    if format.get('width') is None:
        return u'%sp' % format['height']
    return u'%sx%s' % (format['width'], format['height'])
838
def list_formats(self, info_dict):
    """Print a table of the formats available for the video in info_dict.

    The table has the columns format code, extension, resolution and
    note. When there are several formats the first row is tagged
    '(worst)' and the last one '(best)'. A missing or None 'formats'
    entry means the video itself is its only format.
    """
    def format_note(fdict):
        # An explicit note wins; otherwise one is built from the codec
        # and bitrate fields that are present.
        if fdict.get('format_note') is not None:
            return fdict['format_note']
        res = u''
        if fdict.get('vcodec') is not None:
            res += u'%-5s' % fdict['vcodec']
        elif fdict.get('vbr') is not None:
            res += u'video'
        if fdict.get('vbr') is not None:
            res += u'@%4dk' % fdict['vbr']
        if fdict.get('acodec') is not None:
            if res:
                res += u', '
            res += u'%-5s' % fdict['acodec']
        elif fdict.get('abr') is not None:
            if res:
                res += u', '
            res += 'audio'
        if fdict.get('abr') is not None:
            res += u'@%3dk' % fdict['abr']
        return res

    def line(format):
        return (u'%-20s%-10s%-12s%s' % (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
            )
        )

    # 'formats' may be present but None for single-format videos, so a
    # .get() default is not enough here.
    formats = info_dict.get('formats')
    if formats is None:
        formats = [info_dict]
    formats_s = [line(f) for f in formats]
    if len(formats) > 1:
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'})
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, u"\n".join(formats_s)))