]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Simplify logger code(#1811)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import errno
7 import io
8 import json
9 import os
10 import re
11 import shutil
12 import socket
13 import sys
14 import time
15 import traceback
16
17 if os.name == 'nt':
18 import ctypes
19
20 from .utils import (
21 compat_http_client,
22 compat_print,
23 compat_str,
24 compat_urllib_error,
25 compat_urllib_request,
26 ContentTooShortError,
27 date_from_str,
28 DateRange,
29 determine_ext,
30 DownloadError,
31 encodeFilename,
32 ExtractorError,
33 locked_file,
34 MaxDownloadsReached,
35 PostProcessingError,
36 preferredencoding,
37 SameFileError,
38 sanitize_filename,
39 subtitles_filename,
40 takewhile_inclusive,
41 UnavailableVideoError,
42 write_json_file,
43 write_string,
44 )
45 from .extractor import get_info_extractor, gen_extractors
46 from .FileDownloader import FileDownloader
47
48
49 class YoutubeDL(object):
50 """YoutubeDL class.
51
52 YoutubeDL objects are the ones responsible of downloading the
53 actual video file and writing it to disk if the user has requested
54 it, among some other tasks. In most cases there should be one per
55 program. As, given a video URL, the downloader doesn't know how to
56 extract all the needed information, task that InfoExtractors do, it
57 has to pass the URL to one of them.
58
59 For this, YoutubeDL objects have a method that allows
60 InfoExtractors to be registered in a given order. When it is passed
61 a URL, the YoutubeDL object handles it to the first InfoExtractor it
62 finds that reports being able to handle it. The InfoExtractor extracts
63 all the information about the video or videos the URL refers to, and
64 YoutubeDL process the extracted information, possibly using a File
65 Downloader to download the video.
66
67 YoutubeDL objects accept a lot of parameters. In order not to saturate
68 the object constructor with arguments, it receives a dictionary of
69 options instead. These options are available through the params
70 attribute for the InfoExtractors to use. The YoutubeDL also
71 registers itself as the downloader in charge for the InfoExtractors
72 that are added to it, so this is a "mutual registration".
73
74 Available options:
75
76 username: Username for authentication purposes.
77 password: Password for authentication purposes.
78 videopassword: Password for accessing a video.
79 usenetrc: Use netrc for authentication instead.
80 verbose: Print additional info to stdout.
81 quiet: Do not print messages to stdout.
82 forceurl: Force printing final URL.
83 forcetitle: Force printing title.
84 forceid: Force printing ID.
85 forcethumbnail: Force printing thumbnail URL.
86 forcedescription: Force printing description.
87 forcefilename: Force printing final filename.
88 forcejson: Force printing info_dict as JSON.
89 simulate: Do not download the video files.
90 format: Video format code.
91 format_limit: Highest quality format to try.
92 outtmpl: Template for output names.
93 restrictfilenames: Do not allow "&" and spaces in file names
94 ignoreerrors: Do not stop on download errors.
95 nooverwrites: Prevent overwriting files.
96 playliststart: Playlist item to start at.
97 playlistend: Playlist item to end at.
98 matchtitle: Download only matching titles.
99 rejecttitle: Reject downloads for matching titles.
100 logger: Log messages to a logging.Logger instance.
101 logtostderr: Log messages to stderr instead of stdout.
102 writedescription: Write the video description to a .description file
103 writeinfojson: Write the video metadata to a .info.json file
104 writeannotations: Write the video annotations to a .annotations.xml file
105 writethumbnail: Write the thumbnail image to a file
106 writesubtitles: Write the video subtitles to a file
107 writeautomaticsub: Write the automatic subtitles to a file
108 allsubtitles: Downloads all the subtitles of the video
109 (requires writesubtitles or writeautomaticsub)
110 listsubtitles: Lists all available subtitles for the video
111 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
112 subtitleslangs: List of languages of the subtitles to download
113 keepvideo: Keep the video file after post-processing
114 daterange: A DateRange object, download only if the upload_date is in the range.
115 skip_download: Skip the actual download of the video file
116 cachedir: Location of the cache files in the filesystem.
117 None to disable filesystem cache.
118 noplaylist: Download single video instead of a playlist if in doubt.
119 age_limit: An integer representing the user's age in years.
120 Unsuitable videos for the given age are skipped.
121 download_archive: File name of a file where all downloads are recorded.
122 Videos already present in the file are not downloaded
123 again.
124
125 The following parameters are not used by YoutubeDL itself, they are used by
126 the FileDownloader:
127 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
128 noresizebuffer, retries, continuedl, noprogress, consoletitle
129 """
130
131 params = None
132 _ies = []
133 _pps = []
134 _download_retcode = None
135 _num_downloads = None
136 _screen_file = None
137
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    It is kept as self.params and may be modified in place:
    'restrictfilenames' is forced to True when, on Python 3 outside
    Windows, the file system encoding is plain ASCII.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    # Assign the options before anything is reported: report_warning()
    # goes through to_stderr(), which reads self.params.
    self.params = params

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.fd = FileDownloader(self, self.params)

    # A missing template is tolerated here; it simply cannot contain %(stitle)s.
    if '%(stitle)s' in self.params.get('outtmpl', u''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
163
def add_info_extractor(self, ie):
    """Register an InfoExtractor at the end of the extractor list.

    The instance is also indexed by its ie_key() and told that this
    object is its downloader.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
169
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    When no instance is registered yet, one is created, added to the
    extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    # Not registered yet: look the class up by key and instantiate it.
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
181
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in order.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
188
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and set this object as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
193
def to_screen(self, message, skip_eol=False):
    """Print message to the screen file unless in quiet mode.

    When a 'logger' option is set the message goes to its debug()
    method instead and the 'quiet' option is not consulted.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if self.params.get('quiet', False):
        return
    # skip_eol indexes the pair: falsy -> newline, truthy -> nothing
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
202
def to_stderr(self, message):
    """Print message to stderr.

    message must be a unicode string. When a 'logger' option is set
    the message goes to its error() method instead of sys.stderr.
    """
    assert isinstance(message, type(u''))
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    output = message + u'\n'
    # Decide between bytes and text from the stream that is actually written
    # to (sys.stderr), not from self._screen_file, which may be sys.stdout.
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
213
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on."""
    if not self.params.get('consoletitle', False):
        return
    has_windows_console = os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow()
    if has_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        # Emit a terminal escape sequence carrying the title
        write_string(u'\033]0;%s\007' % message, self._screen_file)
223
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless the 'consoletitle' option is set and TERM is
    present in the environment.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        write_string(u'\033[22;0t', self._screen_file)
230
def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Does nothing unless the 'consoletitle' option is set and TERM is
    present in the environment.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        write_string(u'\033[23;0t', self._screen_file)
237
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    return self
241
def __exit__(self, *args):
    """Leave the context: restore the console title.

    Returns None, so an exception raised inside the with-block propagates.
    """
    self.restore_console_title()
244
def fixed_template(self):
    """Return True when the output template has no %(...)s field in it."""
    field = re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl'])
    return field is None
248
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message, if any, is printed to stderr first. In verbose mode
    traceback information is printed as well: tb when given, otherwise
    the exception being handled (including the exc_info it carries, if
    any) or, outside an except block, the current call stack.

    Unless 'ignoreerrors' is set a DownloadError is raised; otherwise
    the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                parts = []
                handled = sys.exc_info()[1]
                if hasattr(handled, 'exc_info') and handled.exc_info[0]:
                    # The handled exception wraps another one: show that first
                    parts.append(u''.join(traceback.format_exception(*handled.exc_info)))
                parts.append(compat_str(traceback.format_exc()))
                tb = u''.join(parts)
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        current = sys.exc_info()
        if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
            exc_info = current[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
278
def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty and the OS is not Windows.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    prefix = u'\033[0;33mWARNING:\033[0m' if colorize else u'WARNING:'
    self.to_stderr(u'%s %s' % (prefix, message))
290
def report_error(self, message, tb=None):
    '''
    Hand the message to trouble(), prefixed with 'ERROR:'.
    The prefix is colored in red when stderr is a tty and the OS is
    not Windows. tb is passed through to trouble() unchanged.
    '''
    colorize = sys.stderr.isatty() and os.name != 'nt'
    prefix = u'\033[0;31mERROR:\033[0m' if colorize else u'ERROR:'
    self.trouble(u'%s %s' % (prefix, message), tb)
302
def report_writedescription(self, descfn):
    """ Tell the user which file the video description goes to """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
306
def report_writesubtitles(self, sub_filename):
    """ Tell the user which file the subtitles go to """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
310
def report_writeinfojson(self, infofn):
    """ Report that the metadata file is being written """
    # The message now starts with "Writing", like the other report_write* messages.
    self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
314
def report_writeannotations(self, annofn):
    """ Tell the user which file the annotations go to. """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
318
def report_file_already_downloaded(self, file_name):
    """Report that the file has already been fully downloaded.

    Falls back to a message without the file name when printing the
    name raises UnicodeEncodeError.
    """
    try:
        named_notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(named_notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
325
def increment_downloads(self):
    """Advance the counter that numbers the downloaded files by one."""
    self._num_downloads = self._num_downloads + 1
329
def prepare_filename(self, info_dict):
    """Build the output filename from the 'outtmpl' option and info_dict.

    Returns the filename, or None (after reporting an error) when the
    template cannot be filled in.
    """
    try:
        fields = dict(info_dict)
        fields['epoch'] = int(time.time())

        # Zero-padded download counter; 5 digits unless 'autonumber_size' says otherwise
        width = self.params.get('autonumber_size')
        if width is None:
            width = 5
        fields['autonumber'] = (u'%0' + str(width) + u'd') % self._num_downloads

        if fields.get('playlist_index') is not None:
            fields['playlist_index'] = u'%05d' % fields['playlist_index']

        def _clean(key, value):
            # Missing values show up as 'NA' in the file name
            text = u'NA' if value is None else compat_str(value)
            return sanitize_filename(
                text,
                restricted=self.params.get('restrictfilenames'),
                is_id=(key == u'id'))

        fields = dict((key, _clean(key, value)) for key, value in fields.items())

        template = os.path.expanduser(self.params['outtmpl'])
        return template % fields
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
360
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a string with the reason for skipping it. The
    reason carries no '[download] ' prefix; callers add their own.
    """

    if 'title' in info_dict:
        # The title can be missing when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        if age_limit < info_dict.get('age_limit', 0):
            # The entry may have no title here, so fall back to its id
            name = info_dict.get('title', info_dict.get('id', u'video'))
            return u'Skipping "' + name + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return (u'%s has already been recorded in archive'
                % info_dict.get('title', info_dict.get('id', u'video')))
    return None
388
389 @staticmethod
390 def add_extra_info(info_dict, extra_info):
391 '''Set the keys from extra_info in info dict if they are missing'''
392 for key, value in extra_info.items():
393 info_dict.setdefault(key, value)
394
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    Returns whatever process_ie_result() returns for the extracted
    result. Returns None when no extractor is suitable, when the
    extractor returns None, or when an extraction error was reported.
    If 'download', also downloads the videos.
    ie_key, if given, selects the extractor to use instead of trying
    all the registered ones.
    extra_info is a dict containing the extra values to add to each result
    '''
    # A None default avoids sharing one dict object between calls
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            # Values the extractor already set are kept
            self.add_extra_info(ie_result,
                {
                    'extractor': ie.IE_NAME,
                    'webpage_url': url,
                    'extractor_key': ie.ie_key(),
                })
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without a break: no extractor accepted the URL
        self.report_error(u'no suitable InfoExtractor: %s' % url)
443
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is a dict with extra values for the result; keys the
    result already has are not overwritten.
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """
    # A None default avoids sharing one dict object between calls
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':

        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # 'playliststart' is 1-based, the slice below is 0-based
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
            (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen(u'[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        # Skipped entries are not part of the returned playlist
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Old-style entries get the extractor values of the list they came in
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
523
def select_format(self, format_spec, available_formats):
    """Pick the format described by format_spec from available_formats.

    'best' (or None) selects the last format and 'worst' the first.
    One of the known extensions selects the last format with that
    'ext'; anything else is compared with 'format_id'. Returns None
    when nothing matches.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    known_extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in known_extensions else 'format_id'
    candidates = [f for f in available_formats if f[field] == format_spec]
    if candidates:
        return candidates[-1]
    return None
539
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a video result and process them.

    Missing 'playlist', 'format_id', 'format' and 'ext' values are
    filled in. With 'download' every selected format goes through
    process_info(). Returns info_dict updated with the last selected
    format, or None when only the format list was requested.
    Raises ExtractorError when no requested format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # This extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    formats = info_dict.get('formats')
    if formats is None:
        # There's only one format available
        formats = [info_dict]

    # We check that all the formats have the format and format_id fields
    for index, fmt in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(index)
        if fmt.get('format') is None:
            if fmt.get('format_note') is not None:
                note = u' ({0})'.format(fmt['format_note'])
            else:
                note = ''
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=note,
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url'])

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        # Keep the formats up to and including the limit
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        free_order = [u'flv', u'mp4', u'webm']

        def _free_formats_key(f):
            ext_ord = free_order.index(f['ext']) if f['ext'] in free_order else -1
            # We only compare the extension if they have the same height and width
            return (f.get('height'), f.get('width'), ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        for candidate in req_format.split('/'):
            chosen = self.select_format(candidate, formats)
            if chosen is not None:
                formats_to_download = [chosen]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
624
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Counts the download, does the forced printings and, unless in
    simulate mode, writes the requested side files (description,
    annotations, subtitles, info JSON, thumbnail), downloads the
    video, runs the postprocessors and records the video in the
    download archive.

    Raises MaxDownloadsReached when the 'max_downloads' option is
    exceeded and UnavailableVideoError when the download fails with
    an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])
    if self.params.get('forcejson', False):
        compat_print(json.dumps(info_dict))

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            # Computed outside the try block so that the error message
            # below can always name the file that failed
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the JSON dump
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    # Same file name encoding as for every other file written here
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                # A missing thumbnail does not stop the video download
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The existing file counts as a successful download
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
782
def download(self, url_list):
    """Download a given list of URLs and return the download return code.

    Raises SameFileError when several URLs would go to one fixed file
    name, and re-raises MaxDownloadsReached after reporting it.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
799
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Every postprocessor gets a copy of ie_info with 'filepath' set to
    filename. The downloaded file is deleted afterwards when a
    postprocessor asked for that, none asked to keep it and the
    'keepvideo' option is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            wish, _new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        if wish is None:
            # This postprocessor has no opinion
            continue
        if wish:
            keep_video = wish
        elif keep_video is None:
            # No clear decision yet, let IE decide
            keep_video = wish

    delete_original = keep_video is False and not self.params.get('keepvideo', False)
    if delete_original:
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
822
def in_download_archive(self, info_dict):
    """Return True when the video is listed in the download archive file.

    Returns False when no 'download_archive' option is set, when
    info_dict lacks the id or any extractor key, or when the archive
    file does not exist yet.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return False
    if 'id' not in info_dict:
        return False  # Incomplete video information
    # extract_info() stores the extractor's ie_key() under 'extractor_key'
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        # Name that used to be looked up here; kept as a fallback
        extractor = info_dict.get('extractor_id')
    if extractor is None:
        extractor = info_dict.get('ie_key')  # key in a playlist
    if extractor is None:
        return False  # Incomplete video information
    # Future-proof against any change in case
    # and backwards compatibility with prior versions
    extractor = extractor.lower()
    vid_id = extractor + u' ' + info_dict['id']
    try:
        with locked_file(fn, 'r', encoding='utf-8') as archive_file:
            for line in archive_file:
                if line.strip() == vid_id:
                    return True
    except IOError as ioe:
        # A missing archive file just means nothing has been recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
846
def record_download_archive(self, info_dict):
    """Append the video to the download archive file, if one is configured.

    The line written is the lower-cased extractor key, a space and the
    video id, which is the form in_download_archive() compares with.
    """
    fn = self.params.get('download_archive')
    if fn is None:
        return
    extractor = info_dict.get('extractor_key')
    if extractor is None:
        # No key available: fall back to the extractor name
        extractor = info_dict['extractor']
    vid_id = extractor.lower() + u' ' + info_dict['id']
    with locked_file(fn, 'a', encoding='utf-8') as archive_file:
        archive_file.write(vid_id + u'\n')
854
855 @staticmethod
856 def format_resolution(format, default='unknown'):
857 if format.get('_resolution') is not None:
858 return format['_resolution']
859 if format.get('height') is not None:
860 if format.get('width') is not None:
861 res = u'%sx%s' % (format['width'], format['height'])
862 else:
863 res = u'%sp' % format['height']
864 else:
865 res = default
866 return res
867
def list_formats(self, info_dict):
    """Print a table of the formats available for the video.

    The table has a header line and one line per format; with more
    than one format the first line is tagged '(worst)' and the last
    one '(best)'.
    """
    def format_note(fdict):
        # An explicit note wins over the one built from codecs and bitrates
        if fdict.get('format_note') is not None:
            return fdict['format_note']
        note = u''
        vbr = fdict.get('vbr')
        abr = fdict.get('abr')
        if fdict.get('vcodec') is not None:
            note += u'%-5s' % fdict['vcodec']
        elif vbr is not None:
            note += u'video'
        if vbr is not None:
            note += u'@%4dk' % vbr
        if fdict.get('acodec') is not None:
            if note:
                note += u', '
            note += u'%-5s' % fdict['acodec']
        elif abr is not None:
            if note:
                note += u', '
            note += 'audio'
        if abr is not None:
            note += u'@%3dk' % abr
        return note

    def line(format):
        columns = (
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            format_note(format),
        )
        return u'%-20s%-10s%-12s%s' % columns

    formats = info_dict.get('formats', [info_dict])
    formats_s = [line(f) for f in formats]
    if len(formats) > 1:
        # The tag is separated by a space only when the line ends with a note
        formats_s[0] += (' ' if format_note(formats[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if format_note(formats[-1]) else '') + '(best)'

    header_line = line({
        'format_id': u'format code', 'ext': u'extension',
        '_resolution': u'resolution', 'format_note': u'note'})
    table = u"\n".join(formats_s)
    self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, table))