]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Style fixes in YoutubeDL.py
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import errno
7 import io
8 import os
9 import re
10 import shutil
11 import socket
12 import sys
13 import time
14 import traceback
15
16 from .utils import *
17 from .extractor import get_info_extractor, gen_extractors
18 from .FileDownloader import FileDownloader
19
20
21 class YoutubeDL(object):
22 """YoutubeDL class.
23
24 YoutubeDL objects are the ones responsible for downloading the
25 actual video file and writing it to disk if the user has requested
26 it, among some other tasks. In most cases there should be one per
27 program. As, given a video URL, the downloader doesn't know how to
28 extract all the needed information, task that InfoExtractors do, it
29 has to pass the URL to one of them.
30
31 For this, YoutubeDL objects have a method that allows
32 InfoExtractors to be registered in a given order. When it is passed
33 a URL, the YoutubeDL object hands it to the first InfoExtractor it
34 finds that reports being able to handle it. The InfoExtractor extracts
35 all the information about the video or videos the URL refers to, and
36 YoutubeDL process the extracted information, possibly using a File
37 Downloader to download the video.
38
39 YoutubeDL objects accept a lot of parameters. In order not to saturate
40 the object constructor with arguments, it receives a dictionary of
41 options instead. These options are available through the params
42 attribute for the InfoExtractors to use. The YoutubeDL also
43 registers itself as the downloader in charge for the InfoExtractors
44 that are added to it, so this is a "mutual registration".
45
46 Available options:
47
48 username: Username for authentication purposes.
49 password: Password for authentication purposes.
50 videopassword: Password for accessing a video.
51 usenetrc: Use netrc for authentication instead.
52 verbose: Print additional info to stdout.
53 quiet: Do not print messages to stdout.
54 forceurl: Force printing final URL.
55 forcetitle: Force printing title.
56 forceid: Force printing ID.
57 forcethumbnail: Force printing thumbnail URL.
58 forcedescription: Force printing description.
59 forcefilename: Force printing final filename.
60 simulate: Do not download the video files.
61 format: Video format code.
62 format_limit: Highest quality format to try.
63 outtmpl: Template for output names.
64 restrictfilenames: Do not allow "&" and spaces in file names
65 ignoreerrors: Do not stop on download errors.
66 nooverwrites: Prevent overwriting files.
67 playliststart: Playlist item to start at.
68 playlistend: Playlist item to end at.
69 matchtitle: Download only matching titles.
70 rejecttitle: Reject downloads for matching titles.
71 logtostderr: Log messages to stderr instead of stdout.
72 writedescription: Write the video description to a .description file
73 writeinfojson: Write the video metadata to a .info.json file
74 writeannotations: Write the video annotations to a .annotations.xml file
75 writethumbnail: Write the thumbnail image to a file
76 writesubtitles: Write the video subtitles to a file
77 writeautomaticsub: Write the automatic subtitles to a file
78 allsubtitles: Downloads all the subtitles of the video
79 (requires writesubtitles or writeautomaticsub)
80 listsubtitles: Lists all available subtitles for the video
81 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
82 subtitleslangs: List of languages of the subtitles to download
83 keepvideo: Keep the video file after post-processing
84 daterange: A DateRange object, download only if the upload_date is in the range.
85 skip_download: Skip the actual download of the video file
86 cachedir: Location of the cache files in the filesystem.
87 None to disable filesystem cache.
88 noplaylist: Download single video instead of a playlist if in doubt.
89 age_limit: An integer representing the user's age in years.
90 Unsuitable videos for the given age are skipped.
91 download_archive: File name of a file where all downloads are recorded.
92 Videos already present in the file are not downloaded
93 again.
94
95 The following parameters are not used by YoutubeDL itself, they are used by
96 the FileDownloader:
97 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
98 noresizebuffer, retries, continuedl, noprogress, consoletitle
99 """
100
101 params = None
102 _ies = []
103 _pps = []
104 _download_retcode = None
105 _num_downloads = None
106 _screen_file = None
107
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    It is stored on the instance without being copied and may be
    modified here: 'restrictfilenames' is forced to True when, on
    Python 3 outside Windows, the file system encoding is ASCII.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Regular messages go to stderr only when 'logtostderr' is set.
    if params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout

    # .get() so that a params dict without 'restrictfilenames' does not
    # raise KeyError; a missing key is treated as False.
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.params = params
    self.fd = FileDownloader(self, self.params)

    # A missing (or None) output template simply cannot contain the
    # deprecated field, so do not fail on it here.
    if '%(stitle)s' in (self.params.get('outtmpl') or u''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
133
def add_info_extractor(self, ie):
    """Register ie: append it to the extractor list, index it by its
    ie_key() and make this object its downloader."""
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
139
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    When no instance is registered yet, a new one is created from the
    class returned by the module-level get_info_extractor(ie_key), added
    through add_info_extractor and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
151
def add_default_info_extractors(self):
    """
    Register every extractor produced by gen_extractors(), in order.
    """
    extractors = gen_extractors()
    for extractor in extractors:
        self.add_info_extractor(extractor)
158
def add_post_processor(self, pp):
    """Append pp to the post-processor chain and make this object its
    downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
163
def to_screen(self, message, skip_eol=False):
    """Print message to the screen file unless the 'quiet' option is set.

    A newline is appended to message unless skip_eol is true.
    """
    if self.params.get('quiet', False):
        return
    # Conditional expression instead of indexing a two-item list, so any
    # truthy skip_eol value works rather than only 0/1/bool.
    terminator = u'' if skip_eol else u'\n'
    write_string(message + terminator, self._screen_file)
170
def to_stderr(self, message):
    """Print message, followed by a newline, to sys.stderr.

    message must be a unicode string. The text is encoded with
    preferredencoding() before writing when running on Python 2 or when
    sys.stderr is opened in binary mode.
    """
    assert isinstance(message, type(u''))
    output = message + u'\n'
    # Inspect the stream that is actually written to. Looking at
    # self._screen_file (which may be stdout) could hand bytes to a text
    # stderr, or text to a binary one.
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    if 'b' in getattr(sys.stderr, 'mode', '') or sys.version_info[0] < 3:
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
178
def fixed_template(self):
    """Return True when the output template has no %(...)s field."""
    template = self.params['outtmpl']
    field = re.search(u'(?u)%\\(.+?\\)s', template)
    return field is None
182
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr. With the 'verbose' option
    a traceback is printed as well: tb when given, otherwise one built
    from the exception being handled or, outside an except block, from
    the current stack.

    Unless 'ignoreerrors' is set a DownloadError is raised; otherwise
    the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if not sys.exc_info()[0]:
                # Not called from an except block: show the call stack
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
            else:
                handled = sys.exc_info()[1]
                parts = []
                # Exceptions carrying an exc_info attribute wrap another
                # exception; print that traceback first
                if hasattr(handled, 'exc_info') and handled.exc_info[0]:
                    parts.append(u''.join(traceback.format_exception(*handled.exc_info)))
                parts.append(compat_str(traceback.format_exc()))
                tb = u''.join(parts)
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
212
def report_warning(self, message):
    '''
    Print message to stderr prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty and os.name is not 'nt'.
    '''
    colored = sys.stderr.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;33mWARNING:\033[0m' if colored else u'WARNING:'
    self.to_stderr(u'%s %s' % (_msg_header, message))
224
def report_error(self, message, tb=None):
    '''
    Call trouble() with message prefixed by 'ERROR:'.
    The prefix is colored red when stderr is a tty and os.name is not 'nt'.
    '''
    colored = sys.stderr.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;31mERROR:\033[0m' if colored else u'ERROR:'
    self.trouble(u'%s %s' % (_msg_header, message), tb)
236
def report_writedescription(self, descfn):
    """ Report that the description file descfn is being written """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
240
def report_writesubtitles(self, sub_filename):
    """ Report that the subtitles file sub_filename is being written """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
244
def report_writeinfojson(self, infofn):
    """ Report that the JSON metadata file infofn is being written """
    # The message now starts with "Writing", like the other report_write*
    # methods; the previous text had no verb.
    self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
248
def report_writeannotations(self, annofn):
    """ Report that the annotations file annofn is being written. """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
252
def report_file_already_downloaded(self, file_name):
    """Report that file_name has already been fully downloaded.

    Falls back to a message without the file name when printing the
    name raises UnicodeEncodeError.
    """
    try:
        self.to_screen(u'[download] %s has already been downloaded' % file_name)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
259
def increment_downloads(self):
    """Advance by one the counter used to number each file."""
    self._num_downloads = self._num_downloads + 1
263
def prepare_filename(self, info_dict):
    """Generate the output filename from the 'outtmpl' template.

    The template is filled from a copy of info_dict extended with
    'epoch' (current time) and 'autonumber' (download counter, zero
    padded to 'autonumber_size' digits, 5 by default). Every value is
    passed through sanitize_filename; None becomes 'NA'.

    Returns the filename, or None after reporting an error when the
    template cannot be filled.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # .get(): an info dict without 'playlist_index' must not be
        # reported as an erroneous template when the template does not
        # even use that field.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def sanitize(k, v):
            return sanitize_filename(
                u'NA' if v is None else compat_str(v),
                restricted=restricted,
                is_id=k == u'id')
        template_dict = dict((k, sanitize(k, v)) for k, v in template_dict.items())

        return self.params['outtmpl'] % template_dict
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
292
293 def _match_entry(self, info_dict):
294 """ Returns None iff the file should be downloaded """
295
296 title = info_dict['title']
297 matchtitle = self.params.get('matchtitle', False)
298 if matchtitle:
299 if not re.search(matchtitle, title, re.IGNORECASE):
300 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
301 rejecttitle = self.params.get('rejecttitle', False)
302 if rejecttitle:
303 if re.search(rejecttitle, title, re.IGNORECASE):
304 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
305 date = info_dict.get('upload_date', None)
306 if date is not None:
307 dateRange = self.params.get('daterange', DateRange())
308 if date not in dateRange:
309 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
310 age_limit = self.params.get('age_limit')
311 if age_limit is not None:
312 if age_limit < info_dict.get('age_limit', 0):
313 return u'Skipping "' + title + '" because it is age restricted'
314 if self.in_download_archive(info_dict):
315 return (u'%(title)s has already been recorded in archive'
316 % info_dict)
317 return None
318
def extract_info(self, url, download=True, ie_key=None, extra_info={}):
    '''
    Extract the information for url with the first suitable extractor
    and return the result of process_ie_result() on it.

    With ie_key only that extractor is tried. 'download' is forwarded to
    process_ie_result. extra_info is a dict with extra values to add to
    each result; it is only read here, never modified.

    Returns None when the extractor returned None, when an error was
    reported, or when no extractor is suitable.
    '''

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            # Finished already (backwards compatibility; listformats and
            # friends should be moved here)
            if ie_result is None:
                break
            if not isinstance(ie_result, list):
                ie_result.update(extra_info)
            else:
                # Backwards compatibility: old IE result format
                for old_style_result in ie_result:
                    old_style_result.update(extra_info)
                ie_result = {'_type': 'compat_list', 'entries': ie_result}
            if 'extractor' not in ie_result:
                ie_result['extractor'] = ie.IE_NAME
            return self.process_ie_result(ie_result, download=download)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            break
    else:
        # The loop ended without break: no extractor accepted the URL
        self.report_error(u'no suitable InfoExtractor: %s' % url)
367
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    ie_result['_type'] selects the handling ('video' when absent):
    'video' goes to process_video_result, 'url' is extracted again
    through extract_info, 'playlist' and 'compat_list' have each entry
    processed recursively. Any other type raises Exception.

    It will also download the videos if 'download'.
    extra_info is merged into video results; it is only read here.
    Returns the resolved ie_result.
    """

    # If not given we suppose it's a video, support the default old system
    result_type = ie_result.get('_type', 'video')
    if result_type == 'video':
        ie_result.update(extra_info)
        # Forward 'download'; without it process_video_result falls back
        # to its default and downloads even when download=False was asked.
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # 'playliststart' is 1-based, slices are 0-based
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
            }
            if 'extractor' not in entry:
                # We set the extractor, if it's an url it will be set then to
                # the new extractor, but if it's already a video we must make
                # sure it's present: see issue #877
                entry['extractor'] = ie_result['extractor']
            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            r.setdefault('extractor', ie_result['extractor'])
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download=download)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
437
def select_format(self, format_spec, available_formats):
    """Pick one format dict from available_formats.

    available_formats is expected to be ordered from worst to best.
    format_spec may be None or 'best' (last entry), 'worst' (first
    entry), one of the extensions mp4/flv/webm/3gp (last entry with
    that 'ext') or a format id (last entry with that 'format_id').

    Returns None when nothing matches, including when
    available_formats is empty.
    """
    # Guard against an empty list: indexing [-1] / [0] below would
    # raise IndexError instead of signalling "no format".
    if not available_formats:
        return None
    if format_spec == 'best' or format_spec is None:
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    extensions = [u'mp4', u'flv', u'webm', u'3gp']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
453
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a single video and optionally download.

    info_dict is modified in place: 'playlist'/'playlist_index' are set
    to None when absent, every format gets 'format_id' and 'format',
    and finally the last selected format is merged into info_dict.

    Returns info_dict, or None when the 'listformats' option is set
    (the formats are only listed in that case).
    Raises ExtractorError when none of the requested formats exists.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # This extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    # ('fmt' rather than 'format' so the builtin is not shadowed)
    for i, fmt in enumerate(formats):
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            # Explicit {0}: auto-numbered '{}' fields are not accepted
            # by Python 2.6's str.format.
            if fmt.get('format_note') is not None:
                note = u' ({0})'.format(fmt['format_note'])
            else:
                note = ''
            fmt['format'] = u'{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=note,
            )

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # We only compare the extension if they have the same height and width
            return (f.get('height'), f.get('width'), ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/5/best, we pick
        # the first that is available, starting from left
        for rf in req_format.split('/'):
            selected_format = self.select_format(rf, formats)
            if selected_format is not None:
                formats_to_download = [selected_format]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available')

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
534
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: counts the download, normalises the title fields, skips
    the entry when _match_entry gives a reason, enforces
    'max_downloads' (raising MaxDownloadsReached), does the forced
    printings, and - unless simulating - writes the requested side
    files (description, annotations, subtitles, info JSON, thumbnail),
    downloads the video, runs the post-processors and records the video
    in the download archive.

    Most failures are reported through report_error and end the
    processing of this entry; OSError/IOError raised by the download
    itself are re-raised as UnavailableVideoError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    # prepare_filename already reported the error
    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Name the subtitles file. 'descfn' (used here before)
                # is the description file and is not even defined
                # unless 'writedescription' is set.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the JSON dump
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                # encodeFilename for consistency with every other file
                # this method opens
                with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The existing file counts as a successful download
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
690
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs are given together with an
    output template without fields (they would all be written to the
    same file). MaxDownloadsReached is reported and re-raised.

    Returns the download return code.
    """
    if len(url_list) > 1 and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # It also downloads the videos; the returned info is not
            # needed here.
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
707
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Each post-processor receives a copy of ie_info extended with
    'filepath'. The original file is removed afterwards when the
    combined wish of the post-processors is False and the 'keepvideo'
    option is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            # NOTE(review): the info dict returned by run() is not used
            # for the following post-processors - confirm this is intended
            wish, _returned_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        if wish is None:
            continue
        # A true wish always wins; a false one only counts while nobody
        # has expressed a wish yet
        if wish or keep_video is None:
            keep_video = wish
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
727 os.remove(encodeFilename(filename))
728 except (IOError, OSError):
729 self.report_warning(u'Unable to remove downloaded video file')
730
def in_download_archive(self, info_dict):
    """Return True when the video is listed in the download archive.

    The archive is the file named by the 'download_archive' option;
    a video is identified by the line '<extractor> <id>'. Without that
    option, or when the file does not exist, the answer is False.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False
    wanted = info_dict['extractor'] + u' ' + info_dict['id']
    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive just means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
745
def record_download_archive(self, info_dict):
    """Append the line '<extractor> <id>' to the download archive.

    Does nothing when the 'download_archive' option is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    entry = info_dict['extractor'] + u' ' + info_dict['id']
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry + u'\n')
753
@staticmethod
def format_resolution(format):
    """Describe the resolution of a format dict.

    Returns 'WIDTHxHEIGHT' when both dimensions are known, 'HEIGHTp'
    when only the height is known and '???' when the height is missing.
    """
    if format.get('height') is None:
        return '???'
    if format.get('width') is None:
        return u'%sp' % format['height']
    return u'%sx%s' % (format['width'], format['height'])
764
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    The formats are taken from info_dict['formats']; when that key is
    missing or None, info_dict itself is the only format. When there
    are several formats the first row is marked '(worst)' and the last
    '(best)'.
    """
    # Same rule as process_video_result: a 'formats' key set to None
    # means "single format", it must not be iterated.
    formats = info_dict.get('formats')
    if formats is None:
        formats = [info_dict]

    formats_s = []
    for fmt in formats:
        formats_s.append(u'%-15s: %-5s %-15s[%s]' % (
            fmt['format_id'],
            fmt['ext'],
            fmt.get('format_note') or '-',
            self.format_resolution(fmt),
        ))
    # "> 1" rather than "!= 1" so that an empty list does not raise
    # IndexError
    if len(formats_s) > 1:
        formats_s[0] += ' (worst)'
        formats_s[-1] += ' (best)'
    formats_s = "\n".join(formats_s)
    self.to_screen(u'[info] Available formats for %s:\n'
                   u'format code extension note resolution\n%s' % (
                       info_dict['id'], formats_s))