]> jfr.im git - yt-dlp.git/blob - youtube_dl/YoutubeDL.py
Expand tilde in template (Fixes #1639)
[yt-dlp.git] / youtube_dl / YoutubeDL.py
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3
4 from __future__ import absolute_import
5
6 import errno
7 import io
8 import os
9 import re
10 import shutil
11 import socket
12 import sys
13 import time
14 import traceback
15
16 from .utils import *
17 from .extractor import get_info_extractor, gen_extractors
18 from .FileDownloader import FileDownloader
19
20
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. As, given a video URL, the downloader doesn't know how to
    extract all the needed information, task that InfoExtractors do, it
    has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing the format.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    listformats:       Print the available formats and stop processing
                       the video.
    prefer_free_formats: Among formats with the same height and width,
                       rank webm above mp4 above flv.
    outtmpl:           Template for output names.
    autonumber_size:   Number of digits used for %(autonumber)s
                       (default=5).
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    max_downloads:     Raise MaxDownloadsReached once more than this
                       number of videos have been processed.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video description to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    # Class-level placeholders; __init__ rebinds every one of these on the
    # instance, so the shared list objects below are not used by instances.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
107
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring. It
    is stored as self.params without being copied, so the
    'restrictfilenames' override applied below is also visible to the
    caller.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    if params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout

    ascii_only_filesystem = (
        sys.version_info >= (3,) and sys.platform != 'win32' and
        sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'])
    # 'restrictfilenames' is optional: read it with .get() so that a params
    # dict without the key does not fail with a KeyError here.
    if ascii_only_filesystem and not params.get('restrictfilenames', False):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all characters. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.params = params
    self.fd = FileDownloader(self, self.params)

    # A missing or None template simply cannot contain the deprecated field.
    if '%(stitle)s' in (self.params.get('outtmpl') or u''):
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
133
def add_info_extractor(self, ie):
    """Register the InfoExtractor instance ie with this downloader.

    The extractor is appended to the ordered extractor list, becomes the
    instance looked up under its ie_key(), and is told that this object is
    its downloader.
    """
    self._ies.append(ie)
    self._ies_instances.update({ie.ie_key(): ie})
    ie.set_downloader(self)
139
def get_info_extractor(self, ie_key):
    """
    Return the InfoExtractor instance registered under ie_key.

    When no instance is registered yet, one is created from the class
    returned by the module-level get_info_extractor() helper, registered
    through add_info_extractor() and then returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    # The bare name below is the function imported from .extractor, not
    # this method.
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
151
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in order.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
158
def add_post_processor(self, pp):
    """Append the PostProcessor pp to the chain and make this object its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
163
def to_screen(self, message, skip_eol=False):
    """Write message to the screen file unless the 'quiet' option is set.

    A newline is appended unless skip_eol is true.
    """
    if self.params.get('quiet', False):
        return
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
170
def to_stderr(self, message):
    """Print message to stderr.

    message must be a unicode string; a newline is appended. The text is
    encoded with preferredencoding() first when sys.stderr is a binary
    stream or when running on Python 2.
    """
    assert isinstance(message, type(u''))
    output = message + u'\n'
    # Inspect the stream that is actually written to. Looking at
    # self._screen_file here would pick the wrong representation whenever
    # the screen file and sys.stderr differ in mode.
    stream_mode = getattr(sys.stderr, 'mode', '') or ''
    if 'b' in stream_mode or sys.version_info[0] < 3:  # Python 2 lies about the mode of sys.stdout/sys.stderr
        output = output.encode(preferredencoding())
    sys.stderr.write(output)
178
def fixed_template(self):
    """Return True when the output template contains no %(...)s field."""
    placeholder = re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl'])
    return placeholder is None
182
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message, if any, is printed to stderr first. In verbose mode a
    traceback is printed as well: tb when given, otherwise one built from
    the exception being handled (including the exc_info it wraps, if any)
    or, outside an except block, from the current stack.

    Unless the 'ignoreerrors' option is set, a DownloadError carrying the
    message and the relevant exc_info is raised; otherwise the return code
    is set to 1.
    """
    def wrapped_exc_info():
        # exc_info carried by the exception being handled, or None.
        current = sys.exc_info()
        if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
            return current[1].exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                parts = []
                inner = wrapped_exc_info()
                if inner is not None:
                    parts.append(u''.join(traceback.format_exception(*inner)))
                parts.append(compat_str(traceback.format_exc()))
                tb = u''.join(parts)
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    exc_info = wrapped_exc_info()
    if exc_info is None:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
212
def report_warning(self, message):
    '''
    Print message to stderr prefixed with 'WARNING:'.
    The prefix is wrapped in colour escape codes when stderr is a tty and
    os.name is not 'nt'.
    '''
    use_colour = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;33mWARNING:\033[0m' if use_colour else u'WARNING:'
    self.to_stderr(u'%s %s' % (header, message))
224
def report_error(self, message, tb=None):
    '''
    Hand message, prefixed with 'ERROR:', and tb over to trouble().
    The prefix is wrapped in colour escape codes when stderr is a tty and
    os.name is not 'nt'.
    '''
    use_colour = sys.stderr.isatty() and os.name != 'nt'
    header = u'\033[0;31mERROR:\033[0m' if use_colour else u'ERROR:'
    self.trouble(u'%s %s' % (header, message), tb)
236
def slow_down(self, start_time, byte_counter):
    """Sleep when the average speed since start_time exceeds 'ratelimit'.

    Nothing happens when no rate limit is configured, when no bytes have
    been counted yet, or when no time has elapsed.
    """
    limit = self.params.get('ratelimit', None)
    if limit is None or byte_counter == 0:
        return
    elapsed = time.time() - start_time
    if elapsed <= 0.0:
        return
    if float(byte_counter) / elapsed > limit:
        # Sleep for as long as the transfer is ahead of the allowed rate.
        time.sleep((byte_counter - limit * elapsed) / limit)
249
def report_writedescription(self, descfn):
    """ Announce that the description is being written to descfn """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
253
def report_writesubtitles(self, sub_filename):
    """ Announce that the subtitles are being written to sub_filename """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
257
def report_writeinfojson(self, infofn):
    """ Report that the metadata file is being written """
    # The message now starts with the verb, like the other report_write*
    # notices; without it the line read as a sentence fragment.
    self.to_screen(u'[info] Writing video description metadata as JSON to: ' + infofn)
261
def report_writeannotations(self, annofn):
    """ Announce that the annotations are being written to annofn. """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
265
def report_file_already_downloaded(self, file_name):
    """Announce that file_name has already been fully downloaded.

    Falls back to a message without the file name when printing it raises
    UnicodeEncodeError.
    """
    try:
        notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen(u'[download] The file has already been downloaded')
272
def increment_downloads(self):
    """Advance the counter that numbers the processed files by one."""
    self._num_downloads = self._num_downloads + 1
276
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' template (with a leading ~ expanded) is filled with a
    copy of info_dict extended with 'epoch' and 'autonumber'; every value
    is passed through sanitize_filename, None values becoming 'NA'.

    Returns the filename, or None after reporting an error when the
    template refers to an unknown field or is otherwise malformed.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # .get(): an info dict without 'playlist_index' must not end up in
        # the KeyError handler below, which would blame the template.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def sanitize(key, value):
            return sanitize_filename(
                u'NA' if value is None else compat_str(value),
                restricted=restricted,
                is_id=(key == u'id'))

        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items())

        tmpl = os.path.expanduser(self.params['outtmpl'])
        return tmpl % template_dict
    except KeyError:
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
307
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a human readable reason for skipping it. The reason
    carries no '[download] ' prefix: process_info() adds that prefix when
    it prints the reason.
    """

    title = info_dict['title']
    matchtitle = self.params.get('matchtitle', False)
    if matchtitle:
        if not re.search(matchtitle, title, re.IGNORECASE):
            return u'"' + title + '" title did not match pattern "' + matchtitle + '"'
    rejecttitle = self.params.get('rejecttitle', False)
    if rejecttitle:
        if re.search(rejecttitle, title, re.IGNORECASE):
            return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return u'%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        # An explicit None in the info dict counts as "no restriction";
        # comparing an int with None raises TypeError on Python 3.
        if age_limit < (info_dict.get('age_limit') or 0):
            return u'Skipping "' + title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return (u'%(title)s has already been recorded in archive'
                % info_dict)
    return None
333
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    '''
    Extract the information for url and process it.

    The URL is handled by the extractor named by ie_key when given,
    otherwise by the first registered InfoExtractor that reports being
    suitable for it.
    If 'download', also downloads the videos.
    extra_info is a dict containing the extra values to add to each result

    Returns the value of process_ie_result() for the extracted result, or
    None when the extractor returned None, when an error was reported, or
    when no extractor was suitable.
    '''
    # None instead of a shared {} default: a mutable default would be the
    # same dict object in every call.
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                for result in ie_result:
                    result.update(extra_info)
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            else:
                ie_result.update(extra_info)
            if 'extractor' not in ie_result:
                ie_result['extractor'] = ie.IE_NAME
            return self.process_ie_result(ie_result, download=download)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # Only reached when the loop ran out of extractors without a break.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
382
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    extra_info is a dict of extra values merged into video results and
    passed along when a 'url' result is re-extracted.

    It will also download the videos if 'download'.
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """
    # None instead of a shared {} default: a mutable default would be the
    # same dict object in every call.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        ie_result.update(extra_info)
        # Forward 'download': without it process_video_result() falls back
        # to its default and downloads even when download is False.
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
            }
            if 'extractor' not in entry:
                # We set the extractor, if it's an url it will be set then to
                # the new extractor, but if it's already a video we must make
                # sure it's present: see issue #877
                entry['extractor'] = ie_result['extractor']
            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            r.setdefault('extractor', ie_result['extractor'])
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download=download)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
452
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a video result and process them.

    Fills in 'playlist'/'playlist_index' (None outside a playlist) and the
    'format'/'format_id' fields of every format, honours the 'listformats',
    'format_limit', 'prefer_free_formats' and 'format' options and, if
    'download', hands every selected format to process_info().

    Returns info_dict updated with the last selected format, or None after
    listing the formats. Raises ExtractorError when no format matches the
    request.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
        # Honour 'download' here as well, like the generic path below.
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, fmt) in enumerate(formats):
        if fmt.get('format') is None:
            if fmt.get('height') is not None:
                if fmt.get('width') is not None:
                    format_desc = u'%sx%s' % (fmt['width'], fmt['height'])
                else:
                    format_desc = u'%sp' % fmt['height']
            else:
                format_desc = '???'
            fmt['format'] = format_desc
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f.get('ext'))
            except ValueError:
                ext_ord = -1
            # We only compare the extension if they have the same height and width.
            # Unknown dimensions sort first; None is not orderable on Python 3.
            height = f.get('height')
            width = f.get('width')
            return (-1 if height is None else height,
                    -1 if width is None else width,
                    ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    formats_to_download = []
    # Slices instead of indexing: an empty format list then reaches the
    # ExtractorError below rather than failing with an IndexError.
    if req_format == 'best' or req_format is None:
        formats_to_download = formats[-1:]
    elif req_format == 'worst':
        formats_to_download = formats[:1]
    # The -1 is for supporting YoutubeIE
    elif req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/10/5, we pick
        # the first that is available, starting from left
        for rf in req_format.split('/'):
            # A real list: on Python 3 filter() returns an iterator, which
            # is always truthy and cannot be indexed.
            matches = [f for f in formats if f['format_id'] == rf]
            if matches:
                formats_to_download = [matches[0]]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available')

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
537
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Counts the download, fills in 'fulltitle', 'stitle' and 'format',
    applies the skip filters and the 'max_downloads' limit and performs the
    forced printings. Unless simulating, it then writes the requested side
    files (description, annotations, subtitles, info JSON, thumbnail),
    downloads the video, runs the postprocessors and records the video in
    the download archive.

    Returns None. Raises MaxDownloadsReached when the limit is exceeded and
    UnavailableVideoError when the download fails with an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            # Computed outside the try so that the error message below can
            # always name the file.
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Name the subtitles file; 'descfn' belongs to the
                # description step and may not even be defined here.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                try:
                    # encodeFilename() like every other file written here.
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                finally:
                    # Do not leak the HTTP response.
                    uf.close()
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
693
def download(self, url_list):
    """Download every URL in url_list and return the accumulated return code.

    Raises SameFileError when several URLs are given but the output
    template is fixed. MaxDownloadsReached is announced and re-raised.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # Extraction also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
710
def post_process(self, filename, ie_info):
    """Run every registered postprocessor on the downloaded file.

    Each postprocessor receives a copy of ie_info extended with 'filepath'.
    The original file is deleted afterwards when the postprocessors voted
    against keeping it and the 'keepvideo' option is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            wish, _new_info = pp.run(info)
            if wish is None:
                continue
            # A wish to keep always wins; a wish to delete only counts
            # while nothing has been decided yet.
            if wish or keep_video is None:
                keep_video = wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    delete_original = keep_video is False and not self.params.get('keepvideo', False)
    if delete_original:
        try:
            self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning(u'Unable to remove downloaded video file')
733
def in_download_archive(self, info_dict):
    """Return True when the video is listed in the 'download_archive' file.

    Returns False when no archive is configured or the archive file does
    not exist yet; any other IOError is propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False
    wanted = info_dict['extractor'] + u' ' + info_dict['id']
    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        if ioe.errno != errno.ENOENT:
            raise
    return False
748
def record_download_archive(self, info_dict):
    """Append the video's '<extractor> <id>' line to the 'download_archive' file.

    Does nothing when no archive is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    entry = info_dict['extractor'] + u' ' + info_dict['id']
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(entry + u'\n')
756
def list_formats(self, info_dict):
    """Print a table of the formats available for the video.

    One line per entry of info_dict['formats'] (or info_dict itself when
    there is no such key) with format code, extension and description.
    When there are several formats the first is marked as worst and the
    last as best.
    """
    rows = []
    for fmt in info_dict.get('formats', [info_dict]):
        rows.append(u'%s\t:\t%s\t[%s]' % (
            fmt['format_id'],
            fmt['ext'],
            fmt.get('format', '???'),
        ))
    # Only label the ends when there are at least two rows; an empty list
    # has no first or last row to label.
    if len(rows) > 1:
        rows[0] += ' (worst)'
        rows[-1] += ' (best)'
    formats_s = u'\n'.join(rows)
    self.to_screen(u"[info] Available formats for %s:\nformat code\textension\n%s" % (info_dict['id'], formats_s))