]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
extractor: youtube: Set extension of AAC audio formats to m4a.
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import absolute_import
5
c1c9a79c 6import errno
8222d8de
JMF
7import io
8import os
9import re
10import shutil
11import socket
12import sys
13import time
14import traceback
15
16from .utils import *
023fa8c4 17from .extractor import get_info_extractor, gen_extractors
8222d8de
JMF
18from .FileDownloader import FileDownloader
19
20
class YoutubeDL(object):
    """YoutubeDL class.

    YoutubeDL objects are the ones responsible for downloading the
    actual video file and writing it to disk if the user has requested
    it, among some other tasks. In most cases there should be one per
    program. Given a video URL, the downloader doesn't know how to
    extract all the needed information (that is the task of the
    InfoExtractors), so it has to pass the URL to one of them.

    For this, YoutubeDL objects have a method that allows
    InfoExtractors to be registered in a given order. When it is passed
    a URL, the YoutubeDL object hands it to the first InfoExtractor it
    finds that reports being able to handle it. The InfoExtractor extracts
    all the information about the video or videos the URL refers to, and
    YoutubeDL processes the extracted information, possibly using a File
    Downloader to download the video.

    YoutubeDL objects accept a lot of parameters. In order not to saturate
    the object constructor with arguments, it receives a dictionary of
    options instead. These options are available through the params
    attribute for the InfoExtractors to use. The YoutubeDL also
    registers itself as the downloader in charge for the InfoExtractors
    that are added to it, so this is a "mutual registration".

    Available options:

    username:          Username for authentication purposes.
    password:          Password for authentication purposes.
    videopassword:     Password for accessing a video.
    usenetrc:          Use netrc for authentication instead.
    verbose:           Print additional info to stdout.
    quiet:             Do not print messages to stdout.
    forceurl:          Force printing final URL.
    forcetitle:        Force printing title.
    forceid:           Force printing ID.
    forcethumbnail:    Force printing thumbnail URL.
    forcedescription:  Force printing description.
    forcefilename:     Force printing final filename.
    forceformat:       Force printing format.
    simulate:          Do not download the video files.
    format:            Video format code.
    format_limit:      Highest quality format to try.
    listformats:       Print the available formats instead of downloading.
    prefer_free_formats: Sort the formats by height, width and extension
                       (flv, mp4, webm, in that order) before selecting one.
    outtmpl:           Template for output names.
    autonumber_size:   Number of digits used for %(autonumber)s
                       (5 when not given).
    restrictfilenames: Do not allow "&" and spaces in file names
    ignoreerrors:      Do not stop on download errors.
    nooverwrites:      Prevent overwriting files.
    max_downloads:     MaxDownloadsReached is raised once the download
                       counter exceeds this number.
    playliststart:     Playlist item to start at.
    playlistend:       Playlist item to end at.
    matchtitle:        Download only matching titles.
    rejecttitle:       Reject downloads for matching titles.
    logtostderr:       Log messages to stderr instead of stdout.
    writedescription:  Write the video description to a .description file
    writeinfojson:     Write the video metadata to a .info.json file
    writeannotations:  Write the video annotations to a .annotations.xml file
    writethumbnail:    Write the thumbnail image to a file
    writesubtitles:    Write the video subtitles to a file
    writeautomaticsub: Write the automatic subtitles to a file
    allsubtitles:      Downloads all the subtitles of the video
                       (requires writesubtitles or writeautomaticsub)
    listsubtitles:     Lists all available subtitles for the video
    subtitlesformat:   Subtitle format [srt/sbv/vtt] (default=srt)
    subtitleslangs:    List of languages of the subtitles to download
    keepvideo:         Keep the video file after post-processing
    daterange:         A DateRange object, download only if the upload_date is in the range.
    skip_download:     Skip the actual download of the video file
    cachedir:          Location of the cache files in the filesystem.
                       None to disable filesystem cache.
    noplaylist:        Download single video instead of a playlist if in doubt.
    age_limit:         An integer representing the user's age in years.
                       Unsuitable videos for the given age are skipped.
    download_archive:  File name of a file where all downloads are recorded.
                       Videos already present in the file are not downloaded
                       again.

    The following parameters are not used by YoutubeDL itself, they are used by
    the FileDownloader:
    nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
    noresizebuffer, retries, continuedl, noprogress, consoletitle
    """

    # Class-level placeholders; __init__ rebinds every one of them on the
    # instance, so the shared list objects below are never mutated through
    # a constructed YoutubeDL.
    params = None
    _ies = []
    _pps = []
    _download_retcode = None
    _num_downloads = None
    _screen_file = None
107
def __init__(self, params):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring.
    It is stored as self.params (not copied) and 'restrictfilenames' is
    switched on in place when the file system encoding is ASCII-only.
    The 'outtmpl' option must be present.
    """
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # False/True index the list: stdout by default, stderr on request.
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]

    # .get() so that callers which never set 'restrictfilenames' do not
    # crash with a KeyError here.
    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            u'Assuming --restrict-filenames since file system encoding '
            u'cannot encode all charactes. '
            u'Set the LC_ALL environment variable to fix this.')
        params['restrictfilenames'] = True

    self.params = params
    self.fd = FileDownloader(self, self.params)

    if '%(stitle)s' in self.params['outtmpl']:
        self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
133
def add_info_extractor(self, ie):
    """Register ie as the last InfoExtractor and make self its downloader.

    The extractor is appended to the ordered list, indexed under its
    ie_key() for lookup by key, and told about this downloader.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
139
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.

    When no instance is registered yet, the class returned by the
    module-level get_info_extractor(ie_key) is instantiated, added to the
    extractor list and returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    fresh = get_info_extractor(ie_key)()
    self.add_info_extractor(fresh)
    return fresh
151
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in the
    order they are produced, after the ones already registered.
    """
    for default_ie in gen_extractors():
        self.add_info_extractor(default_ie)
158
8222d8de
JMF
def add_post_processor(self, pp):
    """Append the PostProcessor pp to the chain and make self its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
163
def to_screen(self, message, skip_eol=False):
    """Write message to the screen file unless the 'quiet' option is set.

    A newline is appended unless skip_eol is true.
    """
    if self.params.get('quiet', False):
        return
    # skip_eol indexes the pair: False -> newline, True -> nothing.
    line_end = (u'\n', u'')[skip_eol]
    write_string(message + line_end, self._screen_file)
8222d8de
JMF
170
def to_stderr(self, message):
    """Write message, followed by a newline, to sys.stderr.

    message must be a text (unicode) string. It is encoded with the
    preferred encoding when the screen file reports a binary mode or
    when running on Python 2.
    """
    assert type(message) == type(u'')
    screen_mode = getattr(self._screen_file, 'mode', '')
    # Python 2 lies about the mode of sys.stdout/sys.stderr
    must_encode = 'b' in screen_mode or sys.version_info[0] < 3
    payload = message + u'\n'
    if must_encode:
        payload = payload.encode(preferredencoding())
    sys.stderr.write(payload)
178
def fixed_template(self):
    """Return True when the output template has no %(name)s placeholder."""
    placeholder = re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl'])
    return placeholder is None
182
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem shows up.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed too: tb when given, otherwise one built from the
    exception being handled (or from the current stack when no exception
    is active). Unless the 'ignoreerrors' option is set a DownloadError
    is raised; otherwise the return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            current = sys.exc_info()
            if current[0]:  # .trouble has been called from an except block
                pieces = []
                # Exceptions that wrap another one expose it as .exc_info
                if hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
                    pieces.append(u''.join(traceback.format_exception(*current[1].exc_info)))
                pieces.append(compat_str(traceback.format_exc()))
                tb = u''.join(pieces)
            else:
                tb = u''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
212
def report_warning(self, message):
    '''
    Print message to stderr prefixed with 'WARNING:'.
    The prefix is coloured when stderr is a tty and the OS is not Windows.
    '''
    colourise = sys.stderr.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;33mWARNING:\033[0m' if colourise else u'WARNING:'
    self.to_stderr(u'%s %s' % (_msg_header, message))
224
def report_error(self, message, tb=None):
    '''
    Hand message to trouble() prefixed with 'ERROR:'.
    The prefix is coloured red when stderr is a tty and the OS is not
    Windows. tb is passed through to trouble() unchanged.
    '''
    colourise = sys.stderr.isatty() and os.name != 'nt'
    _msg_header = u'\033[0;31mERROR:\033[0m' if colourise else u'ERROR:'
    self.trouble(u'%s %s' % (_msg_header, message), tb)
236
def slow_down(self, start_time, byte_counter):
    """Sleep when the average download speed exceeds the 'ratelimit' option.

    start_time is the time.time() value at which the transfer began and
    byte_counter the number of bytes received since then.
    """
    rate_limit = self.params.get('ratelimit', None)
    if rate_limit is None or byte_counter == 0:
        return
    now = time.time()
    elapsed = now - start_time
    if elapsed <= 0.0:
        return
    if float(byte_counter) / elapsed > rate_limit:
        # Sleep for as long as the transfer is ahead of the allowed rate.
        surplus = byte_counter - rate_limit * elapsed
        time.sleep(surplus / rate_limit)
249
def report_writedescription(self, descfn):
    """ Report that the description file descfn is being written """
    notice = u'[info] Writing video description to: ' + descfn
    self.to_screen(notice)
253
def report_writesubtitles(self, sub_filename):
    """ Report that the subtitles file sub_filename is being written """
    notice = u'[info] Writing video subtitles to: ' + sub_filename
    self.to_screen(notice)
257
def report_writeinfojson(self, infofn):
    """ Report the name of the JSON metadata file, infofn """
    notice = u'[info] Video description metadata as JSON to: ' + infofn
    self.to_screen(notice)
261
1fb07d10
JG
def report_writeannotations(self, annofn):
    """ Report that the annotations file annofn is being written. """
    notice = u'[info] Writing video annotations to: ' + annofn
    self.to_screen(notice)
265
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report that file_name has already been fully downloaded.

    Falls back to a message without the file name when printing the
    name raises UnicodeEncodeError.
    """
    generic_notice = u'[download] The file has already been downloaded'
    try:
        named_notice = u'[download] %s has already been downloaded' % file_name
        self.to_screen(named_notice)
    except UnicodeEncodeError:
        self.to_screen(generic_notice)
272
def increment_downloads(self):
    """Advance the counter that gives each processed file its ordinal."""
    self._num_downloads = self._num_downloads + 1
276
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' option is expanded with a copy of info_dict in which
    every value has been passed through sanitize_filename (None becomes
    'NA'), extended with two computed fields:
      epoch       current time as an integer
      autonumber  the download counter, zero-padded to 'autonumber_size'
                  digits (5 when the option is not set)
    'playlist_index', when present and not None, is zero-padded to five
    digits.

    Returns the filename, or None after reporting an error when the
    template cannot be expanded.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = u'%0' + str(autonumber_size) + u'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        # .get(): an info dict without this key must not be reported as an
        # "erroneous output template" when the template does not even use it.
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']

        restricted = self.params.get('restrictfilenames')

        def _sanitize(key, value):
            # Missing values show up as 'NA' in the file name.
            return sanitize_filename(
                u'NA' if value is None else compat_str(value),
                restricted=restricted,
                is_id=(key == u'id'))

        template_dict = dict((k, _sanitize(k, v)) for k, v in template_dict.items())

        return self.params['outtmpl'] % template_dict
    except KeyError:
        # The template references a field that info_dict does not provide.
        self.report_error(u'Erroneous output template')
        return None
    except ValueError as err:
        self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
        return None
305
306 def _match_entry(self, info_dict):
307 """ Returns None iff the file should be downloaded """
308
309 title = info_dict['title']
310 matchtitle = self.params.get('matchtitle', False)
311 if matchtitle:
312 if not re.search(matchtitle, title, re.IGNORECASE):
313 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
314 rejecttitle = self.params.get('rejecttitle', False)
315 if rejecttitle:
316 if re.search(rejecttitle, title, re.IGNORECASE):
317 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
318 date = info_dict.get('upload_date', None)
319 if date is not None:
320 dateRange = self.params.get('daterange', DateRange())
321 if date not in dateRange:
322 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
8dbe9899
PH
323 age_limit = self.params.get('age_limit')
324 if age_limit is not None:
cfadd183 325 if age_limit < info_dict.get('age_limit', 0):
8dbe9899 326 return u'Skipping "' + title + '" because it is age restricted'
c1c9a79c 327 if self.in_download_archive(info_dict):
ee6c9f95 328 return (u'%(title)s has already been recorded in archive'
c1c9a79c 329 % info_dict)
8222d8de
JMF
330 return None
331
def extract_info(self, url, download=True, ie_key=None, extra_info=None):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    url         the URL to process
    download    passed on to process_ie_result; if true the videos are
                downloaded as well
    ie_key      when given, only the extractor registered under this key
                is tried instead of the whole extractor list
    extra_info  dict with extra values to add to each result

    Returns whatever process_ie_result returns for the extracted
    information. None is returned when no extractor is suitable, when the
    extractor itself returns None, or when an error has been reported
    instead of raised.
    '''
    # A fresh dict per call instead of a shared mutable default argument.
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning(u'The program functionality for this site has been marked as broken, '
                                u'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                for result in ie_result:
                    result.update(extra_info)
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            else:
                ie_result.update(extra_info)
            if 'extractor' not in ie_result:
                ie_result['extractor'] = ie.IE_NAME
            return self.process_ie_result(ie_result, download=download)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except Exception as e:
            # Unexpected errors are only swallowed when the user asked for it.
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ran to completion: no extractor accepted the URL.
        self.report_error(u'no suitable InfoExtractor: %s' % url)
380
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    ie_result   dict produced by an InfoExtractor; its '_type' key selects
                the handling ('video' when absent)
    download    if true the videos are downloaded as well
    extra_info  dict with extra values to add to a video result or to hand
                on when a 'url' result is extracted

    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call instead of a shared mutable default argument.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')  # If not given we suppose it's a video, support the default old system
    if result_type == 'video':
        ie_result.update(extra_info)
        # Forward the flag: without it download=False would still download.
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen(u'[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        n_all_entries = len(ie_result['entries'])
        # The option is 1-based, the slice below is 0-based.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', -1)

        if playlistend == -1:
            entries = ie_result['entries'][playliststart:]
        else:
            entries = ie_result['entries'][playliststart:playlistend]

        n_entries = len(entries)

        self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
                       (ie_result['extractor'], playlist, n_all_entries, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'playlist': playlist,
                'playlist_index': i + playliststart,
            }
            if 'extractor' not in entry:
                # We set the extractor, if it's an url it will be set then to
                # the new extractor, but if it's already a video we must make
                # sure it's present: see issue #877
                entry['extractor'] = ie_result['extractor']
            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Old-style list entries inherit the extractor of their container.
            r.setdefault('extractor', ie_result['extractor'])
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download=download)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
450
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Select the format(s) of a video result and optionally download them.

    info_dict  a result of type 'video'; it is modified in place
    download   if true, process_info is called for every selected format

    The selection honours the 'format_limit', 'prefer_free_formats' and
    'format' options ('best', 'worst', 'all'/'-1', or a '/'-separated
    list of format ids of which the first available one is taken).

    Returns info_dict updated with the last selected format, or None when
    the 'listformats' option is set (the formats are only listed then).
    Raises ExtractorError when no format matches the request.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    # These extractors handle format selection themselves
    if info_dict['extractor'] in [u'youtube', u'Youku', u'YouPorn', u'mixcloud']:
        # Honour the flag here too, otherwise download=False still downloads.
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    # We check that all the formats have the format and format_id fields
    for (i, fmt) in enumerate(formats):
        if fmt.get('format') is None:
            if fmt.get('height') is not None:
                if fmt.get('width') is not None:
                    format_desc = u'%sx%s' % (fmt['width'], fmt['height'])
                else:
                    format_desc = u'%sp' % fmt['height']
            else:
                format_desc = '???'
            fmt['format'] = format_desc
        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)

    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        # Keep everything up to and including the limit format.
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))
    if self.params.get('prefer_free_formats'):
        def _free_formats_key(f):
            try:
                ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
            except ValueError:
                ext_ord = -1
            # We only compare the extension if they have the same height and width
            return (f.get('height'), f.get('width'), ext_ord)
        formats = sorted(formats, key=_free_formats_key)

    req_format = self.params.get('format', 'best')
    formats_to_download = []
    # Slices instead of indexing: an empty format list then ends in the
    # ExtractorError below rather than in an IndexError.
    if req_format == 'best' or req_format is None:
        formats_to_download = formats[-1:]
    elif req_format == 'worst':
        formats_to_download = formats[:1]
    # The -1 is for supporting YoutubeIE
    elif req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        # We can accept formats requested in the format: 34/10/5, we pick
        # the first that is available, starting from left
        for rf in req_format.split('/'):
            # A list, not filter(): on Python 3 filter() returns an iterator
            # that is always truthy and cannot be indexed.
            matches = [f for f in formats if f['format_id'] == rf]
            if matches:
                formats_to_download = [matches[0]]
                break
    if not formats_to_download:
        raise ExtractorError(u'requested format not available')

    if download:
        if len(formats_to_download) > 1:
            self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
535
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    info_dict is a 'video' result; 'fulltitle', 'stitle' and (when absent)
    'format' are added to it and 'title' is shortened to 200 characters.

    Steps, in order: count the download, skip entries rejected by
    _match_entry, enforce 'max_downloads', print the force* fields, stop
    here in simulate mode, then write the requested side files
    (description, annotations, subtitles, info JSON, thumbnail), download
    the video unless 'skip_download' is set, run the post-processors and
    record the entry in the download archive.

    Raises MaxDownloadsReached when the download counter exceeds
    'max_downloads' and UnavailableVideoError when the download fails
    with an OSError/IOError. Other failures are reported through
    report_error and end the processing of this entry.
    """

    assert info_dict.get('_type', 'video') == 'video'
    # We increment the download count here to match the previous behaviour.
    self.increment_downloads()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + u'...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen(u'[download] ' + reason)
        return

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads > int(max_downloads):
            raise MaxDownloadsReached()

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        compat_print(info_dict['title'])
    if self.params.get('forceid', False):
        compat_print(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        compat_print(info_dict['url'] + info_dict.get('play_path', u''))
    if self.params.get('forcethumbnail', False) and 'thumbnail' in info_dict:
        compat_print(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and 'description' in info_dict:
        compat_print(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        compat_print(filename)
    if self.params.get('forceformat', False):
        compat_print(info_dict['format'])

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    # prepare_filename has already reported the problem
    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn != '' and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error(u'unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        try:
            descfn = filename + u'.description'
            self.report_writedescription(descfn)
            with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                descfile.write(info_dict['description'])
        except (KeyError, TypeError):
            # Missing key or a None description
            self.report_warning(u'There\'s no description to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write description file ' + descfn)
            return

    if self.params.get('writeannotations', False):
        try:
            annofn = filename + u'.annotations.xml'
            self.report_writeannotations(annofn)
            with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                annofile.write(info_dict['annotations'])
        except (KeyError, TypeError):
            self.report_warning(u'There are no annotations to write.')
        except (OSError, IOError):
            self.report_error(u'Cannot write annotations file: ' + annofn)
            return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                self.report_writesubtitles(sub_filename)
                with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                    subfile.write(sub)
            except (OSError, IOError):
                # Name the subtitles file itself; the description file name
                # is unrelated and may not even be defined at this point.
                self.report_error(u'Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = filename + u'.info.json'
        self.report_writeinfojson(infofn)
        try:
            # 'urlhandle' is left out of the JSON dump
            json_info_dict = dict((k, v) for k, v in info_dict.items() if k not in ['urlhandle'])
            write_json_file(json_info_dict, encodeFilename(infofn))
        except (OSError, IOError):
            self.report_error(u'Cannot write metadata to JSON file ' + infofn)
            return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
            # splitext only strips an extension of the last path component,
            # so names without one (or inside a dotted directory) keep
            # their full stem.
            thumb_filename = os.path.splitext(filename)[0] + u'.' + thumb_format
            self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
                           (info_dict['extractor'], info_dict['id']))
            try:
                uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
                # encodeFilename as for every other file written here
                with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                    shutil.copyfileobj(uf, thumbf)
                self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
                               (info_dict['extractor'], info_dict['id'], thumb_filename))
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                # A missing thumbnail does not abort the download
                self.report_warning(u'Unable to download thumbnail "%s": %s' %
                                    (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            # The existing file counts as a successful download
            success = True
        else:
            try:
                success = self.fd._do_download(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error(u'unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error(u'postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
691
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name. MaxDownloadsReached is announced and re-raised.
    Returns the accumulated download return code.
    """
    several_urls = len(url_list) > 1
    if several_urls and self.fixed_template():
        raise SameFileError(self.params['outtmpl'])

    for url in url_list:
        try:
            # Extracting the info also downloads the videos
            self.extract_info(url)
        except UnavailableVideoError:
            self.report_error(u'unable to download video')
        except MaxDownloadsReached:
            self.to_screen(u'[info] Maximum number of downloaded files reached.')
            raise

    return self._download_retcode
708
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Each postprocessor gets a copy of ie_info extended with 'filepath'.
    The downloaded file is removed afterwards when the postprocessors
    ended up voting against keeping it and 'keepvideo' is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    for pp in self._pps:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        # A wish to keep always wins; a wish to delete only counts while
        # nothing has been decided yet.
        if keep_video_wish is not None and (keep_video_wish or keep_video is None):
            keep_video = keep_video_wish

    if keep_video is not False or self.params.get('keepvideo', False):
        return
    try:
        self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
        os.remove(encodeFilename(filename))
    except (IOError, OSError):
        self.report_warning(u'Unable to remove downloaded video file')
c1c9a79c
PH
731
def in_download_archive(self, info_dict):
    """Return True when info_dict is listed in the download archive.

    The archive is the file named by the 'download_archive' option; an
    entry is a line of the form '<extractor> <id>'. Without the option,
    or when the file does not exist, the answer is False.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False
    wanted = info_dict['extractor'] + u' ' + info_dict['id']
    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        # Only a missing archive file is treated as "not recorded"
        if ioe.errno != errno.ENOENT:
            raise
    return False
746
def record_download_archive(self, info_dict):
    """Append '<extractor> <id>' of info_dict to the download archive.

    Nothing happens when the 'download_archive' option is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = info_dict['extractor'] + u' ' + info_dict['id']
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + u'\n')
dd82ffea
JMF
754
def list_formats(self, info_dict):
    """Print the formats available for info_dict to the screen.

    One line per format: format id, extension and format description.
    When there are several formats the first is marked '(worst)' and the
    last '(best)'. info_dict itself is listed as the only format when its
    'formats' entry is missing, None or empty.
    """
    # "or" instead of a .get() default: the key may be present but None,
    # and an empty list would crash on rows[0] below.
    formats = info_dict.get('formats') or [info_dict]
    rows = [
        "%s\t:\t%s\t[%s]" % (fmt['format_id'],
                             fmt['ext'],
                             fmt.get('format', '???'))
        for fmt in formats
    ]
    if len(rows) != 1:
        rows[0] += ' (worst)'
        rows[-1] += ' (best)'
    formats_s = "\n".join(rows)
    self.to_screen(u"[info] Available formats for %s:\nformat code\textension\n%s" % (info_dict['id'], formats_s))