]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Add the 'webpage_url' field to info_dict
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
4from __future__ import absolute_import
5
c1c9a79c 6import errno
8222d8de
JMF
7import io
8import os
9import re
10import shutil
11import socket
12import sys
13import time
14import traceback
15
16from .utils import *
023fa8c4 17from .extractor import get_info_extractor, gen_extractors
8222d8de
JMF
18from .FileDownloader import FileDownloader
19
20
21class YoutubeDL(object):
22 """YoutubeDL class.
23
24 YoutubeDL objects are the ones responsible of downloading the
25 actual video file and writing it to disk if the user has requested
26 it, among some other tasks. In most cases there should be one per
27 program. As, given a video URL, the downloader doesn't know how to
28 extract all the needed information, task that InfoExtractors do, it
29 has to pass the URL to one of them.
30
31 For this, YoutubeDL objects have a method that allows
32 InfoExtractors to be registered in a given order. When it is passed
33 a URL, the YoutubeDL object handles it to the first InfoExtractor it
34 finds that reports being able to handle it. The InfoExtractor extracts
35 all the information about the video or videos the URL refers to, and
36 YoutubeDL process the extracted information, possibly using a File
37 Downloader to download the video.
38
39 YoutubeDL objects accept a lot of parameters. In order not to saturate
40 the object constructor with arguments, it receives a dictionary of
41 options instead. These options are available through the params
42 attribute for the InfoExtractors to use. The YoutubeDL also
43 registers itself as the downloader in charge for the InfoExtractors
44 that are added to it, so this is a "mutual registration".
45
46 Available options:
47
48 username: Username for authentication purposes.
49 password: Password for authentication purposes.
c6c19746 50 videopassword: Password for accessing a video.
8222d8de
JMF
51 usenetrc: Use netrc for authentication instead.
52 verbose: Print additional info to stdout.
53 quiet: Do not print messages to stdout.
54 forceurl: Force printing final URL.
55 forcetitle: Force printing title.
56 forceid: Force printing ID.
57 forcethumbnail: Force printing thumbnail URL.
58 forcedescription: Force printing description.
59 forcefilename: Force printing final filename.
60 simulate: Do not download the video files.
61 format: Video format code.
62 format_limit: Highest quality format to try.
63 outtmpl: Template for output names.
64 restrictfilenames: Do not allow "&" and spaces in file names
65 ignoreerrors: Do not stop on download errors.
66 nooverwrites: Prevent overwriting files.
67 playliststart: Playlist item to start at.
68 playlistend: Playlist item to end at.
69 matchtitle: Download only matching titles.
70 rejecttitle: Reject downloads for matching titles.
71 logtostderr: Log messages to stderr instead of stdout.
72 writedescription: Write the video description to a .description file
73 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 74 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
75 writethumbnail: Write the thumbnail image to a file
76 writesubtitles: Write the video subtitles to a file
b004821f 77 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 78 allsubtitles: Downloads all the subtitles of the video
0b7f3118 79 (requires writesubtitles or writeautomaticsub)
8222d8de 80 listsubtitles: Lists all available subtitles for the video
b98a6b2f 81 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 82 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
83 keepvideo: Keep the video file after post-processing
84 daterange: A DateRange object, download only if the upload_date is in the range.
85 skip_download: Skip the actual download of the video file
c35f9e72 86 cachedir: Location of the cache files in the filesystem.
c3c88a26 87 None to disable filesystem cache.
47192f92 88 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
89 age_limit: An integer representing the user's age in years.
90 Unsuitable videos for the given age are skipped.
c1c9a79c
PH
91 download_archive: File name of a file where all downloads are recorded.
92 Videos already present in the file are not downloaded
93 again.
fe7e0c98 94
8222d8de
JMF
95 The following parameters are not used by YoutubeDL itself, they are used by
96 the FileDownloader:
97 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
98 noresizebuffer, retries, continuedl, noprogress, consoletitle
99 """
100
101 params = None
102 _ies = []
103 _pps = []
104 _download_retcode = None
105 _num_downloads = None
106 _screen_file = None
107
108 def __init__(self, params):
109 """Create a FileDownloader object with the given options."""
110 self._ies = []
56c73665 111 self._ies_instances = {}
8222d8de
JMF
112 self._pps = []
113 self._progress_hooks = []
114 self._download_retcode = 0
115 self._num_downloads = 0
116 self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
34308b30
PH
117
118 if (sys.version_info >= (3,) and sys.platform != 'win32' and
119 sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
120 and not params['restrictfilenames']):
121 # On Python 3, the Unicode filesystem API will throw errors (#1474)
122 self.report_warning(
1d368c75 123 u'Assuming --restrict-filenames since file system encoding '
34308b30
PH
124 u'cannot encode all charactes. '
125 u'Set the LC_ALL environment variable to fix this.')
126 params['restrictfilenames'] = True
127
8222d8de
JMF
128 self.params = params
129 self.fd = FileDownloader(self, self.params)
130
131 if '%(stitle)s' in self.params['outtmpl']:
132 self.report_warning(u'%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')
133
134 def add_info_extractor(self, ie):
135 """Add an InfoExtractor object to the end of the list."""
136 self._ies.append(ie)
56c73665 137 self._ies_instances[ie.ie_key()] = ie
8222d8de
JMF
138 ie.set_downloader(self)
139
56c73665
JMF
140 def get_info_extractor(self, ie_key):
141 """
142 Get an instance of an IE with name ie_key, it will try to get one from
143 the _ies list, if there's no instance it will create a new one and add
144 it to the extractor list.
145 """
146 ie = self._ies_instances.get(ie_key)
147 if ie is None:
148 ie = get_info_extractor(ie_key)()
149 self.add_info_extractor(ie)
150 return ie
151
023fa8c4
JMF
152 def add_default_info_extractors(self):
153 """
154 Add the InfoExtractors returned by gen_extractors to the end of the list
155 """
156 for ie in gen_extractors():
157 self.add_info_extractor(ie)
158
8222d8de
JMF
159 def add_post_processor(self, pp):
160 """Add a PostProcessor object to the end of the chain."""
161 self._pps.append(pp)
162 pp.set_downloader(self)
163
164 def to_screen(self, message, skip_eol=False):
165 """Print message to stdout if not in quiet mode."""
8222d8de
JMF
166 if not self.params.get('quiet', False):
167 terminator = [u'\n', u''][skip_eol]
168 output = message + terminator
7459e3a2 169 write_string(output, self._screen_file)
8222d8de
JMF
170
171 def to_stderr(self, message):
172 """Print message to stderr."""
173 assert type(message) == type(u'')
174 output = message + u'\n'
175 if 'b' in getattr(self._screen_file, 'mode', '') or sys.version_info[0] < 3: # Python 2 lies about the mode of sys.stdout/sys.stderr
176 output = output.encode(preferredencoding())
177 sys.stderr.write(output)
178
179 def fixed_template(self):
180 """Checks if the output template is fixed."""
181 return (re.search(u'(?u)%\\(.+?\\)s', self.params['outtmpl']) is None)
182
183 def trouble(self, message=None, tb=None):
184 """Determine action to take when a download problem appears.
185
186 Depending on if the downloader has been configured to ignore
187 download errors or not, this method may throw an exception or
188 not when errors are found, after printing the message.
189
190 tb, if given, is additional traceback information.
191 """
192 if message is not None:
193 self.to_stderr(message)
194 if self.params.get('verbose'):
195 if tb is None:
196 if sys.exc_info()[0]: # if .trouble has been called from an except block
197 tb = u''
198 if hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
199 tb += u''.join(traceback.format_exception(*sys.exc_info()[1].exc_info))
200 tb += compat_str(traceback.format_exc())
201 else:
202 tb_data = traceback.format_list(traceback.extract_stack())
203 tb = u''.join(tb_data)
204 self.to_stderr(tb)
205 if not self.params.get('ignoreerrors', False):
206 if sys.exc_info()[0] and hasattr(sys.exc_info()[1], 'exc_info') and sys.exc_info()[1].exc_info[0]:
207 exc_info = sys.exc_info()[1].exc_info
208 else:
209 exc_info = sys.exc_info()
210 raise DownloadError(message, exc_info)
211 self._download_retcode = 1
212
213 def report_warning(self, message):
214 '''
215 Print the message to stderr, it will be prefixed with 'WARNING:'
216 If stderr is a tty file the 'WARNING:' will be colored
217 '''
218 if sys.stderr.isatty() and os.name != 'nt':
fe7e0c98 219 _msg_header = u'\033[0;33mWARNING:\033[0m'
8222d8de 220 else:
fe7e0c98
JMF
221 _msg_header = u'WARNING:'
222 warning_message = u'%s %s' % (_msg_header, message)
8222d8de
JMF
223 self.to_stderr(warning_message)
224
225 def report_error(self, message, tb=None):
226 '''
227 Do the same as trouble, but prefixes the message with 'ERROR:', colored
228 in red if stderr is a tty file.
229 '''
230 if sys.stderr.isatty() and os.name != 'nt':
231 _msg_header = u'\033[0;31mERROR:\033[0m'
232 else:
233 _msg_header = u'ERROR:'
234 error_message = u'%s %s' % (_msg_header, message)
235 self.trouble(error_message, tb)
236
8222d8de
JMF
237 def report_writedescription(self, descfn):
238 """ Report that the description file is being written """
239 self.to_screen(u'[info] Writing video description to: ' + descfn)
240
241 def report_writesubtitles(self, sub_filename):
242 """ Report that the subtitles file is being written """
243 self.to_screen(u'[info] Writing video subtitles to: ' + sub_filename)
244
245 def report_writeinfojson(self, infofn):
246 """ Report that the metadata file has been written """
247 self.to_screen(u'[info] Video description metadata as JSON to: ' + infofn)
248
1fb07d10
JG
249 def report_writeannotations(self, annofn):
250 """ Report that the annotations file has been written. """
251 self.to_screen(u'[info] Writing video annotations to: ' + annofn)
252
8222d8de
JMF
253 def report_file_already_downloaded(self, file_name):
254 """Report file has already been fully downloaded."""
255 try:
256 self.to_screen(u'[download] %s has already been downloaded' % file_name)
257 except (UnicodeEncodeError) as err:
258 self.to_screen(u'[download] The file has already been downloaded')
259
260 def increment_downloads(self):
261 """Increment the ordinal that assigns a number to each file."""
262 self._num_downloads += 1
263
264 def prepare_filename(self, info_dict):
265 """Generate the output filename."""
266 try:
267 template_dict = dict(info_dict)
268
269 template_dict['epoch'] = int(time.time())
270 autonumber_size = self.params.get('autonumber_size')
271 if autonumber_size is None:
272 autonumber_size = 5
273 autonumber_templ = u'%0' + str(autonumber_size) + u'd'
274 template_dict['autonumber'] = autonumber_templ % self._num_downloads
702665c0 275 if template_dict.get('playlist_index') is not None:
8222d8de
JMF
276 template_dict['playlist_index'] = u'%05d' % template_dict['playlist_index']
277
586a91b6 278 sanitize = lambda k, v: sanitize_filename(
8222d8de
JMF
279 u'NA' if v is None else compat_str(v),
280 restricted=self.params.get('restrictfilenames'),
586a91b6
PH
281 is_id=(k == u'id'))
282 template_dict = dict((k, sanitize(k, v))
283 for k, v in template_dict.items())
8222d8de 284
586a91b6
PH
285 tmpl = os.path.expanduser(self.params['outtmpl'])
286 filename = tmpl % template_dict
8222d8de
JMF
287 return filename
288 except KeyError as err:
289 self.report_error(u'Erroneous output template')
290 return None
291 except ValueError as err:
4efba05c 292 self.report_error(u'Error in output template: ' + str(err) + u' (encoding: ' + repr(preferredencoding()) + ')')
8222d8de
JMF
293 return None
294
295 def _match_entry(self, info_dict):
296 """ Returns None iff the file should be downloaded """
297
298 title = info_dict['title']
299 matchtitle = self.params.get('matchtitle', False)
300 if matchtitle:
301 if not re.search(matchtitle, title, re.IGNORECASE):
302 return u'[download] "' + title + '" title did not match pattern "' + matchtitle + '"'
303 rejecttitle = self.params.get('rejecttitle', False)
304 if rejecttitle:
305 if re.search(rejecttitle, title, re.IGNORECASE):
306 return u'"' + title + '" title matched reject pattern "' + rejecttitle + '"'
307 date = info_dict.get('upload_date', None)
308 if date is not None:
309 dateRange = self.params.get('daterange', DateRange())
310 if date not in dateRange:
311 return u'[download] %s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
8dbe9899
PH
312 age_limit = self.params.get('age_limit')
313 if age_limit is not None:
cfadd183 314 if age_limit < info_dict.get('age_limit', 0):
8dbe9899 315 return u'Skipping "' + title + '" because it is age restricted'
c1c9a79c 316 if self.in_download_archive(info_dict):
ee6c9f95 317 return (u'%(title)s has already been recorded in archive'
c1c9a79c 318 % info_dict)
8222d8de 319 return None
fe7e0c98 320
b6c45014
JMF
321 @staticmethod
322 def add_extra_info(info_dict, extra_info):
323 '''Set the keys from extra_info in info dict if they are missing'''
324 for key, value in extra_info.items():
325 info_dict.setdefault(key, value)
326
8222d8de
JMF
327 def extract_info(self, url, download=True, ie_key=None, extra_info={}):
328 '''
329 Returns a list with a dictionary for each video we find.
330 If 'download', also downloads the videos.
331 extra_info is a dict containing the extra values to add to each result
332 '''
fe7e0c98 333
8222d8de 334 if ie_key:
56c73665 335 ies = [self.get_info_extractor(ie_key)]
8222d8de
JMF
336 else:
337 ies = self._ies
338
339 for ie in ies:
340 if not ie.suitable(url):
341 continue
342
343 if not ie.working():
344 self.report_warning(u'The program functionality for this site has been marked as broken, '
345 u'and will probably not work.')
346
347 try:
348 ie_result = ie.extract(url)
349 if ie_result is None: # Finished already (backwards compatibility; listformats and friends should be moved here)
350 break
351 if isinstance(ie_result, list):
352 # Backwards compatibility: old IE result format
8222d8de
JMF
353 ie_result = {
354 '_type': 'compat_list',
355 'entries': ie_result,
356 }
9103bbc5
JMF
357 self.add_extra_info(ie_result,
358 {
359 'extractor': ie.IE_NAME,
360 'webpage_url': url
361 })
b6c45014 362 return self.process_ie_result(ie_result, download, extra_info)
8222d8de
JMF
363 except ExtractorError as de: # An error we somewhat expected
364 self.report_error(compat_str(de), de.format_traceback())
365 break
366 except Exception as e:
367 if self.params.get('ignoreerrors', False):
368 self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
369 break
370 else:
371 raise
372 else:
373 self.report_error(u'no suitable InfoExtractor: %s' % url)
fe7e0c98 374
8222d8de
JMF
375 def process_ie_result(self, ie_result, download=True, extra_info={}):
376 """
377 Take the result of the ie(may be modified) and resolve all unresolved
378 references (URLs, playlist items).
379
380 It will also download the videos if 'download'.
381 Returns the resolved ie_result.
382 """
383
384 result_type = ie_result.get('_type', 'video') # If not given we suppose it's a video, support the default old system
385 if result_type == 'video':
b6c45014 386 self.add_extra_info(ie_result, extra_info)
dd82ffea 387 return self.process_video_result(ie_result)
8222d8de
JMF
388 elif result_type == 'url':
389 # We have to add extra_info to the results because it may be
390 # contained in a playlist
391 return self.extract_info(ie_result['url'],
392 download,
393 ie_key=ie_result.get('ie_key'),
394 extra_info=extra_info)
395 elif result_type == 'playlist':
b6c45014 396 self.add_extra_info(ie_result, extra_info)
8222d8de
JMF
397 # We process each entry in the playlist
398 playlist = ie_result.get('title', None) or ie_result.get('id', None)
fe7e0c98 399 self.to_screen(u'[download] Downloading playlist: %s' % playlist)
8222d8de
JMF
400
401 playlist_results = []
402
403 n_all_entries = len(ie_result['entries'])
404 playliststart = self.params.get('playliststart', 1) - 1
405 playlistend = self.params.get('playlistend', -1)
406
407 if playlistend == -1:
408 entries = ie_result['entries'][playliststart:]
409 else:
410 entries = ie_result['entries'][playliststart:playlistend]
411
412 n_entries = len(entries)
413
414 self.to_screen(u"[%s] playlist '%s': Collected %d video ids (downloading %d of them)" %
415 (ie_result['extractor'], playlist, n_all_entries, n_entries))
416
fe7e0c98
JMF
417 for i, entry in enumerate(entries, 1):
418 self.to_screen(u'[download] Downloading video #%s of %s' % (i, n_entries))
8222d8de 419 extra = {
fe7e0c98
JMF
420 'playlist': playlist,
421 'playlist_index': i + playliststart,
b6c45014 422 'extractor': ie_result['extractor'],
9103bbc5 423 'webpage_url': ie_result['webpage_url'],
fe7e0c98 424 }
8222d8de
JMF
425 entry_result = self.process_ie_result(entry,
426 download=download,
427 extra_info=extra)
428 playlist_results.append(entry_result)
429 ie_result['entries'] = playlist_results
430 return ie_result
431 elif result_type == 'compat_list':
432 def _fixup(r):
b6c45014 433 self.add_extra_info(r,
9103bbc5
JMF
434 {
435 'extractor': ie_result['extractor'],
436 'webpage_url': ie_result['webpage_url'],
437 })
8222d8de
JMF
438 return r
439 ie_result['entries'] = [
b6c45014 440 self.process_ie_result(_fixup(r), download, extra_info)
8222d8de
JMF
441 for r in ie_result['entries']
442 ]
443 return ie_result
444 else:
445 raise Exception('Invalid result type: %s' % result_type)
446
a9c58ad9
JMF
447 def select_format(self, format_spec, available_formats):
448 if format_spec == 'best' or format_spec is None:
449 return available_formats[-1]
450 elif format_spec == 'worst':
451 return available_formats[0]
452 else:
49e86983
JMF
453 extensions = [u'mp4', u'flv', u'webm', u'3gp']
454 if format_spec in extensions:
455 filter_f = lambda f: f['ext'] == format_spec
456 else:
457 filter_f = lambda f: f['format_id'] == format_spec
fe7e0c98 458 matches = list(filter(filter_f, available_formats))
a9c58ad9
JMF
459 if matches:
460 return matches[-1]
461 return None
462
dd82ffea
JMF
463 def process_video_result(self, info_dict, download=True):
464 assert info_dict.get('_type', 'video') == 'video'
465
466 if 'playlist' not in info_dict:
467 # It isn't part of a playlist
468 info_dict['playlist'] = None
469 info_dict['playlist_index'] = None
470
6ff000b8 471 # This extractors handle format selection themselves
a7685f3b 472 if info_dict['extractor'] in [u'youtube', u'Youku']:
12893efe
JMF
473 if download:
474 self.process_info(info_dict)
6ff000b8
JMF
475 return info_dict
476
dd82ffea
JMF
477 # We now pick which formats have to be downloaded
478 if info_dict.get('formats') is None:
479 # There's only one format available
480 formats = [info_dict]
481 else:
482 formats = info_dict['formats']
483
484 # We check that all the formats have the format and format_id fields
485 for (i, format) in enumerate(formats):
dd82ffea 486 if format.get('format_id') is None:
8016c922 487 format['format_id'] = compat_str(i)
8c51aa65
JMF
488 if format.get('format') is None:
489 format['format'] = u'{id} - {res}{note}'.format(
490 id=format['format_id'],
491 res=self.format_resolution(format),
71934988 492 note=u' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 493 )
c1002e96
PH
494 # Automatically determine file extension if missing
495 if 'ext' not in format:
496 format['ext'] = determine_ext(format['url'])
dd82ffea
JMF
497
498 if self.params.get('listformats', None):
499 self.list_formats(info_dict)
500 return
501
99e206d5
JMF
502 format_limit = self.params.get('format_limit', None)
503 if format_limit:
f4d96df0
PH
504 formats = list(takewhile_inclusive(
505 lambda f: f['format_id'] != format_limit, formats
506 ))
e028d0d1
JMF
507 if self.params.get('prefer_free_formats'):
508 def _free_formats_key(f):
509 try:
510 ext_ord = [u'flv', u'mp4', u'webm'].index(f['ext'])
511 except ValueError:
512 ext_ord = -1
513 # We only compare the extension if they have the same height and width
514 return (f.get('height'), f.get('width'), ext_ord)
515 formats = sorted(formats, key=_free_formats_key)
99e206d5 516
dd82ffea 517 req_format = self.params.get('format', 'best')
a9c58ad9
JMF
518 if req_format is None:
519 req_format = 'best'
dd82ffea 520 formats_to_download = []
dd82ffea 521 # The -1 is for supporting YoutubeIE
a9c58ad9 522 if req_format in ('-1', 'all'):
dd82ffea
JMF
523 formats_to_download = formats
524 else:
a9c58ad9 525 # We can accept formats requestd in the format: 34/5/best, we pick
416a5efc 526 # the first that is available, starting from left
dd82ffea
JMF
527 req_formats = req_format.split('/')
528 for rf in req_formats:
a9c58ad9
JMF
529 selected_format = self.select_format(rf, formats)
530 if selected_format is not None:
531 formats_to_download = [selected_format]
dd82ffea
JMF
532 break
533 if not formats_to_download:
78a3a9f8
PH
534 raise ExtractorError(u'requested format not available',
535 expected=True)
dd82ffea
JMF
536
537 if download:
538 if len(formats_to_download) > 1:
539 self.to_screen(u'[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
540 for format in formats_to_download:
541 new_info = dict(info_dict)
542 new_info.update(format)
543 self.process_info(new_info)
544 # We update the info dict with the best quality format (backwards compatibility)
545 info_dict.update(formats_to_download[-1])
546 return info_dict
547
8222d8de
JMF
548 def process_info(self, info_dict):
549 """Process a single resolved IE result."""
550
551 assert info_dict.get('_type', 'video') == 'video'
552 #We increment the download the download count here to match the previous behaviour.
553 self.increment_downloads()
554
555 info_dict['fulltitle'] = info_dict['title']
556 if len(info_dict['title']) > 200:
557 info_dict['title'] = info_dict['title'][:197] + u'...'
558
559 # Keep for backwards compatibility
560 info_dict['stitle'] = info_dict['title']
561
562 if not 'format' in info_dict:
563 info_dict['format'] = info_dict['ext']
564
565 reason = self._match_entry(info_dict)
566 if reason is not None:
567 self.to_screen(u'[download] ' + reason)
568 return
569
570 max_downloads = self.params.get('max_downloads')
571 if max_downloads is not None:
572 if self._num_downloads > int(max_downloads):
573 raise MaxDownloadsReached()
574
575 filename = self.prepare_filename(info_dict)
576
577 # Forced printings
578 if self.params.get('forcetitle', False):
579 compat_print(info_dict['title'])
580 if self.params.get('forceid', False):
581 compat_print(info_dict['id'])
582 if self.params.get('forceurl', False):
edde6c56
PH
583 # For RTMP URLs, also include the playpath
584 compat_print(info_dict['url'] + info_dict.get('play_path', u''))
216d71d0 585 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
8222d8de 586 compat_print(info_dict['thumbnail'])
216d71d0 587 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
8222d8de
JMF
588 compat_print(info_dict['description'])
589 if self.params.get('forcefilename', False) and filename is not None:
590 compat_print(filename)
591 if self.params.get('forceformat', False):
592 compat_print(info_dict['format'])
593
594 # Do nothing else if in simulate mode
595 if self.params.get('simulate', False):
596 return
597
598 if filename is None:
599 return
600
601 try:
602 dn = os.path.dirname(encodeFilename(filename))
603 if dn != '' and not os.path.exists(dn):
604 os.makedirs(dn)
605 except (OSError, IOError) as err:
606 self.report_error(u'unable to create directory ' + compat_str(err))
607 return
608
609 if self.params.get('writedescription', False):
610 try:
611 descfn = filename + u'.description'
612 self.report_writedescription(descfn)
613 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
614 descfile.write(info_dict['description'])
b3f0e530 615 except (KeyError, TypeError):
535f59bb 616 self.report_warning(u'There\'s no description to write.')
8222d8de
JMF
617 except (OSError, IOError):
618 self.report_error(u'Cannot write description file ' + descfn)
619 return
620
1fb07d10
JG
621 if self.params.get('writeannotations', False):
622 try:
fe7e0c98
JMF
623 annofn = filename + u'.annotations.xml'
624 self.report_writeannotations(annofn)
625 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
626 annofile.write(info_dict['annotations'])
1fb07d10
JG
627 except (KeyError, TypeError):
628 self.report_warning(u'There are no annotations to write.')
629 except (OSError, IOError):
fe7e0c98
JMF
630 self.report_error(u'Cannot write annotations file: ' + annofn)
631 return
1fb07d10 632
c4a91be7 633 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 634 self.params.get('writeautomaticsub')])
c4a91be7 635
fe7e0c98 636 if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
8222d8de
JMF
637 # subtitles download errors are already managed as troubles in relevant IE
638 # that way it will silently go on when used with unsupporting IE
8222d8de 639 subtitles = info_dict['subtitles']
8222d8de 640 sub_format = self.params.get('subtitlesformat')
5d51a883
JMF
641 for sub_lang in subtitles.keys():
642 sub = subtitles[sub_lang]
6804038d
JMF
643 if sub is None:
644 continue
8222d8de 645 try:
d4051a8e 646 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
8222d8de
JMF
647 self.report_writesubtitles(sub_filename)
648 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
5d51a883 649 subfile.write(sub)
8222d8de
JMF
650 except (OSError, IOError):
651 self.report_error(u'Cannot write subtitles file ' + descfn)
652 return
653
8222d8de
JMF
654 if self.params.get('writeinfojson', False):
655 infofn = filename + u'.info.json'
656 self.report_writeinfojson(infofn)
657 try:
fe7e0c98 658 json_info_dict = dict((k, v) for k, v in info_dict.items() if not k in ['urlhandle'])
8222d8de
JMF
659 write_json_file(json_info_dict, encodeFilename(infofn))
660 except (OSError, IOError):
661 self.report_error(u'Cannot write metadata to JSON file ' + infofn)
662 return
663
664 if self.params.get('writethumbnail', False):
d8269e1d 665 if info_dict.get('thumbnail') is not None:
cbdbb766 666 thumb_format = determine_ext(info_dict['thumbnail'], u'jpg')
8222d8de
JMF
667 thumb_filename = filename.rpartition('.')[0] + u'.' + thumb_format
668 self.to_screen(u'[%s] %s: Downloading thumbnail ...' %
669 (info_dict['extractor'], info_dict['id']))
0a60edcf
JMF
670 try:
671 uf = compat_urllib_request.urlopen(info_dict['thumbnail'])
672 with open(thumb_filename, 'wb') as thumbf:
673 shutil.copyfileobj(uf, thumbf)
674 self.to_screen(u'[%s] %s: Writing thumbnail to: %s' %
675 (info_dict['extractor'], info_dict['id'], thumb_filename))
676 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
677 self.report_warning(u'Unable to download thumbnail "%s": %s' %
678 (info_dict['thumbnail'], compat_str(err)))
8222d8de
JMF
679
680 if not self.params.get('skip_download', False):
681 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
682 success = True
683 else:
684 try:
685 success = self.fd._do_download(filename, info_dict)
8222d8de
JMF
686 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
687 self.report_error(u'unable to download video data: %s' % str(err))
688 return
c40c6aaa
JMF
689 except (OSError, IOError) as err:
690 raise UnavailableVideoError(err)
8222d8de
JMF
691 except (ContentTooShortError, ) as err:
692 self.report_error(u'content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
693 return
694
695 if success:
696 try:
697 self.post_process(filename, info_dict)
698 except (PostProcessingError) as err:
699 self.report_error(u'postprocessing: %s' % str(err))
700 return
701
c1c9a79c
PH
702 self.record_download_archive(info_dict)
703
8222d8de
JMF
704 def download(self, url_list):
705 """Download a given list of URLs."""
706 if len(url_list) > 1 and self.fixed_template():
707 raise SameFileError(self.params['outtmpl'])
708
709 for url in url_list:
710 try:
711 #It also downloads the videos
712 videos = self.extract_info(url)
713 except UnavailableVideoError:
714 self.report_error(u'unable to download video')
715 except MaxDownloadsReached:
716 self.to_screen(u'[info] Maximum number of downloaded files reached.')
717 raise
718
719 return self._download_retcode
720
721 def post_process(self, filename, ie_info):
722 """Run all the postprocessors on the given file."""
723 info = dict(ie_info)
724 info['filepath'] = filename
725 keep_video = None
726 for pp in self._pps:
727 try:
fe7e0c98 728 keep_video_wish, new_info = pp.run(info)
8222d8de
JMF
729 if keep_video_wish is not None:
730 if keep_video_wish:
731 keep_video = keep_video_wish
732 elif keep_video is None:
733 # No clear decision yet, let IE decide
734 keep_video = keep_video_wish
735 except PostProcessingError as e:
bbcbf4d4 736 self.report_error(e.msg)
8222d8de
JMF
737 if keep_video is False and not self.params.get('keepvideo', False):
738 try:
739 self.to_screen(u'Deleting original file %s (pass -k to keep)' % filename)
740 os.remove(encodeFilename(filename))
741 except (IOError, OSError):
742 self.report_warning(u'Unable to remove downloaded video file')
c1c9a79c
PH
743
744 def in_download_archive(self, info_dict):
745 fn = self.params.get('download_archive')
746 if fn is None:
747 return False
748 vid_id = info_dict['extractor'] + u' ' + info_dict['id']
749 try:
750 with locked_file(fn, 'r', encoding='utf-8') as archive_file:
751 for line in archive_file:
752 if line.strip() == vid_id:
753 return True
754 except IOError as ioe:
755 if ioe.errno != errno.ENOENT:
756 raise
757 return False
758
759 def record_download_archive(self, info_dict):
760 fn = self.params.get('download_archive')
761 if fn is None:
762 return
763 vid_id = info_dict['extractor'] + u' ' + info_dict['id']
764 with locked_file(fn, 'a', encoding='utf-8') as archive_file:
765 archive_file.write(vid_id + u'\n')
dd82ffea 766
8c51aa65 767 @staticmethod
8abeeb94 768 def format_resolution(format, default='unknown'):
57dd9a8f
PH
769 if format.get('_resolution') is not None:
770 return format['_resolution']
8c51aa65
JMF
771 if format.get('height') is not None:
772 if format.get('width') is not None:
773 res = u'%sx%s' % (format['width'], format['height'])
774 else:
775 res = u'%sp' % format['height']
776 else:
8abeeb94 777 res = default
8c51aa65
JMF
778 return res
779
dd82ffea 780 def list_formats(self, info_dict):
57dd9a8f
PH
781 def line(format):
782 return (u'%-15s%-10s%-12s%s' % (
8c51aa65
JMF
783 format['format_id'],
784 format['ext'],
8c51aa65 785 self.format_resolution(format),
57dd9a8f 786 format.get('format_note', ''),
8c51aa65
JMF
787 )
788 )
57dd9a8f 789
94badb25
PH
790 formats = info_dict.get('formats', [info_dict])
791 formats_s = list(map(line, formats))
792 if len(formats) > 1:
793 formats_s[0] += (' ' if formats[0].get('format_note') else '') + '(worst)'
794 formats_s[-1] += (' ' if formats[-1].get('format_note') else '') + '(best)'
57dd9a8f
PH
795
796 header_line = line({
797 'format_id': u'format code', 'ext': u'extension',
798 '_resolution': u'resolution', 'format_note': u'note'})
799 self.to_screen(u'[info] Available formats for %s:\n%s\n%s' %
800 (info_dict['id'], header_line, u"\n".join(formats_s)))