]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[YoutubeDL] Include rtmpdump in exe versions -v output
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
8694c600 10import json
62fec3b2 11import locale
8222d8de 12import os
dca08720 13import platform
8222d8de
JMF
14import re
15import shutil
dca08720 16import subprocess
8222d8de
JMF
17import socket
18import sys
19import time
20import traceback
21
1e5b9a95
PH
22if os.name == 'nt':
23 import ctypes
24
ce02ed60 25from .utils import (
dca08720 26 compat_cookiejar,
4644ac55 27 compat_expanduser,
ce02ed60 28 compat_http_client,
ce02ed60
PH
29 compat_str,
30 compat_urllib_error,
31 compat_urllib_request,
d05cfe06 32 escape_url,
ce02ed60
PH
33 ContentTooShortError,
34 date_from_str,
35 DateRange,
acd69589 36 DEFAULT_OUTTMPL,
ce02ed60
PH
37 determine_ext,
38 DownloadError,
39 encodeFilename,
40 ExtractorError,
02dbf93f 41 format_bytes,
525ef922 42 formatSeconds,
1c088fa8 43 get_term_width,
ce02ed60 44 locked_file,
dca08720 45 make_HTTPS_handler,
ce02ed60 46 MaxDownloadsReached,
b7ab0590 47 PagedList,
ce02ed60 48 PostProcessingError,
dca08720 49 platform_name,
ce02ed60
PH
50 preferredencoding,
51 SameFileError,
52 sanitize_filename,
53 subtitles_filename,
54 takewhile_inclusive,
55 UnavailableVideoError,
29eb5174 56 url_basename,
ce02ed60
PH
57 write_json_file,
58 write_string,
dca08720 59 YoutubeDLHandler,
6350728b 60 prepend_extension,
ce02ed60 61)
a0e07d31 62from .cache import Cache
023fa8c4 63from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 64from .downloader import get_suitable_downloader
4c83c967 65from .downloader.rtmp import rtmpdump_version
d28b5171 66from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
dca08720 67from .version import __version__
8222d8de
JMF
68
69
70class YoutubeDL(object):
71 """YoutubeDL class.
72
73 YoutubeDL objects are the ones responsible of downloading the
74 actual video file and writing it to disk if the user has requested
75 it, among some other tasks. In most cases there should be one per
76 program. As, given a video URL, the downloader doesn't know how to
77 extract all the needed information, task that InfoExtractors do, it
78 has to pass the URL to one of them.
79
80 For this, YoutubeDL objects have a method that allows
81 InfoExtractors to be registered in a given order. When it is passed
82 a URL, the YoutubeDL object handles it to the first InfoExtractor it
83 finds that reports being able to handle it. The InfoExtractor extracts
84 all the information about the video or videos the URL refers to, and
85 YoutubeDL process the extracted information, possibly using a File
86 Downloader to download the video.
87
88 YoutubeDL objects accept a lot of parameters. In order not to saturate
89 the object constructor with arguments, it receives a dictionary of
90 options instead. These options are available through the params
91 attribute for the InfoExtractors to use. The YoutubeDL also
92 registers itself as the downloader in charge for the InfoExtractors
93 that are added to it, so this is a "mutual registration".
94
95 Available options:
96
97 username: Username for authentication purposes.
98 password: Password for authentication purposes.
c6c19746 99 videopassword: Password for accessing a video.
8222d8de
JMF
100 usenetrc: Use netrc for authentication instead.
101 verbose: Print additional info to stdout.
102 quiet: Do not print messages to stdout.
ad8915b7 103 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
104 forceurl: Force printing final URL.
105 forcetitle: Force printing title.
106 forceid: Force printing ID.
107 forcethumbnail: Force printing thumbnail URL.
108 forcedescription: Force printing description.
109 forcefilename: Force printing final filename.
525ef922 110 forceduration: Force printing duration.
8694c600 111 forcejson: Force printing info_dict as JSON.
63e0be34
PH
112 dump_single_json: Force printing the info_dict of the whole playlist
113 (or video) as a single JSON line.
8222d8de
JMF
114 simulate: Do not download the video files.
115 format: Video format code.
116 format_limit: Highest quality format to try.
117 outtmpl: Template for output names.
118 restrictfilenames: Do not allow "&" and spaces in file names
119 ignoreerrors: Do not stop on download errors.
120 nooverwrites: Prevent overwriting files.
121 playliststart: Playlist item to start at.
122 playlistend: Playlist item to end at.
123 matchtitle: Download only matching titles.
124 rejecttitle: Reject downloads for matching titles.
8bf9319e 125 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
126 logtostderr: Log messages to stderr instead of stdout.
127 writedescription: Write the video description to a .description file
128 writeinfojson: Write the video description to a .info.json file
1fb07d10 129 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
130 writethumbnail: Write the thumbnail image to a file
131 writesubtitles: Write the video subtitles to a file
b004821f 132 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 133 allsubtitles: Downloads all the subtitles of the video
0b7f3118 134 (requires writesubtitles or writeautomaticsub)
8222d8de 135 listsubtitles: Lists all available subtitles for the video
b98a6b2f 136 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 137 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
138 keepvideo: Keep the video file after post-processing
139 daterange: A DateRange object, download only if the upload_date is in the range.
140 skip_download: Skip the actual download of the video file
c35f9e72 141 cachedir: Location of the cache files in the filesystem.
a0e07d31 142 False to disable filesystem cache.
47192f92 143 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
144 age_limit: An integer representing the user's age in years.
145 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
146 min_views: An integer representing the minimum view count the video
147 must have in order to not be skipped.
148 Videos without view count information are always
149 downloaded. None for no limit.
150 max_views: An integer representing the maximum view count.
151 Videos that are more popular than that are not
152 downloaded.
153 Videos without view count information are always
154 downloaded. None for no limit.
155 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
156 Videos already present in the file are not downloaded
157 again.
dca08720 158 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 159 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
160 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
161 At the moment, this is only supported by YouTube.
a1ee09e8 162 proxy: URL of the proxy server to use
e344693b 163 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
164 bidi_workaround: Work around buggy terminals without bidirectional text
165 support, using fribidi
a0ddb8a2 166 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 167 include_ads: Download ads as well
04b4d394
PH
168 default_search: Prepend this string if an input url is not valid.
169 'auto' for elaborate guessing
62fec3b2 170 encoding: Use this encoding instead of the system-specified.
e8ee972c 171 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
172 Pass in 'in_playlist' to only show this behavior for
173 playlist items.
fe7e0c98 174
8222d8de
JMF
175 The following parameters are not used by YoutubeDL itself, they are used by
176 the FileDownloader:
177 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
178 noresizebuffer, retries, continuedl, noprogress, consoletitle
76b1bd67
JMF
179
180 The following options are used by the post processors:
181 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
182 otherwise prefer avconv.
8d31fa3c 183 exec_cmd: Arbitrary command to run after downloading
8222d8de
JMF
184 """
185
186 params = None
187 _ies = []
188 _pps = []
189 _download_retcode = None
190 _num_downloads = None
191 _screen_file = None
192
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary.
    If auto_init is true, the debug header is printed and the default
    InfoExtractors are registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Regular output is redirected to stderr only on request. A conditional
    # (rather than indexing a list with the option value) also copes with
    # None and other non-integer option values.
    if params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Prefer bidiv; fall back to fribidi if it cannot be started.
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # Only a missing executable is tolerated; anything else is fatal.
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()
253
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register the InfoExtractor ie as the last entry of the extractor list.

    The instance is also stored under its ie_key() and is told that this
    object is its downloader.
    """
    registry = self._ies
    registry.append(ie)
    by_key = self._ies_instances
    by_key[ie.ie_key()] = ie
    ie.set_downloader(self)
259
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the extractor instance registered under ie_key.

    If no instance is registered yet, the class looked up for ie_key is
    instantiated, registered through add_info_extractor and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    fresh = get_info_extractor(ie_key)()
    self.add_info_extractor(fresh)
    return fresh
271
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every extractor produced by gen_extractors, keeping their order
    """
    default_extractors = gen_extractors()
    for extractor in default_extractors:
        self.add_info_extractor(extractor)
278
8222d8de
JMF
def add_post_processor(self, pp):
    """Append the PostProcessor pp to the chain and make this its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
283
933605d7
JMF
def add_progress_hook(self, ph):
    """Register the progress hook ph (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 287
1c088fa8 288 def _bidi_workaround(self, message):
5d681e96 289 if not hasattr(self, '_output_channel'):
1c088fa8
PH
290 return message
291
5d681e96 292 assert hasattr(self, '_output_process')
11b85ce6 293 assert isinstance(message, compat_str)
6febd1c1
PH
294 line_count = message.count('\n') + 1
295 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 296 self._output_process.stdin.flush()
6febd1c1 297 res = ''.join(self._output_channel.readline().decode('utf-8')
1c088fa8 298 for _ in range(line_count))
6febd1c1 299 return res[:-len('\n')]
1c088fa8 300
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, honouring the 'quiet' option."""
    outcome = self.to_stdout(message, skip_eol, check_quiet=True)
    return outcome
304
def _write_string(self, s, out=None):
    """Write s to out through write_string, using the 'encoding' option."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 307
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    If a 'logger' option is set, the message is passed to its debug()
    method and nothing else happens.  Otherwise the message is written to
    the screen file, followed by a newline unless skip_eol is true.
    The 'quiet' option suppresses the output only when check_quiet is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
    elif not check_quiet or not self.params.get('quiet', False):
        message = self._bidi_workaround(message)
        # A plain truth test accepts any flag value (None, strings, ...),
        # unlike indexing a two-element list with it.
        terminator = '' if skip_eol else '\n'
        output = message + terminator

        self._write_string(output, self._screen_file)
8222d8de
JMF
318
def to_stderr(self, message):
    """Send message to the logger's error() if one is set, else to the error file."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        self.params['logger'].error(message)
        return
    converted = self._bidi_workaround(message)
    self._write_string(converted + '\n', self._err_file)
8222d8de 328
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on.

    On Windows with a console window the kernel32 API is used; otherwise,
    if TERM is set in the environment, an escape sequence is written to the
    screen file.
    """
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt':
        if ctypes.windll.kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
            return
    if 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
1e5b9a95 338
bdde425c
PH
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless the 'consoletitle' option is on and TERM is set.
    """
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
345
def restore_console_title(self):
    """Pop the console title back from the terminal's title stack.

    Does nothing unless the 'consoletitle' option is on and TERM is set.
    """
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
352
353 def __enter__(self):
354 self.save_console_title()
355 return self
356
357 def __exit__(self, *args):
358 self.restore_console_title()
f89197d7 359
dca08720
PH
360 if self.params.get('cookiefile') is not None:
361 self.cookiejar.save()
bdde425c 362
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Report a download problem and decide whether to abort.

    The message, if any, is printed to stderr.  In verbose mode a
    traceback is printed as well: the given tb, or one built from the
    exception being handled, or from the current stack.  Unless the
    'ignoreerrors' option is set a DownloadError is raised; otherwise the
    download return code is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            current = sys.exc_info()
            if current[0]:  # if .trouble has been called from an except block
                tb = ''
                # Exceptions wrapping another one carry it in .exc_info
                if hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
                    wrapped_lines = traceback.format_exception(*current[1].exc_info)
                    tb += ''.join(wrapped_lines)
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    current = sys.exc_info()
    if current[0] and hasattr(current[1], 'exc_info') and current[1].exc_info[0]:
        exc_info = current[1].exc_info
    else:
        exc_info = current
    raise DownloadError(message, exc_info)
392
def report_warning(self, message):
    '''
    Emit a warning: through the logger if one is configured, otherwise on
    stderr prefixed with 'WARNING:' (colored when stderr is a tty and the
    platform is not Windows).  The 'no_warnings' option silences the stderr
    variant.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    colored = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;33mWARNING:\033[0m' if colored else 'WARNING:'
    self.to_stderr('%s %s' % (prefix, message))
8222d8de
JMF
409
def report_error(self, message, tb=None):
    '''
    Forward message to trouble() with an 'ERROR:' prefix, which is colored
    red when stderr is a tty and the platform is not Windows.
    '''
    colored = self._err_file.isatty() and os.name != 'nt'
    prefix = '\033[0;31mERROR:\033[0m' if colored else 'ERROR:'
    self.trouble('%s %s' % (prefix, message), tb)
421
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name was already completely downloaded.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    generic = '[download] The file has already been downloaded'
    try:
        specific = '[download] %s has already been downloaded' % file_name
        self.to_screen(specific)
    except UnicodeEncodeError:
        self.to_screen(generic)
8222d8de 428
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' option (DEFAULT_OUTTMPL if unset) is expanded with a
    sanitized copy of info_dict, extended with 'epoch', 'autonumber' and,
    if possible, 'resolution'.  Fields that are missing or None expand
    to 'NA'.

    Returns the file name, or None (after reporting the error) if the
    template expansion raises ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Zero-pad the index to the width of the playlist size.  The
            # size may be absent (e.g. an index set by an extractor);
            # then the index is used unpadded instead of failing.
            n_entries = template_dict.get('n_entries')
            index_width = 0 if n_entries is None else len(str(n_entries))
            template_dict['playlist_index'] = '%0*d' % (index_width, template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # Width-only: keep the WIDTHxHEIGHT order, height unknown
                template_dict['resolution'] = '%dx?' % template_dict['width']

        restricted = self.params.get('restrictfilenames')

        def sanitize(k, v):
            # Every value is made filesystem-safe; ids get special treatment
            return sanitize_filename(
                compat_str(v),
                restricted=restricted,
                is_id=(k == 'id'))
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
466
467 def _match_entry(self, info_dict):
468 """ Returns None iff the file should be downloaded """
469
6febd1c1 470 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
471 if 'title' in info_dict:
472 # This can happen when we're just evaluating the playlist
473 title = info_dict['title']
474 matchtitle = self.params.get('matchtitle', False)
475 if matchtitle:
476 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 477 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
478 rejecttitle = self.params.get('rejecttitle', False)
479 if rejecttitle:
480 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 481 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
482 date = info_dict.get('upload_date', None)
483 if date is not None:
484 dateRange = self.params.get('daterange', DateRange())
485 if date not in dateRange:
6febd1c1 486 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
5fe18bdb
PH
487 view_count = info_dict.get('view_count', None)
488 if view_count is not None:
489 min_views = self.params.get('min_views')
490 if min_views is not None and view_count < min_views:
6febd1c1 491 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
492 max_views = self.params.get('max_views')
493 if max_views is not None and view_count > max_views:
6febd1c1 494 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
8dbe9899
PH
495 age_limit = self.params.get('age_limit')
496 if age_limit is not None:
be843678
PH
497 actual_age_limit = info_dict.get('age_limit')
498 if actual_age_limit is None:
499 actual_age_limit = 0
500 if age_limit < actual_age_limit:
6febd1c1 501 return 'Skipping "' + title + '" because it is age restricted'
c1c9a79c 502 if self.in_download_archive(info_dict):
6febd1c1 503 return '%s has already been recorded in archive' % video_title
8222d8de 504 return None
fe7e0c98 505
b6c45014
JMF
506 @staticmethod
507 def add_extra_info(info_dict, extra_info):
508 '''Set the keys from extra_info in info dict if they are missing'''
509 for key, value in extra_info.items():
510 info_dict.setdefault(key, value)
511
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    If ie_key is given, only the extractor registered under that key is
    tried; otherwise all registered extractors are tried in order.
    If 'process' is true the extractor result is passed on to
    process_ie_result (which, if 'download', also downloads the videos)
    and its return value is returned; otherwise the raw extractor result
    is returned.
    extra_info is a dict containing the extra values to add to each
    result; None means no extra values.
    Returns None if no extractor is suitable, the extractor returned
    nothing, or an error was reported instead of raised.
    '''

    # A fresh dict per call: a shared mutable default would leak between
    # calls once any downstream code modifies it.
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without any extractor accepting the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 561
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and page fields of ie_result that are missing.

    Existing keys of ie_result are left alone (see add_extra_info).
    """
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
569
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    The '_type' field of ie_result (default 'video') selects the handling:
    'video' results go to process_video_result, 'url' results are
    extracted again, 'url_transparent' results are merged with the result
    of the referenced URL, and 'playlist' / 'compat_list' results have
    each of their entries processed recursively.

    It will also download the videos if 'download'.
    extra_info holds values that are added to video results when missing.
    Returns the resolved ie_result.
    Raises Exception for an unknown '_type'.
    """

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        # With 'extract_flat', URL references are returned unresolved:
        # always if the option is True, or only for playlist items
        # (recognizable by the 'playlist' key in extra_info) if it is
        # 'in_playlist'.
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        # (extract only; processing happens on the merged result below)
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        def make_result(embedded_info):
            # Start from the embedding page's result, but take the
            # media-specific fields listed below from embedded_info only:
            # they are dropped from the copy and re-added if embedded_info
            # provides them.
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            # Apply the same merge to every entry of an old-style list
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # 'playliststart' is 1-based; convert it to a 0-based slice start
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        else:
            # Non-list entries must be a PagedList, which is asked for
            # the requested slice only
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            # Playlist-level values handed down to the entry;
            # 'playlist_index' is the 1-based position in the whole
            # playlist, not in the selected slice.
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Entries rejected by the filters are reported and skipped
            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        # The entries are replaced by their resolved results
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            # Give each entry the extractor/page fields of the list result
            # unless the entry already has them
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
694
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick the format described by format_spec from available_formats.

    available_formats is taken to be ordered from worst to best: 'best'
    (or None) yields the last element and 'worst' the first one.
    'bestaudio' / 'worstaudio' only consider formats whose 'vcodec' is
    'none', 'bestvideo' / 'worstvideo' those whose 'acodec' is 'none'.
    Any other value is matched against 'ext' if it is one of the known
    extensions and against 'format_id' otherwise; the last match wins.

    Returns the selected format dict, or None if nothing matches.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # (specifier, field that must be 'none', index of the wanted element)
    codec_specs = (
        ('bestaudio', 'vcodec', -1),
        ('worstaudio', 'vcodec', 0),
        ('bestvideo', 'acodec', -1),
        ('worstvideo', 'acodec', 0),
    )
    for spec_name, codec_field, position in codec_specs:
        if format_spec == spec_name:
            candidates = [
                fmt for fmt in available_formats
                if fmt.get(codec_field) == 'none']
            return candidates[position] if candidates else None

    if format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a']:
        lookup_field = 'ext'
    else:
        lookup_field = 'format_id'
    matches = [
        fmt for fmt in available_formats
        if fmt[lookup_field] == format_spec]
    return matches[-1] if matches else None
734
dd82ffea
JMF
735 def process_video_result(self, info_dict, download=True):
736 assert info_dict.get('_type', 'video') == 'video'
737
bec1fad2
PH
738 if 'id' not in info_dict:
739 raise ExtractorError('Missing "id" field in extractor result')
740 if 'title' not in info_dict:
741 raise ExtractorError('Missing "title" field in extractor result')
742
dd82ffea
JMF
743 if 'playlist' not in info_dict:
744 # It isn't part of a playlist
745 info_dict['playlist'] = None
746 info_dict['playlist_index'] = None
747
d5519808
PH
748 thumbnails = info_dict.get('thumbnails')
749 if thumbnails:
be6d7229
PH
750 thumbnails.sort(key=lambda t: (
751 t.get('width'), t.get('height'), t.get('url')))
d5519808
PH
752 for t in thumbnails:
753 if 'width' in t and 'height' in t:
754 t['resolution'] = '%dx%d' % (t['width'], t['height'])
755
756 if thumbnails and 'thumbnail' not in info_dict:
757 info_dict['thumbnail'] = thumbnails[-1]['url']
758
c9ae7b95 759 if 'display_id' not in info_dict and 'id' in info_dict:
0afef30b
PH
760 info_dict['display_id'] = info_dict['id']
761
955c4514 762 if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
9d2ecdbc 763 upload_date = datetime.datetime.utcfromtimestamp(
955c4514 764 info_dict['timestamp'])
9d2ecdbc
PH
765 info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
766
6ff000b8 767 # These extractors handle format selection themselves
6febd1c1 768 if info_dict['extractor'] in ['Youku']:
12893efe
JMF
769 if download:
770 self.process_info(info_dict)
6ff000b8
JMF
771 return info_dict
772
dd82ffea
JMF
773 # We now pick which formats have to be downloaded
774 if info_dict.get('formats') is None:
775 # There's only one format available
776 formats = [info_dict]
777 else:
778 formats = info_dict['formats']
779
db95dc13
PH
780 if not formats:
781 raise ExtractorError('No video formats found!')
782
dd82ffea 783 # We check that all the formats have the format and format_id fields
db95dc13 784 for i, format in enumerate(formats):
bec1fad2
PH
785 if 'url' not in format:
786 raise ExtractorError('Missing "url" key in result (index %d)' % i)
787
dd82ffea 788 if format.get('format_id') is None:
8016c922 789 format['format_id'] = compat_str(i)
8c51aa65 790 if format.get('format') is None:
6febd1c1 791 format['format'] = '{id} - {res}{note}'.format(
8c51aa65
JMF
792 id=format['format_id'],
793 res=self.format_resolution(format),
6febd1c1 794 note=' ({0})'.format(format['format_note']) if format.get('format_note') is not None else '',
8c51aa65 795 )
c1002e96
PH
796 # Automatically determine file extension if missing
797 if 'ext' not in format:
cce929ea 798 format['ext'] = determine_ext(format['url']).lower()
dd82ffea 799
99e206d5
JMF
800 format_limit = self.params.get('format_limit', None)
801 if format_limit:
f4d96df0
PH
802 formats = list(takewhile_inclusive(
803 lambda f: f['format_id'] != format_limit, formats
804 ))
4bcc7bd1
PH
805
806 # TODO Central sorting goes here
99e206d5 807
f89197d7 808 if formats[0] is not info_dict:
b3d9ef88
JMF
809 # only set the 'formats' fields if the original info_dict list them
810 # otherwise we end up with a circular reference, the first (and unique)
f89197d7 811 # element in the 'formats' field in info_dict is info_dict itself,
b3d9ef88
JMF
812 # wich can't be exported to json
813 info_dict['formats'] = formats
bfaae0a7 814 if self.params.get('listformats', None):
815 self.list_formats(info_dict)
816 return
817
de3ef3ed 818 req_format = self.params.get('format')
a9c58ad9
JMF
819 if req_format is None:
820 req_format = 'best'
dd82ffea 821 formats_to_download = []
dd82ffea 822 # The -1 is for supporting YoutubeIE
a9c58ad9 823 if req_format in ('-1', 'all'):
dd82ffea
JMF
824 formats_to_download = formats
825 else:
1de33faf
PH
826 for rfstr in req_format.split(','):
827 # We can accept formats requested in the format: 34/5/best, we pick
828 # the first that is available, starting from left
829 req_formats = rfstr.split('/')
830 for rf in req_formats:
831 if re.match(r'.+?\+.+?', rf) is not None:
832 # Two formats have been requested like '137+139'
833 format_1, format_2 = rf.split('+')
834 formats_info = (self.select_format(format_1, formats),
835 self.select_format(format_2, formats))
836 if all(formats_info):
837 selected_format = {
838 'requested_formats': formats_info,
839 'format': rf,
840 'ext': formats_info[0]['ext'],
841 }
842 else:
843 selected_format = None
6350728b 844 else:
1de33faf
PH
845 selected_format = self.select_format(rf, formats)
846 if selected_format is not None:
847 formats_to_download.append(selected_format)
848 break
dd82ffea 849 if not formats_to_download:
6febd1c1 850 raise ExtractorError('requested format not available',
78a3a9f8 851 expected=True)
dd82ffea
JMF
852
853 if download:
854 if len(formats_to_download) > 1:
6febd1c1 855 self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
dd82ffea
JMF
856 for format in formats_to_download:
857 new_info = dict(info_dict)
858 new_info.update(format)
859 self.process_info(new_info)
860 # We update the info dict with the best quality format (backwards compatibility)
861 info_dict.update(formats_to_download[-1])
862 return info_dict
863
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the ``max_downloads`` limit and the ``_match_entry`` filter,
    performs the "forced" printings, writes the requested side files
    (description, annotations, subtitles, info JSON, thumbnail), downloads
    the video unless ``skip_download`` is set, runs the postprocessors and
    finally records the video in the download archive.

    Returns None.  Raises MaxDownloadsReached when the download limit has
    been hit and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing key or a None description
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                # Missing key or None annotations
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    # Encode the name like every other file written by
                    # this method does
                    with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                        shutil.copyfileobj(uf, thumbf)
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                def dl(name, info):
                    # Download one file with the downloader suited to it
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    # Several formats were requested together (e.g. '137+139'):
                    # download each one and let the merger combine them
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                # compat_str, as used for the other error messages of this method
                self.report_error('unable to download video data: %s' % compat_str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % compat_str(err))
                return

    self.record_download_archive(info_dict)
1071
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name.  Returns the accumulated download return code.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    several_urls = len(url_list) > 1
    # A template without any '%' field yields the same name for every video
    if several_urls and '%' not in template and self.params.get('max_downloads') != 1:
        raise SameFileError(template)

    for url in url_list:
        try:
            # extract_info also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1094
def download_with_info_file(self, info_filename):
    """Process the result stored in a JSON info file.

    If processing fails with DownloadError and the stored info has a
    ``webpage_url``, that URL is downloaded instead; without one the
    error is re-raised.  Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 1108
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain consists of the postprocessors stored under the
    ``__postprocessors`` key of ``ie_info`` (if any) followed by the ones
    in ``self._pps``.  Each one receives the info dict returned by the
    previous one, so changes made along the chain are visible to the
    later postprocessors.  ``ie_info`` itself is not modified.

    The original file is removed afterwards when the chain decided
    against keeping it and the ``keepvideo`` parameter is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
            if new_info is not None:
                # Continue the chain with the updated information
                info = new_info
            if keep_video_wish is not None:
                if keep_video_wish:
                    # A wish to keep the file always wins
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1135
5db07df6
PH
1136 def _make_archive_id(self, info_dict):
1137 # Future-proof against any change in case
1138 # and backwards compatibility with prior versions
d31209a1 1139 extractor = info_dict.get('extractor_key')
7012b23c
PH
1140 if extractor is None:
1141 if 'id' in info_dict:
1142 extractor = info_dict.get('ie_key') # key in a playlist
1143 if extractor is None:
5db07df6 1144 return None # Incomplete video information
6febd1c1 1145 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1146
def in_download_archive(self, info_dict):
    """Tell whether the video is already listed in the download archive.

    Returns False when no archive is configured, when the video
    information is incomplete or when the archive file does not exist.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    entry = self._make_archive_id(info_dict)
    if entry is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(line.strip() == entry for line in archive_file):
                return True
    except IOError as ioe:
        # A missing archive just means nothing has been recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1165
def record_download_archive(self, info_dict):
    """Append the video to the download archive, if one is configured."""
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
dd82ffea 1174
8c51aa65 1175 @staticmethod
8abeeb94 1176 def format_resolution(format, default='unknown'):
fb04e403
PH
1177 if format.get('vcodec') == 'none':
1178 return 'audio only'
f49d89ee
PH
1179 if format.get('resolution') is not None:
1180 return format['resolution']
8c51aa65
JMF
1181 if format.get('height') is not None:
1182 if format.get('width') is not None:
6febd1c1 1183 res = '%sx%s' % (format['width'], format['height'])
8c51aa65 1184 else:
6febd1c1 1185 res = '%sp' % format['height']
f49d89ee 1186 elif format.get('width') is not None:
6febd1c1 1187 res = '?x%d' % format['width']
8c51aa65 1188 else:
8abeeb94 1189 res = default
8c51aa65
JMF
1190 return res
1191
c57f7757
PH
1192 def _format_note(self, fdict):
1193 res = ''
1194 if fdict.get('ext') in ['f4f', 'f4m']:
1195 res += '(unsupported) '
1196 if fdict.get('format_note') is not None:
1197 res += fdict['format_note'] + ' '
1198 if fdict.get('tbr') is not None:
1199 res += '%4dk ' % fdict['tbr']
1200 if fdict.get('container') is not None:
1201 if res:
1202 res += ', '
1203 res += '%s container' % fdict['container']
1204 if (fdict.get('vcodec') is not None and
1205 fdict.get('vcodec') != 'none'):
1206 if res:
1207 res += ', '
1208 res += fdict['vcodec']
91c7271a 1209 if fdict.get('vbr') is not None:
c57f7757
PH
1210 res += '@'
1211 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1212 res += 'video@'
1213 if fdict.get('vbr') is not None:
1214 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1215 if fdict.get('fps') is not None:
1216 res += ', %sfps' % fdict['fps']
c57f7757
PH
1217 if fdict.get('acodec') is not None:
1218 if res:
1219 res += ', '
1220 if fdict['acodec'] == 'none':
1221 res += 'video only'
1222 else:
1223 res += '%-5s' % fdict['acodec']
1224 elif fdict.get('abr') is not None:
1225 if res:
1226 res += ', '
1227 res += 'audio'
1228 if fdict.get('abr') is not None:
1229 res += '@%3dk' % fdict['abr']
1230 if fdict.get('asr') is not None:
1231 res += ' (%5dHz)' % fdict['asr']
1232 if fdict.get('filesize') is not None:
1233 if res:
1234 res += ', '
1235 res += format_bytes(fdict['filesize'])
9732d77e
PH
1236 elif fdict.get('filesize_approx') is not None:
1237 if res:
1238 res += ', '
1239 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1240 return res
91c7271a 1241
def list_formats(self, info_dict):
    """Print a table of the formats available for a video.

    One row is printed per format (format code, extension, resolution,
    note).  When there are several formats, the first row is tagged
    '(worst)' and the last one '(best)'.
    """
    def render_row(fmt, idlen=20):
        # The format code column is one character wider than the longest id
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            fmt['format_id'],
            fmt['ext'],
            self.format_resolution(fmt),
            self._format_note(fmt),
        )

    formats = info_dict.get('formats', [info_dict])
    longest_id = max(len(f['format_id']) for f in formats)
    id_width = max(len('format code'), longest_id)
    rows = [render_row(f, id_width) for f in formats]
    if len(formats) > 1:
        rows[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
        rows[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

    header_row = render_row({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=id_width)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_row, '\n'.join(rows)))
dca08720
PH
1264
def urlopen(self, req):
    """ Start an HTTP download """

    # According to RFC 3986, URLs can not contain non-ASCII characters;
    # however this is not always respected by websites, some tend to give
    # out URLs with non percent-encoded non-ASCII characters
    # (see telemb.py, ard.py [#3412]).
    # urllib chokes on URLs with non-ASCII characters
    # (see http://bugs.python.org/issue3991).
    # To work around the aforementioned issue we replace the request's
    # original URL with a percent-encoded one.
    text_type = basestring if sys.version_info < (3, 0) else compat_str
    req_is_string = isinstance(req, text_type)
    if req_is_string:
        url = req
    else:
        url = req.get_full_url()
    url_escaped = escape_url(url)

    # Substitute the URL only if escaping changed anything
    if url_escaped != url:
        if req_is_string:
            req = url_escaped
        else:
            req = compat_urllib_request.Request(
                url_escaped, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1288
def print_debug_header(self):
    """Write debugging information about the environment.

    Nothing is written unless the ``verbose`` parameter is set.  The
    header consists of the encodings in use, the program version, the
    git HEAD (when it can be determined), the Python version and
    platform, the versions of the external executables and the proxy map.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            sys.stdout.encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout.  KeyboardInterrupt and SystemExit still propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() does not exist on Python 3
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    # Only executables with a known (truthy) version are listed
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 1340
def _setup_opener(self):
    """Create the URL opener, the cookie jar and the socket timeout.

    Reads the ``socket_timeout``, ``cookiefile``, ``proxy``,
    ``debug_printtraffic`` and ``nocheckcertificate`` parameters and
    stores the results in ``self._socket_timeout``, ``self.cookiejar``
    and ``self._opener``.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_opt is None:
        # No explicit setting: use the proxies configured on the system
        proxies = compat_urllib_request.getproxies()
    elif proxy_opt == '':
        # An empty string disables proxies altogether
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
    if 'http' in proxies and 'https' not in proxies:
        proxies['https'] = proxies['http']
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1381
def encode(self, s):
    """Encode ``s`` with the configured encoding; bytes are returned as is.

    A UnicodeEncodeError is re-raised with a hint about the encoding
    configuration appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
1391
def get_encoding(self):
    """Return the ``encoding`` parameter, or the preferred encoding if unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured