]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[smotri] Adapt to new API and modernize
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
8694c600 10import json
62fec3b2 11import locale
8222d8de 12import os
dca08720 13import platform
8222d8de
JMF
14import re
15import shutil
dca08720 16import subprocess
8222d8de
JMF
17import socket
18import sys
19import time
20import traceback
21
1e5b9a95
PH
22if os.name == 'nt':
23 import ctypes
24
8c25f81b 25from .compat import (
dca08720 26 compat_cookiejar,
4644ac55 27 compat_expanduser,
ce02ed60 28 compat_http_client,
ce02ed60
PH
29 compat_str,
30 compat_urllib_error,
31 compat_urllib_request,
7d4111ed 32 shlex_quote,
8c25f81b
PH
33)
34from .utils import (
d05cfe06 35 escape_url,
ce02ed60
PH
36 ContentTooShortError,
37 date_from_str,
38 DateRange,
acd69589 39 DEFAULT_OUTTMPL,
ce02ed60
PH
40 determine_ext,
41 DownloadError,
42 encodeFilename,
43 ExtractorError,
02dbf93f 44 format_bytes,
525ef922 45 formatSeconds,
1c088fa8 46 get_term_width,
ce02ed60 47 locked_file,
dca08720 48 make_HTTPS_handler,
ce02ed60 49 MaxDownloadsReached,
b7ab0590 50 PagedList,
ce02ed60 51 PostProcessingError,
dca08720 52 platform_name,
ce02ed60
PH
53 preferredencoding,
54 SameFileError,
55 sanitize_filename,
56 subtitles_filename,
57 takewhile_inclusive,
58 UnavailableVideoError,
29eb5174 59 url_basename,
ce02ed60
PH
60 write_json_file,
61 write_string,
dca08720 62 YoutubeDLHandler,
6350728b 63 prepend_extension,
7d4111ed 64 args_to_str,
ce02ed60 65)
a0e07d31 66from .cache import Cache
023fa8c4 67from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 68from .downloader import get_suitable_downloader
4c83c967 69from .downloader.rtmp import rtmpdump_version
d28b5171 70from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
dca08720 71from .version import __version__
8222d8de
JMF
72
73
74class YoutubeDL(object):
75 """YoutubeDL class.
76
77 YoutubeDL objects are the ones responsible of downloading the
78 actual video file and writing it to disk if the user has requested
79 it, among some other tasks. In most cases there should be one per
80 program. As, given a video URL, the downloader doesn't know how to
81 extract all the needed information, task that InfoExtractors do, it
82 has to pass the URL to one of them.
83
84 For this, YoutubeDL objects have a method that allows
85 InfoExtractors to be registered in a given order. When it is passed
86 a URL, the YoutubeDL object handles it to the first InfoExtractor it
87 finds that reports being able to handle it. The InfoExtractor extracts
88 all the information about the video or videos the URL refers to, and
89 YoutubeDL process the extracted information, possibly using a File
90 Downloader to download the video.
91
92 YoutubeDL objects accept a lot of parameters. In order not to saturate
93 the object constructor with arguments, it receives a dictionary of
94 options instead. These options are available through the params
95 attribute for the InfoExtractors to use. The YoutubeDL also
96 registers itself as the downloader in charge for the InfoExtractors
97 that are added to it, so this is a "mutual registration".
98
99 Available options:
100
101 username: Username for authentication purposes.
102 password: Password for authentication purposes.
c6c19746 103 videopassword: Password for accessing a video.
8222d8de
JMF
104 usenetrc: Use netrc for authentication instead.
105 verbose: Print additional info to stdout.
106 quiet: Do not print messages to stdout.
ad8915b7 107 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
108 forceurl: Force printing final URL.
109 forcetitle: Force printing title.
110 forceid: Force printing ID.
111 forcethumbnail: Force printing thumbnail URL.
112 forcedescription: Force printing description.
113 forcefilename: Force printing final filename.
525ef922 114 forceduration: Force printing duration.
8694c600 115 forcejson: Force printing info_dict as JSON.
63e0be34
PH
116 dump_single_json: Force printing the info_dict of the whole playlist
117 (or video) as a single JSON line.
8222d8de
JMF
118 simulate: Do not download the video files.
119 format: Video format code.
120 format_limit: Highest quality format to try.
121 outtmpl: Template for output names.
122 restrictfilenames: Do not allow "&" and spaces in file names
123 ignoreerrors: Do not stop on download errors.
124 nooverwrites: Prevent overwriting files.
125 playliststart: Playlist item to start at.
126 playlistend: Playlist item to end at.
127 matchtitle: Download only matching titles.
128 rejecttitle: Reject downloads for matching titles.
8bf9319e 129 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
130 logtostderr: Log messages to stderr instead of stdout.
131 writedescription: Write the video description to a .description file
132 writeinfojson: Write the video description to a .info.json file
1fb07d10 133 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
134 writethumbnail: Write the thumbnail image to a file
135 writesubtitles: Write the video subtitles to a file
b004821f 136 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 137 allsubtitles: Downloads all the subtitles of the video
0b7f3118 138 (requires writesubtitles or writeautomaticsub)
8222d8de 139 listsubtitles: Lists all available subtitles for the video
b98a6b2f 140 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 141 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
142 keepvideo: Keep the video file after post-processing
143 daterange: A DateRange object, download only if the upload_date is in the range.
144 skip_download: Skip the actual download of the video file
c35f9e72 145 cachedir: Location of the cache files in the filesystem.
a0e07d31 146 False to disable filesystem cache.
47192f92 147 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
148 age_limit: An integer representing the user's age in years.
149 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
150 min_views: An integer representing the minimum view count the video
151 must have in order to not be skipped.
152 Videos without view count information are always
153 downloaded. None for no limit.
154 max_views: An integer representing the maximum view count.
155 Videos that are more popular than that are not
156 downloaded.
157 Videos without view count information are always
158 downloaded. None for no limit.
159 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
160 Videos already present in the file are not downloaded
161 again.
dca08720 162 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 163 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
164 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
165 At the moment, this is only supported by YouTube.
a1ee09e8 166 proxy: URL of the proxy server to use
e344693b 167 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
168 bidi_workaround: Work around buggy terminals without bidirectional text
169 support, using fribidi
a0ddb8a2 170 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 171 include_ads: Download ads as well
04b4d394
PH
172 default_search: Prepend this string if an input url is not valid.
173 'auto' for elaborate guessing
62fec3b2 174 encoding: Use this encoding instead of the system-specified.
e8ee972c 175 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
176 Pass in 'in_playlist' to only show this behavior for
177 playlist items.
fe7e0c98 178
8222d8de
JMF
179 The following parameters are not used by YoutubeDL itself, they are used by
180 the FileDownloader:
181 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
182 noresizebuffer, retries, continuedl, noprogress, consoletitle
76b1bd67
JMF
183
184 The following options are used by the post processors:
185 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
186 otherwise prefer avconv.
8d31fa3c 187 exec_cmd: Arbitrary command to run after downloading
8222d8de
JMF
188 """
189
190 params = None
191 _ies = []
192 _pps = []
193 _download_retcode = None
194 _num_downloads = None
195 _screen_file = None
196
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params is the options dictionary (an empty dict is used when it is
    None). When auto_init is true, the debug header is printed and the
    default InfoExtractors are registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Index 0 -> stdout, index 1 -> stderr, selected by the logtostderr flag
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            width_args = [] if width is None else ['-w', str(width)]
            sp_kwargs = {
                'stdin': subprocess.PIPE,
                'stdout': slave,
                'stderr': self._err_file,
            }
            # Try bidiv first and fall back to fribidi if it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs)
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    if (sys.version_info >= (3,)
            and sys.platform != 'win32'
            and sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()
257
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn about arguments that look like a short YouTube ID starting with a dash.

    Such arguments are matched by a dash followed by exactly ten ID
    characters. When any are found, a warning is reported that shows the
    command line rewritten with those arguments moved after a '--'.
    """
    # short YouTube ID starting with dash?
    short_id_re = r'^-[0-9A-Za-z_-]{10}$'
    idxs = [pos for pos, arg in enumerate(argv) if re.match(short_id_re, arg)]
    if not idxs:
        return

    regular_args = [arg for pos, arg in enumerate(argv) if pos not in idxs]
    dashed_ids = [argv[pos] for pos in idxs]
    correct_argv = ['youtube-dl'] + regular_args + ['--'] + dashed_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
273
8222d8de
JMF
def add_info_extractor(self, ie):
    """Add an InfoExtractor object to the end of the list.

    The extractor is also indexed by its ie_key() and told that this
    object is its downloader.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
279
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key. If none has been
    registered yet, a new one is created, added to the extractor list
    and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
291
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor returned by gen_extractors, in order,
    at the end of the list.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
298
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and set this object as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
303
933605d7
JMF
def add_progress_hook(self, ph):
    """Register a progress hook (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 307
1c088fa8 308 def _bidi_workaround(self, message):
5d681e96 309 if not hasattr(self, '_output_channel'):
1c088fa8
PH
310 return message
311
5d681e96 312 assert hasattr(self, '_output_process')
11b85ce6 313 assert isinstance(message, compat_str)
6febd1c1
PH
314 line_count = message.count('\n') + 1
315 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 316 self._output_process.stdin.flush()
6febd1c1 317 res = ''.join(self._output_channel.readline().decode('utf-8')
1c088fa8 318 for _ in range(line_count))
6febd1c1 319 return res[:-len('\n')]
1c088fa8 320
def to_screen(self, message, skip_eol=False):
    """Print message to stdout if not in quiet mode.

    Delegates to to_stdout with check_quiet enabled and returns its result.
    """
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
324
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' param."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
734f90bb 327
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or to the logger if one is set.

    With a 'logger' param the message goes to its debug() method.
    Otherwise it is written, unless check_quiet is set and the 'quiet'
    param is true. A newline is appended unless skip_eol is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    terminator = ['\n', ''][skip_eol]
    self._write_string(message + terminator, self._screen_file)
8222d8de
JMF
338
def to_stderr(self, message):
    """Print message to stderr, or to the logger's error() if one is set."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    message = self._bidi_workaround(message)
    self._write_string(message + '\n', self._err_file)
8222d8de 348
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' param is on."""
    enabled = self.params.get('consoletitle', False)
    if not enabled:
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
1e5b9a95 358
bdde425c
PH
def save_console_title(self):
    """Emit the escape sequence that saves the console title on the stack.

    Does nothing unless the 'consoletitle' param is set and TERM is
    present in the environment.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
365
def restore_console_title(self):
    """Emit the escape sequence that restores the console title from the stack.

    Does nothing unless the 'consoletitle' param is set and TERM is
    present in the environment.
    """
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
372
373 def __enter__(self):
374 self.save_console_title()
375 return self
376
377 def __exit__(self, *args):
378 self.restore_console_title()
f89197d7 379
dca08720
PH
380 if self.params.get('cookiefile') is not None:
381 self.cookiejar.save()
bdde425c 382
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first. In verbose mode a
    traceback is printed too: tb if given, otherwise one built from the
    exception currently being handled or, failing that, from the
    current stack.

    Unless the 'ignoreerrors' param is set, a DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if active[0]:  # if .trouble has been called from an except block
                pieces = []
                # Include the traceback carried by the exception itself, if any
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    pieces.extend(traceback.format_exception(*active[1].exc_info))
                pieces.append(compat_str(traceback.format_exc()))
                tb = ''.join(pieces)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    active = sys.exc_info()
    if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
        exc_info = active[1].exc_info
    else:
        exc_info = active
    raise DownloadError(message, exc_info)
412
def report_warning(self, message):
    '''
    Print the message to stderr, prefixed with 'WARNING:'.
    The prefix is colored when stderr is a tty and the OS is not 'nt'.
    With a 'logger' param the message goes to its warning() method
    instead; otherwise the 'no_warnings' param suppresses the output.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = self._err_file.isatty() and os.name != 'nt'
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
429
def report_error(self, message, tb=None):
    '''
    Do the same as trouble, but prefix the message with 'ERROR:', colored
    in red when stderr is a tty and the OS is not 'nt'.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
441
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report that the file has already been fully downloaded.

    If printing the file name raises UnicodeEncodeError, a generic
    message without the name is printed instead.
    """
    named_message = '[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(named_message)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 448
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    A copy of info_dict is extended with 'epoch', 'autonumber', a
    zero-padded 'playlist_index' and (when missing) 'resolution'. Every
    non-None value is then sanitized and substituted into the output
    template; fields absent from the result are rendered as 'NA'.

    Returns the filename, or None after reporting an error when the
    template raises ValueError.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        fields['autonumber'] = autonumber_templ % self._num_downloads
        if fields.get('playlist_index') is not None:
            # Pad the index to the width of the total number of entries
            index_width = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (index_width, fields['playlist_index'])
        if fields.get('resolution') is None:
            width = fields.get('width')
            height = fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (fields['width'], fields['height'])
            elif height:
                fields['resolution'] = '%sp' % fields['height']
            elif width:
                fields['resolution'] = '?x%d' % fields['width']

        def sanitize(k, v):
            return sanitize_filename(
                compat_str(v),
                restricted=self.params.get('restrictfilenames'),
                is_id=(k == 'id'))

        sanitized = {}
        for k, v in fields.items():
            if v is not None:
                sanitized[k] = sanitize(k, v)
        template_dict = collections.defaultdict(lambda: 'NA', sanitized)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        return compat_expanduser(outtmpl) % template_dict
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
486
487 def _match_entry(self, info_dict):
488 """ Returns None iff the file should be downloaded """
489
6febd1c1 490 video_title = info_dict.get('title', info_dict.get('id', 'video'))
7012b23c
PH
491 if 'title' in info_dict:
492 # This can happen when we're just evaluating the playlist
493 title = info_dict['title']
494 matchtitle = self.params.get('matchtitle', False)
495 if matchtitle:
496 if not re.search(matchtitle, title, re.IGNORECASE):
6febd1c1 497 return '"' + title + '" title did not match pattern "' + matchtitle + '"'
7012b23c
PH
498 rejecttitle = self.params.get('rejecttitle', False)
499 if rejecttitle:
500 if re.search(rejecttitle, title, re.IGNORECASE):
6febd1c1 501 return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
8222d8de
JMF
502 date = info_dict.get('upload_date', None)
503 if date is not None:
504 dateRange = self.params.get('daterange', DateRange())
505 if date not in dateRange:
6febd1c1 506 return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
5fe18bdb
PH
507 view_count = info_dict.get('view_count', None)
508 if view_count is not None:
509 min_views = self.params.get('min_views')
510 if min_views is not None and view_count < min_views:
6febd1c1 511 return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
5fe18bdb
PH
512 max_views = self.params.get('max_views')
513 if max_views is not None and view_count > max_views:
6febd1c1 514 return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
8dbe9899
PH
515 age_limit = self.params.get('age_limit')
516 if age_limit is not None:
be843678
PH
517 actual_age_limit = info_dict.get('age_limit')
518 if actual_age_limit is None:
519 actual_age_limit = 0
520 if age_limit < actual_age_limit:
6febd1c1 521 return 'Skipping "' + title + '" because it is age restricted'
c1c9a79c 522 if self.in_download_archive(info_dict):
6febd1c1 523 return '%s has already been recorded in archive' % video_title
8222d8de 524 return None
fe7e0c98 525
b6c45014
JMF
526 @staticmethod
527 def add_extra_info(info_dict, extra_info):
528 '''Set the keys from extra_info in info dict if they are missing'''
529 for key, value in extra_info.items():
530 info_dict.setdefault(key, value)
531
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    download:   passed on to process_ie_result (download the videos too).
    ie_key:     if given, only the InfoExtractor with this key is tried.
    extra_info: dict with extra values to add to each result
                (defaults to an empty dict).
    process:    if true, return the result of process_ie_result;
                otherwise return the raw extractor result.

    Returns None when the extractor returned nothing, when an error was
    reported instead of raised, or when no extractor is suitable.
    MaxDownloadsReached is always re-raised; other unexpected exceptions
    are re-raised unless the 'ignoreerrors' param is set.
    '''
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop finished without any extractor accepting the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 581
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields that ie_result lacks."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
589
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    extra_info is a dict of extra values for the results (defaults to an
    empty dict).
    Returns the resolved ie_result. For a 'url_transparent' result whose
    embedded extraction produced nothing, that empty value is returned.
    Raises Exception for an unknown '_type'.
    """
    # A fresh dict per call instead of a shared mutable default argument
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: return the reference without resolving it
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when extraction failed and the error
        # was only reported; there is nothing to merge in that case.
        if not info:
            return info

        def make_result(embedded_info):
            new_result = ie_result.copy()
            # These fields always come from the embedded result, never
            # from the embedding page
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # playliststart is 1-based in the params, 0-based here
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        else:
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
719
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick one format from available_formats according to format_spec.

    'best' (or None) takes the last format and 'worst' the first.
    '{best,worst}audio' choose among formats whose vcodec is 'none',
    '{best,worst}video' among those whose acodec is 'none'. Any other
    spec is matched against 'ext' when it is a known extension and
    against 'format_id' otherwise; the last match wins.

    Returns the selected format, or None when nothing matches.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    if format_spec in ('bestaudio', 'worstaudio', 'bestvideo', 'worstvideo'):
        # Audio-only formats have no video codec, and vice versa
        absent_codec = 'vcodec' if format_spec.endswith('audio') else 'acodec'
        candidates = [
            f for f in available_formats
            if f.get(absent_codec) == 'none']
        if not candidates:
            return None
        if format_spec.startswith('best'):
            return candidates[-1]
        return candidates[0]

    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a']
    field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[field] == format_spec]
    if matches:
        return matches[-1]
    return None
759
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalize a single-video result, select formats and optionally download.

    info_dict is completed in place (playlist fields, thumbnail data,
    display_id, upload_date, per-format 'format_id'/'format'/'ext').
    The formats named by the 'format' param are then selected and, when
    download is true, each one is handed to process_info().

    Returns info_dict updated with the last selected format, or None when
    only the format list was requested ('listformats') or when a merge
    request names an audio-only format first.
    Raises ExtractorError for missing mandatory fields, an empty format
    list, or when no requested format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails:
        # Missing or None dimensions sort first; substituting -1 / '' keeps
        # the key comparable on Python 3, where None cannot be ordered
        # against integers or strings.
        thumbnails.sort(key=lambda t: (
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('url') if t.get('url') is not None else ''))
        for t in thumbnails:
            # Only build a resolution string from real numbers
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])

    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            for rf in rfstr.split('/'):
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'.
                    # Split only once: a spec with further '+' signs then
                    # simply fails to match instead of raising ValueError
                    # on unpacking.
                    format_1, format_2 = rf.split('+', 1)
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': formats_info[0]['ext'],
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for selected in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(selected)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
895
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    In order: enforces the 'max_downloads' param, truncates over-long
    titles, applies self._match_entry(), prints the fields requested by the
    'force*' params, and - unless 'simulate' is set - writes the requested
    description/annotations/subtitles/info-json/thumbnail files, downloads
    the video, runs the postprocessors and records the download archive.

    Returns None. Raises MaxDownloadsReached when the download limit has
    been hit and UnavailableVideoError when the download fails with an
    OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    nooverwrites = self.params.get('nooverwrites', False)

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if nooverwrites and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            # Checked before opening so that no empty file is left behind
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if nooverwrites and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif info_dict.get('annotations') is None:
            # Checked before opening so that no empty file is left behind
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if nooverwrites and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if nooverwrites and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if nooverwrites and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    try:
                        # Encode the name like every other file written here
                        with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                    finally:
                        # Do not leak the HTTP response
                        uf.close()
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if nooverwrites and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                def dl(name, info):
                    # Pick a downloader for this info dict and run it
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
1103
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output template. Returns self._download_retcode.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1:
        # A template without any '%' field names the same file for every URL
        if '%' not in template and self.params.get('max_downloads') != 1:
            raise SameFileError(template)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1126
def download_with_info_file(self, info_filename):
    """Download from a previously written info JSON file.

    If processing the stored info raises DownloadError and the info has a
    'webpage_url', that URL is downloaded instead; otherwise the error is
    re-raised. Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])

    return self._download_retcode
1dcc4c0c 1140
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is ie_info['__postprocessors'] (if present) followed by
    self._pps. Each postprocessor receives the info dict returned by the
    previous one, so changes such as a new 'filepath' are visible further
    down the chain. ie_info itself is not modified.

    After the chain has run, the original file is removed when the
    postprocessors asked for it and the 'keepvideo' param is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            # Hand the updated info on to the next postprocessor
            keep_video_wish, info = pp.run(info)
            if keep_video_wish is not None:
                if keep_video_wish:
                    keep_video = keep_video_wish
                elif keep_video is None:
                    # No clear decision yet, let IE decide
                    keep_video = keep_video_wish
        except PostProcessingError as e:
            self.report_error(e.msg)
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1167
5db07df6
PH
1168 def _make_archive_id(self, info_dict):
1169 # Future-proof against any change in case
1170 # and backwards compatibility with prior versions
d31209a1 1171 extractor = info_dict.get('extractor_key')
7012b23c
PH
1172 if extractor is None:
1173 if 'id' in info_dict:
1174 extractor = info_dict.get('ie_key') # key in a playlist
1175 if extractor is None:
5db07df6 1176 return None # Incomplete video information
6febd1c1 1177 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1178
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no 'download_archive' param is set, when no archive
    id can be built for info_dict, or when the archive file does not exist.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == archive_id for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive file just means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1197
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return
    archive_id = self._make_archive_id(info_dict)
    assert archive_id
    with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
        archive_file.write(archive_id + '\n')
dd82ffea 1206
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a human readable resolution string for a format dict.

    Yields 'audio only' when vcodec is 'none', the explicit 'resolution'
    field when present, 'WxH' when both dimensions are known, 'Hp' when
    only the height is known, 'Wx?' when only the width is known, and
    default otherwise.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        # Width goes first, the unknown height is the placeholder
        res = '%dx?' % format['width']
    else:
        res = default
    return res
1223
c57f7757
PH
1224 def _format_note(self, fdict):
1225 res = ''
1226 if fdict.get('ext') in ['f4f', 'f4m']:
1227 res += '(unsupported) '
1228 if fdict.get('format_note') is not None:
1229 res += fdict['format_note'] + ' '
1230 if fdict.get('tbr') is not None:
1231 res += '%4dk ' % fdict['tbr']
1232 if fdict.get('container') is not None:
1233 if res:
1234 res += ', '
1235 res += '%s container' % fdict['container']
1236 if (fdict.get('vcodec') is not None and
1237 fdict.get('vcodec') != 'none'):
1238 if res:
1239 res += ', '
1240 res += fdict['vcodec']
91c7271a 1241 if fdict.get('vbr') is not None:
c57f7757
PH
1242 res += '@'
1243 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1244 res += 'video@'
1245 if fdict.get('vbr') is not None:
1246 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1247 if fdict.get('fps') is not None:
1248 res += ', %sfps' % fdict['fps']
c57f7757
PH
1249 if fdict.get('acodec') is not None:
1250 if res:
1251 res += ', '
1252 if fdict['acodec'] == 'none':
1253 res += 'video only'
1254 else:
1255 res += '%-5s' % fdict['acodec']
1256 elif fdict.get('abr') is not None:
1257 if res:
1258 res += ', '
1259 res += 'audio'
1260 if fdict.get('abr') is not None:
1261 res += '@%3dk' % fdict['abr']
1262 if fdict.get('asr') is not None:
1263 res += ' (%5dHz)' % fdict['asr']
1264 if fdict.get('filesize') is not None:
1265 if res:
1266 res += ', '
1267 res += format_bytes(fdict['filesize'])
9732d77e
PH
1268 elif fdict.get('filesize_approx') is not None:
1269 if res:
1270 res += ', '
1271 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1272 return res
91c7271a 1273
def list_formats(self, info_dict):
    """Print a table of the formats available for info_dict to the screen.

    When more than one format is listed, the first row is tagged '(worst)'
    and the last row '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    longest_id = max(len(f['format_id']) for f in formats)
    idlen = max(len('format code'), longest_id)

    def line(fmt, idlen=20):
        # One table row: id, extension, resolution and note columns
        template = '%-' + compat_str(idlen + 1) + 's%-10s%-12s%s'
        columns = (
            fmt['format_id'],
            fmt['ext'],
            self.format_resolution(fmt),
            self._format_note(fmt),
        )
        return template % columns

    rows = [line(f, idlen) for f in formats]
    if len(formats) > 1:
        worst_gap = ' ' if self._format_note(formats[0]) else ''
        rows[0] += worst_gap + '(worst)'
        best_gap = ' ' if self._format_note(formats[-1]) else ''
        rows[-1] += best_gap + '(best)'

    header_fields = {
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}
    header_line = line(header_fields, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(rows)))
dca08720
PH
1296
def urlopen(self, req):
    """ Start an HTTP download

    req is either a URL string or a request object; the result of opening
    it with self._opener (using self._socket_timeout) is returned.
    """

    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    if sys.version_info < (3, 0):
        string_type = basestring
    else:
        string_type = compat_str
    given_as_string = isinstance(req, string_type)

    if given_as_string:
        original_url = req
    else:
        original_url = req.get_full_url()
    escaped_url = escape_url(original_url)

    # Substitute URL if any change after escaping
    target = req
    if original_url != escaped_url:
        if given_as_string:
            target = escaped_url
        else:
            target = compat_urllib_request.Request(
                escaped_url, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(target, timeout=self._socket_timeout)
dca08720
PH
1320
def print_debug_header(self):
    """Write diagnostic information when the 'verbose' param is set.

    Reports the encodings in use, the youtube-dl version, the git HEAD of
    the source directory (best effort), the Python version and platform,
    the versions of the external executables and the proxy map of the
    opener. Does nothing when 'verbose' is not set.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only: git may be missing or this may not be a
        # checkout. Catching Exception (not a bare except) lets
        # KeyboardInterrupt and SystemExit propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() does not exist on Python 3
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 1374
def _setup_opener(self):
    """Create the URL opener and cookie jar from the current params.

    Sets self._socket_timeout (600 when 'socket_timeout' is not given),
    self.cookiejar and self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    # Cookies: in-memory jar unless a cookie file was given
    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        self.cookiejar = compat_cookiejar.CookieJar()
    else:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    # Proxies: an explicit empty string disables them altogether
    proxy_url = self.params.get('proxy')
    if proxy_url is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_url == '':
        proxies = {}
    else:
        proxies = {'http': proxy_url, 'https': proxy_url}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    else:
        debuglevel = 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)

    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1415
def encode(self, s):
    """Return s as bytes, encoding text with self.get_encoding().

    Bytes are returned unchanged. A UnicodeEncodeError is re-raised with a
    hint about the --encoding option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
1425
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
7d4111ed 1431