]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[abc] Update test case
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
8694c600 10import json
62fec3b2 11import locale
8222d8de 12import os
dca08720 13import platform
8222d8de
JMF
14import re
15import shutil
dca08720 16import subprocess
8222d8de
JMF
17import socket
18import sys
19import time
20import traceback
21
1e5b9a95
PH
22if os.name == 'nt':
23 import ctypes
24
8c25f81b 25from .compat import (
dca08720 26 compat_cookiejar,
4644ac55 27 compat_expanduser,
ce02ed60 28 compat_http_client,
ce02ed60
PH
29 compat_str,
30 compat_urllib_error,
31 compat_urllib_request,
8c25f81b
PH
32)
33from .utils import (
d05cfe06 34 escape_url,
ce02ed60
PH
35 ContentTooShortError,
36 date_from_str,
37 DateRange,
acd69589 38 DEFAULT_OUTTMPL,
ce02ed60
PH
39 determine_ext,
40 DownloadError,
41 encodeFilename,
42 ExtractorError,
02dbf93f 43 format_bytes,
525ef922 44 formatSeconds,
1c088fa8 45 get_term_width,
ce02ed60 46 locked_file,
dca08720 47 make_HTTPS_handler,
ce02ed60 48 MaxDownloadsReached,
b7ab0590 49 PagedList,
ce02ed60 50 PostProcessingError,
dca08720 51 platform_name,
ce02ed60
PH
52 preferredencoding,
53 SameFileError,
54 sanitize_filename,
55 subtitles_filename,
56 takewhile_inclusive,
57 UnavailableVideoError,
29eb5174 58 url_basename,
ce02ed60
PH
59 write_json_file,
60 write_string,
dca08720 61 YoutubeDLHandler,
6350728b 62 prepend_extension,
ce02ed60 63)
a0e07d31 64from .cache import Cache
023fa8c4 65from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 66from .downloader import get_suitable_downloader
4c83c967 67from .downloader.rtmp import rtmpdump_version
d28b5171 68from .postprocessor import FFmpegMergerPP, FFmpegPostProcessor
dca08720 69from .version import __version__
8222d8de
JMF
70
71
72class YoutubeDL(object):
73 """YoutubeDL class.
74
75 YoutubeDL objects are the ones responsible for downloading the
76 actual video file and writing it to disk if the user has requested
77 it, among some other tasks. In most cases there should be one per
78 program. As, given a video URL, the downloader doesn't know how to
79 extract all the needed information, task that InfoExtractors do, it
80 has to pass the URL to one of them.
81
82 For this, YoutubeDL objects have a method that allows
83 InfoExtractors to be registered in a given order. When it is passed
84 a URL, the YoutubeDL object handles it to the first InfoExtractor it
85 finds that reports being able to handle it. The InfoExtractor extracts
86 all the information about the video or videos the URL refers to, and
87 YoutubeDL process the extracted information, possibly using a File
88 Downloader to download the video.
89
90 YoutubeDL objects accept a lot of parameters. In order not to saturate
91 the object constructor with arguments, it receives a dictionary of
92 options instead. These options are available through the params
93 attribute for the InfoExtractors to use. The YoutubeDL also
94 registers itself as the downloader in charge for the InfoExtractors
95 that are added to it, so this is a "mutual registration".
96
97 Available options:
98
99 username: Username for authentication purposes.
100 password: Password for authentication purposes.
c6c19746 101 videopassword: Password for accessing a video.
8222d8de
JMF
102 usenetrc: Use netrc for authentication instead.
103 verbose: Print additional info to stdout.
104 quiet: Do not print messages to stdout.
ad8915b7 105 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
106 forceurl: Force printing final URL.
107 forcetitle: Force printing title.
108 forceid: Force printing ID.
109 forcethumbnail: Force printing thumbnail URL.
110 forcedescription: Force printing description.
111 forcefilename: Force printing final filename.
525ef922 112 forceduration: Force printing duration.
8694c600 113 forcejson: Force printing info_dict as JSON.
63e0be34
PH
114 dump_single_json: Force printing the info_dict of the whole playlist
115 (or video) as a single JSON line.
8222d8de
JMF
116 simulate: Do not download the video files.
117 format: Video format code.
118 format_limit: Highest quality format to try.
119 outtmpl: Template for output names.
120 restrictfilenames: Do not allow "&" and spaces in file names
121 ignoreerrors: Do not stop on download errors.
122 nooverwrites: Prevent overwriting files.
123 playliststart: Playlist item to start at.
124 playlistend: Playlist item to end at.
125 matchtitle: Download only matching titles.
126 rejecttitle: Reject downloads for matching titles.
8bf9319e 127 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
128 logtostderr: Log messages to stderr instead of stdout.
129 writedescription: Write the video description to a .description file
130 writeinfojson: Write the video description to a .info.json file
1fb07d10 131 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
132 writethumbnail: Write the thumbnail image to a file
133 writesubtitles: Write the video subtitles to a file
b004821f 134 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 135 allsubtitles: Downloads all the subtitles of the video
0b7f3118 136 (requires writesubtitles or writeautomaticsub)
8222d8de 137 listsubtitles: Lists all available subtitles for the video
b98a6b2f 138 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 139 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
140 keepvideo: Keep the video file after post-processing
141 daterange: A DateRange object, download only if the upload_date is in the range.
142 skip_download: Skip the actual download of the video file
c35f9e72 143 cachedir: Location of the cache files in the filesystem.
a0e07d31 144 False to disable filesystem cache.
47192f92 145 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
146 age_limit: An integer representing the user's age in years.
147 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
148 min_views: An integer representing the minimum view count the video
149 must have in order to not be skipped.
150 Videos without view count information are always
151 downloaded. None for no limit.
152 max_views: An integer representing the maximum view count.
153 Videos that are more popular than that are not
154 downloaded.
155 Videos without view count information are always
156 downloaded. None for no limit.
157 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
158 Videos already present in the file are not downloaded
159 again.
dca08720 160 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 161 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
162 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
163 At the moment, this is only supported by YouTube.
a1ee09e8 164 proxy: URL of the proxy server to use
e344693b 165 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
166 bidi_workaround: Work around buggy terminals without bidirectional text
167 support, using fribidi
a0ddb8a2 168 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 169 include_ads: Download ads as well
04b4d394
PH
170 default_search: Prepend this string if an input url is not valid.
171 'auto' for elaborate guessing
62fec3b2 172 encoding: Use this encoding instead of the system-specified.
e8ee972c 173 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
174 Pass in 'in_playlist' to only show this behavior for
175 playlist items.
fe7e0c98 176
8222d8de
JMF
177 The following parameters are not used by YoutubeDL itself, they are used by
178 the FileDownloader:
179 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
180 noresizebuffer, retries, continuedl, noprogress, consoletitle
76b1bd67
JMF
181
182 The following options are used by the post processors:
183 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
184 otherwise prefer avconv.
8d31fa3c 185 exec_cmd: Arbitrary command to run after downloading
8222d8de
JMF
186 """
187
188 params = None
189 _ies = []
190 _pps = []
191 _download_retcode = None
192 _num_downloads = None
193 _screen_file = None
194
def __init__(self, params=None, auto_init=True):
    """Create a FileDownloader object with the given options.

    params is the options dictionary (an empty dict is used when it is
    None).  When auto_init is true, the debug header is printed and the
    default InfoExtractors are registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Choose the stream by truthiness; indexing a two-element list with the
    # raw option value fails for None or any value other than 0/1.
    if params.get('logtostderr', False):
        self._screen_file = sys.stderr
    else:
        self._screen_file = sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first and fall back to fribidi if it cannot be
            # started.
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # ENOENT: the executable was not found; anything else is
            # unexpected and propagated.
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()
255
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register an InfoExtractor: append it to the list, index it by key
    and make this object its downloader."""
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
261
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.  If none has been
    registered yet, a new one is created, added to the extractor list and
    returned.
    """
    cached = self._ies_instances.get(ie_key)
    if cached is not None:
        return cached
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
273
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors, in order.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
280
8222d8de
JMF
def add_post_processor(self, pp):
    """Append a PostProcessor to the chain and make this object its
    downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
285
933605d7
JMF
def add_progress_hook(self, ph):
    """Register a progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 289
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was started.

    Without an output channel the message is returned unchanged.
    Otherwise the message is written to the helper's stdin and as many
    lines as were sent are read back from the output channel.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    # One line is read back for every line sent.
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    pieces = []
    for _ in range(expected_lines):
        pieces.append(self._output_channel.readline().decode('utf-8'))
    converted = ''.join(pieces)
    # Drop the last character (the newline appended above).
    return converted[:-len('\n')]
1c088fa8 302
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, honouring the 'quiet' option."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
306
def _write_string(self, s, out=None):
    """Write s to out through write_string, using the 'encoding' option."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 309
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file.

    If a 'logger' option is set, the message goes to its debug() method
    instead.  With check_quiet set, nothing is printed in quiet mode.
    skip_eol suppresses the trailing newline.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    text = self._bidi_workaround(message)
    line_end = ['\n', ''][skip_eol]
    self._write_string(text + line_end, self._screen_file)
8222d8de
JMF
320
def to_stderr(self, message):
    """Print message to stderr, or to the 'logger' option's error() method
    if a logger is set."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    text = self._bidi_workaround(message)
    self._write_string(text + '\n', self._err_file)
8222d8de 330
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled."""
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt':
        kernel32 = ctypes.windll.kernel32
        if kernel32.GetConsoleWindow():
            # c_wchar_p() might not be necessary if `message` is
            # already of type unicode()
            kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
            return
    if 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
1e5b9a95 340
bdde425c
PH
def save_console_title(self):
    """Push the current console title on the terminal's title stack when
    'consoletitle' is enabled and TERM is set."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
347
def restore_console_title(self):
    """Pop the console title from the terminal's title stack when
    'consoletitle' is enabled and TERM is set."""
    enabled = self.params.get('consoletitle', False)
    if enabled and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
354
def __enter__(self):
    """Context-manager entry: save the console title and return self."""
    self.save_console_title()
    return self
358
def __exit__(self, *args):
    """Context-manager exit: restore the console title and, if a
    'cookiefile' option is set, save the cookie jar."""
    self.restore_console_title()

    cookie_file = self.params.get('cookiefile')
    if cookie_file is not None:
        self.cookiejar.save()
bdde425c 364
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr.  In verbose mode a
    traceback is printed as well: tb if given, otherwise one built from
    the exception being handled, or from the current stack when no
    exception is active.

    Unless the 'ignoreerrors' option is set, DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    active = sys.exc_info()
    # Some exceptions carry the exc_info of the error they wrap.
    has_wrapped = bool(
        active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0])

    if self.params.get('verbose'):
        if tb is None:
            if active[0]:  # if .trouble has been called from an except block
                tb = ''
                if has_wrapped:
                    tb += ''.join(traceback.format_exception(*active[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        exc_info = active[1].exc_info if has_wrapped else sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
394
def report_warning(self, message):
    '''
    Emit a warning.  With a 'logger' option it goes to logger.warning();
    otherwise it is printed to stderr prefixed with 'WARNING:' (colored
    when stderr is a tty and the OS is not Windows), unless the
    'no_warnings' option is set.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return
    use_color = self._err_file.isatty() and os.name != 'nt'
    if use_color:
        _msg_header = '\033[0;33mWARNING:\033[0m'
    else:
        _msg_header = 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
411
def report_error(self, message, tb=None):
    '''
    Prefix the message with 'ERROR:' (in red when stderr is a tty and the
    OS is not Windows) and hand it, together with tb, to trouble().
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
423
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Report that file_name has already been fully downloaded.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 430
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    A copy of info_dict is extended with 'epoch', 'autonumber', a
    zero-padded 'playlist_index' and a derived 'resolution', every value
    is sanitized, and the result is substituted into the 'outtmpl' option
    (DEFAULT_OUTTMPL if unset).  Missing fields render as 'NA'.

    Returns the filename, or None after reporting an error if the
    template raises ValueError.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        fields['autonumber'] = ('%0' + str(digits) + 'd') % self._num_downloads
        if fields.get('playlist_index') is not None:
            # Pad the index to the width of the playlist's entry count.
            pad_width = len(str(fields['n_entries']))
            fields['playlist_index'] = '%0*d' % (pad_width, fields['playlist_index'])
        if fields.get('resolution') is None:
            width = fields.get('width')
            height = fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '?x%d' % width

        restricted = self.params.get('restrictfilenames')
        sanitized = {}
        for key, value in fields.items():
            if value is None:
                continue
            sanitized[key] = sanitize_filename(
                compat_str(value),
                restricted=restricted,
                is_id=(key == 'id'))
        lookup = collections.defaultdict(lambda: 'NA', sanitized)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        return compat_expanduser(outtmpl) % lookup
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
468
def _match_entry(self, info_dict):
    """Return None iff the file should be downloaded.

    Otherwise return a string explaining why the entry is skipped.  The
    checks, in order: 'matchtitle' / 'rejecttitle' patterns (only when the
    entry has a title), 'daterange', 'min_views' / 'max_views',
    'age_limit', and the download archive.
    """

    # Name used in messages; entries that are only being evaluated as
    # part of a playlist may not have a title yet.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    age_limit = self.params.get('age_limit')
    if age_limit is not None:
        actual_age_limit = info_dict.get('age_limit')
        if actual_age_limit is None:
            actual_age_limit = 0
        if age_limit < actual_age_limit:
            # Use video_title: `title` is unbound for entries without a
            # 'title' key and would raise NameError here.
            return 'Skipping "' + video_title + '" because it is age restricted'
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
fe7e0c98 507
b6c45014
JMF
508 @staticmethod
509 def add_extra_info(info_dict, extra_info):
510 '''Set the keys from extra_info in info dict if they are missing'''
511 for key, value in extra_info.items():
512 info_dict.setdefault(key, value)
513
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True):
    '''
    Extract information for url with the first suitable InfoExtractor
    (or the one named by ie_key).

    With 'process' set, the result is passed on to process_ie_result
    (which downloads the videos if 'download'); otherwise the raw
    extractor result is returned.
    extra_info is a dict containing the extra values to add to each result.
    Returns None when extraction finished without a result or when an
    error was reported instead of raised.
    '''
    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                return None
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if not process:
                return ie_result
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            return None
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            return None

    # Only reached when no extractor accepted the URL.
    self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 563
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor name/key and the page URL (plus its basename)
    on ie_result wherever those keys are missing."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
571
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie (may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result; this is None when a referenced URL
    could not be extracted and the error was only reported.

    extra_info is only read in this method (its keys are copied into
    results that lack them), so the shared default dict is not modified
    here.
    Raises Exception for an unknown '_type'.
    """

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: return the reference without resolving it.
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when extraction failed and the error
        # was reported rather than raised; there is nothing to merge then,
        # and make_result() would crash on the None.
        if not info:
            return info

        def make_result(embedded_info):
            # Start from the embedding page's result, but take the
            # media-specific fields from the embedded result only.
            new_result = ie_result.copy()
            for f in ('_type', 'url', 'ext', 'player_url', 'formats',
                      'entries', 'ie_key', 'duration',
                      'subtitles', 'annotations', 'format',
                      'thumbnail', 'thumbnails'):
                if f in new_result:
                    del new_result[f]
                if f in embedded_info:
                    new_result[f] = embedded_info[f]
            return new_result
        new_result = make_result(info)

        assert new_result.get('_type') != 'url_transparent'
        if new_result.get('_type') == 'compat_list':
            new_result['entries'] = [
                make_result(e) for e in new_result['entries']]

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # 'playliststart' is 1-based; convert it to a 0-based offset.
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        if isinstance(ie_result['entries'], list):
            n_all_entries = len(ie_result['entries'])
            entries = ie_result['entries'][playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        else:
            assert isinstance(ie_result['entries'], PagedList)
            entries = ie_result['entries'].getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video #%s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        def _fixup(r):
            self.add_extra_info(r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                })
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
698
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick one entry of available_formats according to format_spec.

    'best' (or None) and 'worst' take the last / first format.
    'bestaudio' / 'worstaudio' consider formats whose vcodec is 'none',
    'bestvideo' / 'worstvideo' those whose acodec is 'none'.  Any other
    spec is matched against 'ext' if it is a known extension, otherwise
    against 'format_id'; the last match wins.

    Returns None when nothing matches.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    if format_spec in ('bestaudio', 'worstaudio'):
        candidates = [
            fmt for fmt in available_formats
            if fmt.get('vcodec') == 'none']
    elif format_spec in ('bestvideo', 'worstvideo'):
        candidates = [
            fmt for fmt in available_formats
            if fmt.get('acodec') == 'none']
    elif format_spec in ['mp4', 'flv', 'webm', '3gp', 'm4a']:
        candidates = [
            fmt for fmt in available_formats
            if fmt['ext'] == format_spec]
    else:
        candidates = [
            fmt for fmt in available_formats
            if fmt['format_id'] == format_spec]

    if not candidates:
        return None
    # Only the explicit "worst*" specs take the first candidate.
    if format_spec in ('worstaudio', 'worstvideo'):
        return candidates[0]
    return candidates[-1]
738
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalise a single video result, select its formats and optionally download.

    info_dict is modified in place: playlist fields, thumbnail data,
    display_id and upload_date are filled in when missing, every format gets
    'format_id', 'format' and 'ext', and finally the last selected format is
    merged into info_dict (backwards compatibility).

    Returns info_dict, or None when the 'listformats' param is set or when a
    merge request ('video+audio') names an audio-only format first.
    Raises ExtractorError when 'id', 'title' or a format 'url' is missing,
    when there are no formats, or when no requested format is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails:
        def thumbnail_sort_key(t):
            # Missing values must not be compared as None: Python 3 refuses
            # to order None against int/str. -1 / '' keep them first, which
            # is where Python 2 used to sort None.
            width, height, url = t.get('width'), t.get('height'), t.get('url')
            return (
                -1 if width is None else width,
                -1 if height is None else height,
                '' if url is None else url)

        thumbnails.sort(key=thumbnail_sort_key)
        for t in thumbnails:
            # A key that is present but None cannot be rendered with %d
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])

    if thumbnails and 'thumbnail' not in info_dict:
        # Sorted ascending, so the last thumbnail is the largest one
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict.get('extractor') in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # Only set the 'formats' field if the original info_dict lists them;
        # otherwise we end up with a circular reference: the first (and
        # unique) element in the 'formats' field in info_dict is info_dict
        # itself, which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'.
                    # Split once only, so a spec with extra '+' falls
                    # through as "not available" instead of raising
                    # ValueError on unpacking.
                    format_1, format_2 = rf.split('+', 1)
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': formats_info[0]['ext'],
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
874
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the download limit and entry filters, prints the fields requested
    by the 'force*' params, writes the optional side files (description,
    annotations, subtitles, info JSON, thumbnail), downloads the media unless
    'skip_download' is set, runs the postprocessors and records the video in
    the download archive.

    Returns None in every case. Raises MaxDownloadsReached when the
    'max_downloads' limit is already reached, and UnavailableVideoError when
    the download fails with an OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        # For RTMP URLs, also include the playpath
        self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (KeyError, TypeError):
                # Missing key, or a description that is not text
                self.report_warning('There\'s no description to write.')
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang in subtitles.keys():
            sub = subtitles[sub_lang]
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, encodeFilename(infofn))
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    try:
                        # Open through encodeFilename, like the existence
                        # check above and every other file written here
                        with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                    finally:
                        # Do not leak the HTTP response
                        uf.close()
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    # A failed thumbnail is only a warning; the video is still downloaded
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                def dl(name, info):
                    # Build a downloader for this format, attach the
                    # registered progress hooks and run it
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        # Each part gets its own 'f<format_id>' extension prefix
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    # Picked up later by post_process
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return

    self.record_download_archive(info_dict)
1082
8222d8de
JMF
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written to one fixed
    output name. Returns the accumulated download return code.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    if len(url_list) > 1:
        if '%' not in outtmpl:
            # A template without any field would make every URL overwrite
            # the same file, unless only one download is allowed anyway
            if self.params.get('max_downloads') != 1:
                raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1105
def download_with_info_file(self, info_filename):
    """Download from a previously written info JSON file.

    If processing the stored info raises DownloadError and the info has a
    'webpage_url', the download is retried from that URL; otherwise the
    error is re-raised. Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])

    return self._download_retcode
1dcc4c0c 1119
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The per-download postprocessors stored under '__postprocessors' run
    first, followed by the registered ones. The original file is removed
    only when the final decision is an explicit False and the 'keepvideo'
    param is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    extra_pps = ie_info.get('__postprocessors')
    chain = list(extra_pps) if extra_pps is not None else []
    chain.extend(self._pps)

    keep_video = None
    for pp in chain:
        try:
            wish, _new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        if wish is None:
            # This postprocessor has no opinion
            continue
        # A wish to keep always wins; a wish to delete only counts while
        # nobody has decided yet
        if wish or keep_video is None:
            keep_video = wish

    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1146
5db07df6
PH
1147 def _make_archive_id(self, info_dict):
1148 # Future-proof against any change in case
1149 # and backwards compatibility with prior versions
d31209a1 1150 extractor = info_dict.get('extractor_key')
7012b23c
PH
1151 if extractor is None:
1152 if 'id' in info_dict:
1153 extractor = info_dict.get('ie_key') # key in a playlist
1154 if extractor is None:
5db07df6 1155 return None # Incomplete video information
6febd1c1 1156 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1157
def in_download_archive(self, info_dict):
    """Return True if the video is already listed in the download archive.

    False is returned when no archive is configured, when the video
    information is incomplete, or when the archive file does not exist yet.
    Any other IOError is re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    entry = self._make_archive_id(info_dict)
    if entry is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(line.strip() == entry for line in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing was recorded so far
        if ioe.errno != errno.ENOENT:
            raise
    return False
1176
def record_download_archive(self, info_dict):
    """Append the video's archive entry to the download archive, if one is configured."""
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
dd82ffea 1185
8c51aa65 1186 @staticmethod
8abeeb94 1187 def format_resolution(format, default='unknown'):
fb04e403
PH
1188 if format.get('vcodec') == 'none':
1189 return 'audio only'
f49d89ee
PH
1190 if format.get('resolution') is not None:
1191 return format['resolution']
8c51aa65
JMF
1192 if format.get('height') is not None:
1193 if format.get('width') is not None:
6febd1c1 1194 res = '%sx%s' % (format['width'], format['height'])
8c51aa65 1195 else:
6febd1c1 1196 res = '%sp' % format['height']
f49d89ee 1197 elif format.get('width') is not None:
6febd1c1 1198 res = '?x%d' % format['width']
8c51aa65 1199 else:
8abeeb94 1200 res = default
8c51aa65
JMF
1201 return res
1202
c57f7757
PH
1203 def _format_note(self, fdict):
1204 res = ''
1205 if fdict.get('ext') in ['f4f', 'f4m']:
1206 res += '(unsupported) '
1207 if fdict.get('format_note') is not None:
1208 res += fdict['format_note'] + ' '
1209 if fdict.get('tbr') is not None:
1210 res += '%4dk ' % fdict['tbr']
1211 if fdict.get('container') is not None:
1212 if res:
1213 res += ', '
1214 res += '%s container' % fdict['container']
1215 if (fdict.get('vcodec') is not None and
1216 fdict.get('vcodec') != 'none'):
1217 if res:
1218 res += ', '
1219 res += fdict['vcodec']
91c7271a 1220 if fdict.get('vbr') is not None:
c57f7757
PH
1221 res += '@'
1222 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1223 res += 'video@'
1224 if fdict.get('vbr') is not None:
1225 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1226 if fdict.get('fps') is not None:
1227 res += ', %sfps' % fdict['fps']
c57f7757
PH
1228 if fdict.get('acodec') is not None:
1229 if res:
1230 res += ', '
1231 if fdict['acodec'] == 'none':
1232 res += 'video only'
1233 else:
1234 res += '%-5s' % fdict['acodec']
1235 elif fdict.get('abr') is not None:
1236 if res:
1237 res += ', '
1238 res += 'audio'
1239 if fdict.get('abr') is not None:
1240 res += '@%3dk' % fdict['abr']
1241 if fdict.get('asr') is not None:
1242 res += ' (%5dHz)' % fdict['asr']
1243 if fdict.get('filesize') is not None:
1244 if res:
1245 res += ', '
1246 res += format_bytes(fdict['filesize'])
9732d77e
PH
1247 elif fdict.get('filesize_approx') is not None:
1248 if res:
1249 res += ', '
1250 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1251 return res
91c7271a 1252
def list_formats(self, info_dict):
    """Print a table of the available formats of info_dict to the screen.

    With more than one format, the first row is tagged '(worst)' and the
    last one '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    id_width = max(len('format code'),
                   max(len(f['format_id']) for f in formats))

    def render_row(fmt):
        # The id column is one wider than the longest id; the next two
        # columns have fixed widths
        return '%-*s%-10s%-12s%s' % (
            id_width + 1,
            fmt['format_id'],
            fmt['ext'],
            self.format_resolution(fmt),
            self._format_note(fmt),
        )

    rows = [render_row(f) for f in formats]
    if len(formats) > 1:
        # Only put a space before the tag when the row already has a note
        rows[0] += (' ' if self._format_note(formats[0]) else '') + '(worst)'
        rows[-1] += (' ' if self._format_note(formats[-1]) else '') + '(best)'

    header_line = render_row({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'})
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(rows)))
dca08720
PH
1275
def urlopen(self, req):
    """ Start an HTTP download """

    # According to RFC 3986, URLs can not contain non-ASCII characters;
    # however this is not always respected by websites, some tend to give
    # out URLs with non percent-encoded non-ASCII characters (see telemb.py,
    # ard.py [#3412]). urllib chokes on such URLs
    # (see http://bugs.python.org/issue3991), so the request's original URL
    # is replaced with a percent-encoded one.
    if sys.version_info < (3, 0):
        text_type = basestring
    else:
        text_type = compat_str
    is_plain_url = isinstance(req, text_type)

    if is_plain_url:
        original_url = req
    else:
        original_url = req.get_full_url()
    safe_url = escape_url(original_url)

    # Substitute URL if any change after escaping
    if original_url != safe_url:
        if is_plain_url:
            req = safe_url
        else:
            # Rebuild the request around the escaped URL, carrying over the
            # rest of the original request
            req = compat_urllib_request.Request(
                safe_url, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1299
def print_debug_header(self):
    """Write the '[debug]' header lines; does nothing unless 'verbose' is set.

    Reports the encodings in use, the youtube-dl version, the git HEAD (when
    it can be determined), the Python version and platform, the versions of
    the external executables and the proxy map.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        sys.stdout.encoding,
        self.get_encoding(),
    )
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=module_dir)
        out, _err = git.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except:
        # Best effort only: any failure here is deliberately ignored
        try:
            sys.exc_clear()
        except:
            pass

    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    found = []
    for exe, version in sorted(exe_versions.items()):
        if version:
            found.append('%s %s' % (exe, version))
    exe_str = ', '.join(found) or 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if not hasattr(handler, 'proxies'):
            continue
        proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 1351
def _setup_opener(self):
    """Create the URL opener, cookie jar and socket timeout from the params.

    Sets self._socket_timeout, self.cookiejar and self._opener.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    opts_cookiefile = self.params.get('cookiefile')
    opts_proxy = self.params.get('proxy')

    if opts_cookiefile is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(
            opts_cookiefile)
        # Only load the cookie file when it is readable
        if os.access(opts_cookiefile, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if opts_proxy is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif opts_proxy == '':
        # An empty proxy option results in an empty proxy map
        proxies = {}
    else:
        proxies = {'http': opts_proxy, 'https': opts_proxy}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1392
def encode(self, s):
    """Encode s with the configured encoding; bytes are returned untouched.

    On failure the UnicodeEncodeError is re-raised with a hint about the
    --encoding option appended to its reason.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
1402
def get_encoding(self):
    """Return the 'encoding' param, falling back to preferredencoding() when it is unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured