]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
Merge remote-tracking branch 'Tithen-Firion/master'
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
b82f815f 10import itertools
8694c600 11import json
62fec3b2 12import locale
8222d8de 13import os
dca08720 14import platform
8222d8de
JMF
15import re
16import shutil
dca08720 17import subprocess
8222d8de
JMF
18import socket
19import sys
20import time
21import traceback
22
1e5b9a95
PH
23if os.name == 'nt':
24 import ctypes
25
8c25f81b 26from .compat import (
dca08720 27 compat_cookiejar,
4644ac55 28 compat_expanduser,
ce02ed60 29 compat_http_client,
4f026faf 30 compat_kwargs,
ce02ed60
PH
31 compat_str,
32 compat_urllib_error,
33 compat_urllib_request,
8c25f81b
PH
34)
35from .utils import (
d05cfe06 36 escape_url,
ce02ed60
PH
37 ContentTooShortError,
38 date_from_str,
39 DateRange,
acd69589 40 DEFAULT_OUTTMPL,
ce02ed60
PH
41 determine_ext,
42 DownloadError,
43 encodeFilename,
44 ExtractorError,
02dbf93f 45 format_bytes,
525ef922 46 formatSeconds,
1c088fa8 47 get_term_width,
ce02ed60 48 locked_file,
dca08720 49 make_HTTPS_handler,
ce02ed60 50 MaxDownloadsReached,
b7ab0590 51 PagedList,
ce02ed60 52 PostProcessingError,
dca08720 53 platform_name,
ce02ed60
PH
54 preferredencoding,
55 SameFileError,
56 sanitize_filename,
57 subtitles_filename,
58 takewhile_inclusive,
59 UnavailableVideoError,
29eb5174 60 url_basename,
ce02ed60
PH
61 write_json_file,
62 write_string,
dca08720 63 YoutubeDLHandler,
6350728b 64 prepend_extension,
7d4111ed 65 args_to_str,
05900629 66 age_restricted,
ce02ed60 67)
a0e07d31 68from .cache import Cache
023fa8c4 69from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 70from .downloader import get_suitable_downloader
4c83c967 71from .downloader.rtmp import rtmpdump_version
4f026faf
PH
72from .postprocessor import (
73 FFmpegMergerPP,
74 FFmpegPostProcessor,
75 get_postprocessor,
76)
dca08720 77from .version import __version__
8222d8de
JMF
78
79
80class YoutubeDL(object):
81 """YoutubeDL class.
82
83 YoutubeDL objects are the ones responsible for downloading the
84 actual video file and writing it to disk if the user has requested
85 it, among some other tasks. In most cases there should be one per
86 program. As, given a video URL, the downloader doesn't know how to
87 extract all the needed information, task that InfoExtractors do, it
88 has to pass the URL to one of them.
89
90 For this, YoutubeDL objects have a method that allows
91 InfoExtractors to be registered in a given order. When it is passed
92 a URL, the YoutubeDL object hands it to the first InfoExtractor it
93 finds that reports being able to handle it. The InfoExtractor extracts
94 all the information about the video or videos the URL refers to, and
95 YoutubeDL process the extracted information, possibly using a File
96 Downloader to download the video.
97
98 YoutubeDL objects accept a lot of parameters. In order not to saturate
99 the object constructor with arguments, it receives a dictionary of
100 options instead. These options are available through the params
101 attribute for the InfoExtractors to use. The YoutubeDL also
102 registers itself as the downloader in charge for the InfoExtractors
103 that are added to it, so this is a "mutual registration".
104
105 Available options:
106
107 username: Username for authentication purposes.
108 password: Password for authentication purposes.
c6c19746 109 videopassword: Password for accessing a video.
8222d8de
JMF
110 usenetrc: Use netrc for authentication instead.
111 verbose: Print additional info to stdout.
112 quiet: Do not print messages to stdout.
ad8915b7 113 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
114 forceurl: Force printing final URL.
115 forcetitle: Force printing title.
116 forceid: Force printing ID.
117 forcethumbnail: Force printing thumbnail URL.
118 forcedescription: Force printing description.
119 forcefilename: Force printing final filename.
525ef922 120 forceduration: Force printing duration.
8694c600 121 forcejson: Force printing info_dict as JSON.
63e0be34
PH
122 dump_single_json: Force printing the info_dict of the whole playlist
123 (or video) as a single JSON line.
8222d8de 124 simulate: Do not download the video files.
d8600787 125 format: Video format code. See options.py for more information.
8222d8de
JMF
126 format_limit: Highest quality format to try.
127 outtmpl: Template for output names.
128 restrictfilenames: Do not allow "&" and spaces in file names
129 ignoreerrors: Do not stop on download errors.
130 nooverwrites: Prevent overwriting files.
131 playliststart: Playlist item to start at.
132 playlistend: Playlist item to end at.
ff815fe6 133 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
134 matchtitle: Download only matching titles.
135 rejecttitle: Reject downloads for matching titles.
8bf9319e 136 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
137 logtostderr: Log messages to stderr instead of stdout.
138 writedescription: Write the video description to a .description file
139 writeinfojson: Write the video description to a .info.json file
1fb07d10 140 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
141 writethumbnail: Write the thumbnail image to a file
142 writesubtitles: Write the video subtitles to a file
b004821f 143 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 144 allsubtitles: Downloads all the subtitles of the video
0b7f3118 145 (requires writesubtitles or writeautomaticsub)
8222d8de 146 listsubtitles: Lists all available subtitles for the video
b98a6b2f 147 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 148 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
149 keepvideo: Keep the video file after post-processing
150 daterange: A DateRange object, download only if the upload_date is in the range.
151 skip_download: Skip the actual download of the video file
c35f9e72 152 cachedir: Location of the cache files in the filesystem.
a0e07d31 153 False to disable filesystem cache.
47192f92 154 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
155 age_limit: An integer representing the user's age in years.
156 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
157 min_views: An integer representing the minimum view count the video
158 must have in order to not be skipped.
159 Videos without view count information are always
160 downloaded. None for no limit.
161 max_views: An integer representing the maximum view count.
162 Videos that are more popular than that are not
163 downloaded.
164 Videos without view count information are always
165 downloaded. None for no limit.
166 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
167 Videos already present in the file are not downloaded
168 again.
dca08720 169 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 170 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
171 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
172 At the moment, this is only supported by YouTube.
a1ee09e8 173 proxy: URL of the proxy server to use
e344693b 174 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
175 bidi_workaround: Work around buggy terminals without bidirectional text
176 support, using fribidi
a0ddb8a2 177 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 178 include_ads: Download ads as well
04b4d394
PH
179 default_search: Prepend this string if an input url is not valid.
180 'auto' for elaborate guessing
62fec3b2 181 encoding: Use this encoding instead of the system-specified.
e8ee972c 182 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
183 Pass in 'in_playlist' to only show this behavior for
184 playlist items.
4f026faf 185 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
186 * key: The name of the postprocessor. See
187 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
188 as well as any further keyword arguments for the
189 postprocessor.
71b640cc
PH
190 progress_hooks: A list of functions that get called on download
191 progress, with a dictionary with the entries
192 * filename: The final filename
193 * status: One of "downloading" and "finished"
194
195 The dict may also have some of the following entries:
196
197 * downloaded_bytes: Bytes on disk
198 * total_bytes: Size of the whole file, None if unknown
199 * tmpfilename: The filename we're currently writing to
200 * eta: The estimated time in seconds, None if unknown
201 * speed: The download speed in bytes/second, None if
202 unknown
203
204 Progress hooks are guaranteed to be called at least once
205 (with status "finished") if the download is successful.
45598f15 206 merge_output_format: Extension to use when merging formats.
71b640cc 207
fe7e0c98 208
8222d8de
JMF
209 The following parameters are not used by YoutubeDL itself, they are used by
210 the FileDownloader:
211 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
212 noresizebuffer, retries, continuedl, noprogress, consoletitle
76b1bd67
JMF
213
214 The following options are used by the post processors:
215 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
216 otherwise prefer avconv.
8d31fa3c 217 exec_cmd: Arbitrary command to run after downloading
8222d8de
JMF
218 """
219
220 params = None
221 _ies = []
222 _pps = []
223 _download_retcode = None
224 _num_downloads = None
225 _screen_file = None
226
def __init__(self, params=None, auto_init=True):
    """Set up a YoutubeDL object from the given options dictionary.

    params is the options dict (an empty dict is used when it is None);
    it is stored on the instance as self.params.  When auto_init is true
    the debug header is printed and the default info extractors are
    registered.  Post processors and progress hooks listed in params are
    always registered.
    """
    params = {} if params is None else params

    # Per-instance registries and counters.
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # 'logtostderr' selects stderr (index 1) instead of stdout (index 0).
    self._screen_file = [sys.stdout, sys.stderr][params.get('logtostderr', False)]
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master_fd, slave_fd = pty.openpty()
            term_width = get_term_width()
            width_args = [] if term_width is None else ['-w', str(term_width)]
            popen_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave_fd,
                stderr=self._err_file)
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **popen_kwargs)
            except OSError:
                # bidiv could not be started, try fribidi instead
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **popen_kwargs)
            self._output_channel = os.fdopen(master_fd, 'rb')
        except OSError as ose:
            # Only a missing executable is tolerated; anything else propagates.
            if ose.errno != errno.ENOENT:
                raise
            self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')

    ascii_only_filesystem = (
        sys.version_info >= (3,) and sys.platform != 'win32' and
        sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'])
    if ascii_only_filesystem and not params.get('restrictfilenames', False):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Each entry is a dict with a 'key' naming the post processor class;
    # the remaining items are passed to it as keyword arguments.
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        pp_options = dict(pp_def_raw)
        del pp_options['key']
        self.add_post_processor(pp_class(self, **compat_kwargs(pp_options)))

    for hook in self.params.get('progress_hooks', []):
        self.add_progress_hook(hook)
297
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short YouTube ID starting with a dash.

    Such arguments (a dash followed by exactly ten ID characters) are
    listed after a '--' separator in the suggested command line.
    """
    regular_args = []
    dash_ids = []
    for arg in argv:
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg):
            dash_ids.append(arg)
        else:
            regular_args.append(arg)

    if not dash_ids:
        return

    correct_argv = ['youtube-dl'] + regular_args + ['--'] + dash_ids
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
313
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register ie as the last InfoExtractor and set this object as its downloader."""
    self._ies.append(ie)
    # Index the instance by its key so get_info_extractor() can reuse it.
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
319
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.  If none has been
    registered yet, instantiate the class looked up for ie_key, add it to
    the extractor list and return the new instance.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known

    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
331
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in order,
    at the end of the extractor list.
    """
    register = self.add_info_extractor
    for extractor in gen_extractors():
        register(extractor)
338
8222d8de
JMF
def add_post_processor(self, pp):
    """Append the PostProcessor pp to the chain and set this object as its downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
343
933605d7
JMF
def add_progress_hook(self, ph):
    """Register the progress hook ph (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 347
def _bidi_workaround(self, message):
    """Pass message through the bidi helper process, if one was started.

    Returns message unchanged when no output channel exists.  Otherwise
    the message is written to the helper's stdin and the same number of
    lines is read back from the output channel; the final newline is
    stripped from the result.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = message.count('\n') + 1
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    received = []
    for _ in range(expected_lines):
        received.append(self._output_channel.readline().decode('utf-8'))
    # Drop the newline that was appended before sending.
    return ''.join(received)[:-1]
1c088fa8 360
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout, honouring the 'quiet' parameter."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
364
def _write_string(self, s, out=None):
    """Write s to out through write_string, using the 'encoding' parameter."""
    configured_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=configured_encoding)
734f90bb 367
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the logger.

    When a 'logger' parameter is set the message goes to its debug()
    method instead.  With check_quiet set, nothing is printed in quiet
    mode.  A newline is appended unless skip_eol is set.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    terminator = ('\n', '')[skip_eol]
    self._write_string(
        self._bidi_workaround(message) + terminator, self._screen_file)
8222d8de
JMF
378
def to_stderr(self, message):
    """Print message to the error file, or pass it to the logger's error()."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    line = self._bidi_workaround(message) + '\n'
    self._write_string(line, self._err_file)
8222d8de 388
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when 'consoletitle' is enabled."""
    if not self.params.get('consoletitle', False):
        return
    has_windows_console = (
        os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow())
    if has_windows_console:
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        ctypes.windll.kernel32.SetConsoleTitleW(ctypes.c_wchar_p(message))
    elif 'TERM' in os.environ:
        title_sequence = '\033]0;%s\007' % message
        self._write_string(title_sequence, self._screen_file)
1e5b9a95 398
bdde425c
PH
def save_console_title(self):
    """Emit the escape sequence that saves the terminal title, if enabled."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
405
def restore_console_title(self):
    """Emit the escape sequence that restores the terminal title, if enabled."""
    if self.params.get('consoletitle', False) and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
412
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    self.save_console_title()
    context_value = self
    return context_value
416
def __exit__(self, *args):
    """Leave the context: restore the console title and save the cookies.

    The cookie jar is only saved when a 'cookiefile' parameter is set.
    """
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
bdde425c 422
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Decide what to do when a download problem appears.

    The message (if any) is printed to stderr; in verbose mode a
    traceback is printed as well.  Unless the 'ignoreerrors' parameter
    is set, a DownloadError is raised afterwards; otherwise the return
    code of the download is set to 1.

    tb, if given, is additional traceback information.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            handled_type, handled_value = sys.exc_info()[:2]
            if handled_type:  # .trouble has been called from an except block
                tb = ''
                # Exceptions carrying their own exc_info contribute the
                # traceback of the error they wrap first.
                if hasattr(handled_value, 'exc_info') and handled_value.exc_info[0]:
                    tb += ''.join(traceback.format_exception(*handled_value.exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if self.params.get('ignoreerrors', False):
        self._download_retcode = 1
        return

    active_type, active_value = sys.exc_info()[:2]
    if active_type and hasattr(active_value, 'exc_info') and active_value.exc_info[0]:
        exc_info = active_value.exc_info
    else:
        exc_info = sys.exc_info()
    raise DownloadError(message, exc_info)
452
def report_warning(self, message):
    '''
    Emit a warning.  With a 'logger' parameter the message goes to its
    warning() method.  Otherwise, unless 'no_warnings' is set, it is
    printed to stderr prefixed with 'WARNING:' (colored when stderr is a
    tty and the OS is not Windows).
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return

    header = 'WARNING:'
    if self._err_file.isatty() and os.name != 'nt':
        header = '\033[0;33mWARNING:\033[0m'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
469
def report_error(self, message, tb=None):
    '''
    Call trouble() with the message prefixed by 'ERROR:', colored in red
    when stderr is a tty and the OS is not Windows.
    '''
    header = 'ERROR:'
    if self._err_file.isatty() and os.name != 'nt':
        header = '\033[0;31mERROR:\033[0m'
    self.trouble('%s %s' % (header, message), tb)
481
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded."""
    generic_notice = '[download] The file has already been downloaded'
    try:
        named_notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(named_notice)
    except UnicodeEncodeError:
        # The name could not be encoded for output; report without it.
        self.to_screen(generic_notice)
8222d8de 488
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    Expands the 'outtmpl' parameter (DEFAULT_OUTTMPL when unset) with a
    copy of info_dict extended by these computed fields:

    * epoch: the current time as an integer
    * autonumber: the download counter, zero-padded to the
      'autonumber_size' parameter (5 when unset)
    * playlist_index: zero-padded to the number of digits of n_entries
    * resolution: derived from width/height when not already present

    Every value is passed through sanitize_filename; fields that are
    missing or None expand to 'NA'.

    Returns the filename.  If expanding the template raises ValueError,
    the error is passed to report_error and None is returned (when
    report_error itself returns).
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            template_dict['playlist_index'] = '%0*d' % (len(str(template_dict['n_entries'])), template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            width = template_dict.get('width')
            height = template_dict.get('height')
            if width and height:
                template_dict['resolution'] = '%dx%d' % (width, height)
            elif height:
                template_dict['resolution'] = '%sp' % height
            elif width:
                # Resolution reads WIDTHxHEIGHT, so a lone width belongs
                # on the left with the unknown height on the right.
                template_dict['resolution'] = '%dx?' % width

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id'))
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        # Unknown template fields expand to 'NA' instead of raising KeyError
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
526
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a message describing why the entry is skipped.
    The checks, in order: the 'matchtitle' and 'rejecttitle' patterns
    (only when a title is present), the 'daterange' parameter, the
    'min_views' and 'max_views' parameters, the age limit and the
    download archive.
    """

    # Name used in messages; the title itself may be missing when only a
    # playlist is being evaluated.
    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        # video_title is always bound; `title` only exists when the entry
        # has a 'title' key.
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
fe7e0c98 560
b6c45014
JMF
561 @staticmethod
562 def add_extra_info(info_dict, extra_info):
563 '''Set the keys from extra_info in info dict if they are missing'''
564 for key, value in extra_info.items():
565 info_dict.setdefault(key, value)
566
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    ie_key restricts the search to the extractor with that key; otherwise
    all registered extractors are tried in order.
    extra_info is a dict containing the extra values to add to each result
    (None means no extra values).
    If 'process', the extractor result is passed to process_ie_result
    (which also downloads the videos if 'download') and its return value
    is returned; otherwise the raw extractor result is returned.
    Returns None when no extractor is suitable, when the extractor
    returns None, or when an error was reported without raising.
    '''
    # A fresh dict per call instead of a shared mutable default
    if extra_info is None:
        extra_info = {}

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # The loop ended without a break: no extractor accepted the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 616
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage URL fields that ie_result lacks."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
624
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    The '_type' entry of ie_result ('video' when absent) selects the
    handling: 'video', 'url', 'url_transparent', 'playlist',
    'multi_video' or 'compat_list'.  Any other value raises Exception.

    extra_info is a dict of values added to the results where they are
    missing (None means no extra values).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.  For 'url' and 'url_transparent'
    results this can be None when the referenced URL yields no result.
    """
    # A fresh dict per call instead of a shared mutable default
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        # With 'extract_flat' the reference is returned unresolved
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when no result was produced (for
        # example after a reported error); there is nothing to merge then.
        if info is None:
            return None

        # Every non-None field of the embedding result overrides the
        # extracted one, except the reference itself.
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # 'playliststart' is 1-based, slicing is 0-based
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            entries = ie_entries.getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            entries = list(itertools.islice(
                ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Skip entries rejected by the title/date/view/age/archive filters
            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each entry the extractor fields of the enclosing result
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
759
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick the format matching format_spec from available_formats.

    'best' (or None) and 'worst' pick the last and the first entry.
    'bestaudio'/'worstaudio' and 'bestvideo'/'worstvideo' pick the last
    or first entry whose 'vcodec' (respectively 'acodec') is 'none'.
    Any other spec is matched against 'ext' when it is a known extension
    and against 'format_id' otherwise; the last match wins.

    Returns None when nothing matches.
    """
    if format_spec is None or format_spec == 'best':
        return available_formats[-1]
    if format_spec == 'worst':
        return available_formats[0]

    # (spec, codec field that must be 'none', index of the entry to pick)
    codec_specs = (
        ('bestaudio', 'vcodec', -1),
        ('worstaudio', 'vcodec', 0),
        ('bestvideo', 'acodec', -1),
        ('worstvideo', 'acodec', 0),
    )
    for spec, codec_field, pick in codec_specs:
        if format_spec == spec:
            candidates = [
                f for f in available_formats
                if f.get(codec_field) == 'none']
            return candidates[pick] if candidates else None

    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
    match_field = 'ext' if format_spec in extensions else 'format_id'
    matches = [f for f in available_formats if f[match_field] == format_spec]
    return matches[-1] if matches else None
799
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalise a single-video result and select the format(s) to fetch.

    Fills in derived fields on ``info_dict`` (playlist placeholders,
    thumbnail, display_id, upload_date, per-format ``format_id`` /
    ``format`` / ``ext``), resolves the requested format specification
    from ``self.params['format']`` and, when ``download`` is true, hands
    every selected format to ``self.process_info``.

    Returns ``info_dict`` updated with the last selected format.  Returns
    ``None`` after listing formats (``listformats``) or after reporting
    that a merge request has its video/audio parts in the wrong order.

    Raises ``ExtractorError`` when mandatory fields are missing, when no
    formats exist, or when the requested format is not available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails:
        # Missing dimensions sort first.  Substituting -1 / '' keeps the
        # key comparable on Python 3, where None cannot be ordered
        # against numbers or strings.
        thumbnails.sort(key=lambda t: (
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('url') or ''))
        for t in thumbnails:
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])

    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around negative timestamps in Windows
        # (see http://bugs.python.org/issue1646728)
        if info_dict['timestamp'] < 0 and os.name == 'nt':
            info_dict['timestamp'] = 0
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict['extractor'] in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'.
                    # Split only once: a spec with further '+' signs then
                    # simply fails to resolve instead of breaking the
                    # two-value unpacking.
                    format_1, format_2 = rf.split('+', 1)
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        output_ext = (
                            formats_info[0]['ext']
                            if self.params.get('merge_output_format') is None
                            else self.params['merge_output_format'])
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'width': formats_info[0].get('width'),
                            'height': formats_info[0].get('height'),
                            'resolution': formats_info[0].get('resolution'),
                            'fps': formats_info[0].get('fps'),
                            'vcodec': formats_info[0].get('vcodec'),
                            'vbr': formats_info[0].get('vbr'),
                            'acodec': formats_info[1].get('acodec'),
                            'abr': formats_info[1].get('abr'),
                            'ext': output_ext,
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
952
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the download limit and entry filter, prints the fields
    requested through the ``force*`` options and, unless ``simulate`` is
    set, writes the requested side files (description, annotations,
    subtitles, info JSON, thumbnail), downloads the media, runs the
    postprocessors and records the video in the download archive.

    Raises ``MaxDownloadsReached`` once ``max_downloads`` is hit and
    ``UnavailableVideoError`` when the download fails with an
    OSError/IOError.  Other failures are reported through
    ``report_error`` / ``report_warning`` and end processing early.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang, sub in subtitles.items():
            if sub is None:
                continue
            try:
                sub_filename = subtitles_filename(filename, sub_lang, sub_format)
                if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    try:
                        # Encode the name like every other file written
                        # here, so the existence check above and the
                        # write agree on the path.
                        with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                    finally:
                        # Release the HTTP response even if the copy fails
                        uf.close()
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                def dl(name, info):
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            # Network errors must be matched before the generic
            # OSError/IOError clause below.
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            self.record_download_archive(info_dict)
8222d8de
JMF
1163
def download(self, url_list):
    """Download a given list of URLs.

    Raises ``SameFileError`` when several URLs would be written through
    an output template that contains no ``%`` placeholder (unless
    ``max_downloads`` is 1).  Returns ``self._download_retcode``.
    """
    outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    several_urls = len(url_list) > 1
    if several_urls and '%' not in outtmpl and self.params.get('max_downloads') != 1:
        # A fixed output name would make every download overwrite the last
        raise SameFileError(outtmpl)

    for url in url_list:
        try:
            # It also downloads the videos
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        else:
            if not self.params.get('dump_single_json', False):
                continue
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1186
def download_with_info_file(self, info_filename):
    """Download from a previously written info JSON file.

    The file is parsed and passed to ``process_ie_result``.  If that
    raises ``DownloadError`` and the stored info has a ``webpage_url``,
    the download is retried from that URL; otherwise the error is
    re-raised.  Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.load(info_file)

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 1200
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    Postprocessors listed under ``ie_info['__postprocessors']`` run
    first, followed by the ones registered in ``self._pps``.  Each one
    receives the info dict returned by its predecessor, so changes made
    by an earlier postprocessor are visible to the later ones.

    If the postprocessors agree that the original file need not be kept
    and ``keepvideo`` is not set, the original file is removed.
    """
    info = dict(ie_info)
    info['filepath'] = filename
    keep_video = None
    pps_chain = []
    if ie_info.get('__postprocessors') is not None:
        pps_chain.extend(ie_info['__postprocessors'])
    pps_chain.extend(self._pps)
    for pp in pps_chain:
        try:
            keep_video_wish, new_info = pp.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)
            continue
        # Hand the (possibly replaced) info dict on to the next
        # postprocessor; keep the current one if nothing was returned.
        if new_info is not None:
            info = new_info
        if keep_video_wish is not None:
            if keep_video_wish:
                keep_video = keep_video_wish
            elif keep_video is None:
                # No clear decision yet, let IE decide
                keep_video = keep_video_wish
    if keep_video is False and not self.params.get('keepvideo', False):
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % filename)
            os.remove(encodeFilename(filename))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1227
5db07df6
PH
1228 def _make_archive_id(self, info_dict):
1229 # Future-proof against any change in case
1230 # and backwards compatibility with prior versions
d31209a1 1231 extractor = info_dict.get('extractor_key')
7012b23c
PH
1232 if extractor is None:
1233 if 'id' in info_dict:
1234 extractor = info_dict.get('ie_key') # key in a playlist
1235 if extractor is None:
5db07df6 1236 return None # Incomplete video information
6febd1c1 1237 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1238
def in_download_archive(self, info_dict):
    """Tell whether ``info_dict`` is already listed in the download archive.

    Returns ``False`` when no archive file is configured, when the
    archive id cannot be built, or when the archive file does not exist.
    Other I/O errors are propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    archive_id = self._make_archive_id(info_dict)
    if archive_id is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == archive_id for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing has been recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1257
def record_download_archive(self, info_dict):
    """Append the archive id of ``info_dict`` to the download archive.

    Does nothing when no ``download_archive`` file is configured.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        archive_id = self._make_archive_id(info_dict)
        assert archive_id
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(archive_id + '\n')
dd82ffea 1266
@staticmethod
def format_resolution(format, default='unknown'):
    """Return a short human-readable resolution string for a format dict.

    Audio-only formats (``vcodec == 'none'``) yield ``'audio only'``; an
    explicit ``resolution`` field is returned unchanged.  Otherwise the
    string is built from ``width``/``height`` as ``WxH``, ``Hp`` (height
    only) or ``Wx?`` (width only).  ``default`` is returned when neither
    dimension is known.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']
    if format.get('height') is not None:
        if format.get('width') is not None:
            res = '%sx%s' % (format['width'], format['height'])
        else:
            res = '%sp' % format['height']
    elif format.get('width') is not None:
        # Width comes first in WxH, so the unknown height is the second part
        res = '%dx?' % format['width']
    else:
        res = default
    return res
1283
c57f7757
PH
1284 def _format_note(self, fdict):
1285 res = ''
1286 if fdict.get('ext') in ['f4f', 'f4m']:
1287 res += '(unsupported) '
1288 if fdict.get('format_note') is not None:
1289 res += fdict['format_note'] + ' '
1290 if fdict.get('tbr') is not None:
1291 res += '%4dk ' % fdict['tbr']
1292 if fdict.get('container') is not None:
1293 if res:
1294 res += ', '
1295 res += '%s container' % fdict['container']
1296 if (fdict.get('vcodec') is not None and
1297 fdict.get('vcodec') != 'none'):
1298 if res:
1299 res += ', '
1300 res += fdict['vcodec']
91c7271a 1301 if fdict.get('vbr') is not None:
c57f7757
PH
1302 res += '@'
1303 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1304 res += 'video@'
1305 if fdict.get('vbr') is not None:
1306 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1307 if fdict.get('fps') is not None:
1308 res += ', %sfps' % fdict['fps']
c57f7757
PH
1309 if fdict.get('acodec') is not None:
1310 if res:
1311 res += ', '
1312 if fdict['acodec'] == 'none':
1313 res += 'video only'
1314 else:
1315 res += '%-5s' % fdict['acodec']
1316 elif fdict.get('abr') is not None:
1317 if res:
1318 res += ', '
1319 res += 'audio'
1320 if fdict.get('abr') is not None:
1321 res += '@%3dk' % fdict['abr']
1322 if fdict.get('asr') is not None:
1323 res += ' (%5dHz)' % fdict['asr']
1324 if fdict.get('filesize') is not None:
1325 if res:
1326 res += ', '
1327 res += format_bytes(fdict['filesize'])
9732d77e
PH
1328 elif fdict.get('filesize_approx') is not None:
1329 if res:
1330 res += ', '
1331 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1332 return res
91c7271a 1333
def list_formats(self, info_dict):
    """Print a table of the formats available for ``info_dict``.

    Formats whose ``preference`` is below -1000 are left out of the
    table.  When more than one format is shown, the first and last shown
    rows are tagged ``(worst)`` and ``(best)``.
    """
    def line(fmt, idlen=20):
        # '%-*s' takes the column width from the argument tuple
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            fmt['format_id'],
            fmt['ext'],
            self.format_resolution(fmt),
            self._format_note(fmt),
        )

    formats = info_dict.get('formats', [info_dict])
    idlen = max([len('format code')] +
                [len(f['format_id']) for f in formats])
    # Label the rows that are actually displayed: deriving the labels
    # from the unfiltered list mismatches rows once formats are hidden
    # and fails outright when every format is hidden.
    shown = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    formats_s = [line(f, idlen) for f in shown]
    if len(shown) > 1:
        formats_s[0] += (' ' if self._format_note(shown[0]) else '') + '(worst)'
        formats_s[-1] += (' ' if self._format_note(shown[-1]) else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
dca08720
PH
1358
def urlopen(self, req):
    """ Start an HTTP download

    ``req`` is either a URL string or a request object.  The URL is
    passed through ``escape_url`` before the request is opened with
    ``self._opener`` using the configured socket timeout.
    """

    # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
    # always respected by websites, some tend to give out URLs with non percent-encoded
    # non-ASCII characters (see telemb.py, ard.py [#3412])
    # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
    # To work around aforementioned issue we will replace request's original URL with
    # percent-encoded one
    string_types = basestring if sys.version_info < (3, 0) else compat_str
    given_as_string = isinstance(req, string_types)
    if given_as_string:
        original_url = req
    else:
        original_url = req.get_full_url()
    escaped_url = escape_url(original_url)

    # Substitute URL if any change after escaping
    if original_url != escaped_url:
        if given_as_string:
            req = escaped_url
        else:
            req = compat_urllib_request.Request(
                escaped_url, data=req.data, headers=req.headers,
                origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1382
def print_debug_header(self):
    """Write diagnostic information when the ``verbose`` option is set.

    Reports the encodings in use, the youtube-dl version, the git HEAD
    (when one can be determined), the Python version and platform, the
    versions of the external programs and the proxy map.  Does nothing
    when ``verbose`` is not set.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out, err = sp.communicate()
        out = out.decode().strip()
        if re.match('[0-9a-f]+', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # The git probe is best effort only.  Catching Exception rather
        # than everything lets KeyboardInterrupt/SystemExit propagate.
        try:
            sys.exc_clear()
        except AttributeError:
            # sys.exc_clear() does not exist on Python 3
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')
dca08720 1436
def _setup_opener(self):
    """Create the URL opener used for all requests and store it on self.

    Sets ``self._socket_timeout`` (600 when ``socket_timeout`` is not
    configured), ``self.cookiejar`` and ``self._opener`` from the
    ``socket_timeout``, ``cookiefile``, ``proxy``, ``nocheckcertificate``
    and ``debug_printtraffic`` options.
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_option = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        # Only load the file when it is readable
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_option is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_option == '':
        # An explicitly empty proxy option means no proxies at all
        proxies = {}
    else:
        proxies = {'http': proxy_option, 'https': proxy_option}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    else:
        debuglevel = 0
    https_handler = make_HTTPS_handler(
        self.params.get('nocheckcertificate', False), debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(debuglevel=debuglevel)
    opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
1477
def encode(self, s):
    """Encode ``s`` with the encoding returned by ``get_encoding``.

    Byte strings are returned unchanged.  A ``UnicodeEncodeError`` is
    re-raised with a hint about the encoding configuration appended to
    its reason.
    """
    if not isinstance(s, bytes):
        try:
            return s.encode(self.get_encoding())
        except UnicodeEncodeError as exc:
            exc.reason = exc.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s  # Already encoded
1487
def get_encoding(self):
    """Return the ``encoding`` option, or ``preferredencoding()`` if unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured