]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
YoutubeDL: Make the decision about removing the original file after each postprocesso...
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
9d2ecdbc 7import datetime
c1c9a79c 8import errno
8222d8de 9import io
b82f815f 10import itertools
8694c600 11import json
62fec3b2 12import locale
8222d8de 13import os
dca08720 14import platform
8222d8de
JMF
15import re
16import shutil
dca08720 17import subprocess
8222d8de
JMF
18import socket
19import sys
20import time
21import traceback
22
1e5b9a95
PH
23if os.name == 'nt':
24 import ctypes
25
8c25f81b 26from .compat import (
dca08720 27 compat_cookiejar,
4644ac55 28 compat_expanduser,
ce02ed60 29 compat_http_client,
4f026faf 30 compat_kwargs,
ce02ed60
PH
31 compat_str,
32 compat_urllib_error,
33 compat_urllib_request,
8c25f81b
PH
34)
35from .utils import (
d05cfe06 36 escape_url,
ce02ed60
PH
37 ContentTooShortError,
38 date_from_str,
39 DateRange,
acd69589 40 DEFAULT_OUTTMPL,
ce02ed60
PH
41 determine_ext,
42 DownloadError,
43 encodeFilename,
44 ExtractorError,
02dbf93f 45 format_bytes,
525ef922 46 formatSeconds,
1c088fa8 47 get_term_width,
ce02ed60 48 locked_file,
dca08720 49 make_HTTPS_handler,
ce02ed60 50 MaxDownloadsReached,
b7ab0590 51 PagedList,
ce02ed60 52 PostProcessingError,
dca08720 53 platform_name,
ce02ed60
PH
54 preferredencoding,
55 SameFileError,
56 sanitize_filename,
57 subtitles_filename,
58 takewhile_inclusive,
59 UnavailableVideoError,
29eb5174 60 url_basename,
58b1f00d 61 version_tuple,
ce02ed60
PH
62 write_json_file,
63 write_string,
dca08720 64 YoutubeDLHandler,
6350728b 65 prepend_extension,
7d4111ed 66 args_to_str,
05900629 67 age_restricted,
ce02ed60 68)
a0e07d31 69from .cache import Cache
023fa8c4 70from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 71from .downloader import get_suitable_downloader
4c83c967 72from .downloader.rtmp import rtmpdump_version
4f026faf 73from .postprocessor import (
6271f1ca 74 FFmpegFixupStretchedPP,
4f026faf
PH
75 FFmpegMergerPP,
76 FFmpegPostProcessor,
77 get_postprocessor,
78)
dca08720 79from .version import __version__
8222d8de
JMF
80
81
82class YoutubeDL(object):
83 """YoutubeDL class.
84
85 YoutubeDL objects are the ones responsible for downloading the
86 actual video file and writing it to disk if the user has requested
87 it, among some other tasks. In most cases there should be one per
88 program. As, given a video URL, the downloader doesn't know how to
89 extract all the needed information, task that InfoExtractors do, it
90 has to pass the URL to one of them.
91
92 For this, YoutubeDL objects have a method that allows
93 InfoExtractors to be registered in a given order. When it is passed
94 a URL, the YoutubeDL object hands it to the first InfoExtractor it
95 finds that reports being able to handle it. The InfoExtractor extracts
96 all the information about the video or videos the URL refers to, and
97 YoutubeDL process the extracted information, possibly using a File
98 Downloader to download the video.
99
100 YoutubeDL objects accept a lot of parameters. In order not to saturate
101 the object constructor with arguments, it receives a dictionary of
102 options instead. These options are available through the params
103 attribute for the InfoExtractors to use. The YoutubeDL also
104 registers itself as the downloader in charge for the InfoExtractors
105 that are added to it, so this is a "mutual registration".
106
107 Available options:
108
109 username: Username for authentication purposes.
110 password: Password for authentication purposes.
c6c19746 111 videopassword: Password for accessing a video.
8222d8de
JMF
112 usenetrc: Use netrc for authentication instead.
113 verbose: Print additional info to stdout.
114 quiet: Do not print messages to stdout.
ad8915b7 115 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
116 forceurl: Force printing final URL.
117 forcetitle: Force printing title.
118 forceid: Force printing ID.
119 forcethumbnail: Force printing thumbnail URL.
120 forcedescription: Force printing description.
121 forcefilename: Force printing final filename.
525ef922 122 forceduration: Force printing duration.
8694c600 123 forcejson: Force printing info_dict as JSON.
63e0be34
PH
124 dump_single_json: Force printing the info_dict of the whole playlist
125 (or video) as a single JSON line.
8222d8de 126 simulate: Do not download the video files.
d8600787 127 format: Video format code. See options.py for more information.
8222d8de
JMF
128 format_limit: Highest quality format to try.
129 outtmpl: Template for output names.
130 restrictfilenames: Do not allow "&" and spaces in file names
131 ignoreerrors: Do not stop on download errors.
132 nooverwrites: Prevent overwriting files.
133 playliststart: Playlist item to start at.
134 playlistend: Playlist item to end at.
ff815fe6 135 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
136 matchtitle: Download only matching titles.
137 rejecttitle: Reject downloads for matching titles.
8bf9319e 138 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
139 logtostderr: Log messages to stderr instead of stdout.
140 writedescription: Write the video description to a .description file
141 writeinfojson: Write the video metadata to a .info.json file
1fb07d10 142 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de
JMF
143 writethumbnail: Write the thumbnail image to a file
144 writesubtitles: Write the video subtitles to a file
b004821f 145 writeautomaticsub: Write the automatic subtitles to a file
8222d8de 146 allsubtitles: Downloads all the subtitles of the video
0b7f3118 147 (requires writesubtitles or writeautomaticsub)
8222d8de 148 listsubtitles: Lists all available subtitles for the video
b98a6b2f 149 subtitlesformat: Subtitle format [srt/sbv/vtt] (default=srt)
aa6a10c4 150 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
151 keepvideo: Keep the video file after post-processing
152 daterange: A DateRange object, download only if the upload_date is in the range.
153 skip_download: Skip the actual download of the video file
c35f9e72 154 cachedir: Location of the cache files in the filesystem.
a0e07d31 155 False to disable filesystem cache.
47192f92 156 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
157 age_limit: An integer representing the user's age in years.
158 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
159 min_views: An integer representing the minimum view count the video
160 must have in order to not be skipped.
161 Videos without view count information are always
162 downloaded. None for no limit.
163 max_views: An integer representing the maximum view count.
164 Videos that are more popular than that are not
165 downloaded.
166 Videos without view count information are always
167 downloaded. None for no limit.
168 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
169 Videos already present in the file are not downloaded
170 again.
dca08720 171 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 172 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
173 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
174 At the moment, this is only supported by YouTube.
a1ee09e8 175 proxy: URL of the proxy server to use
e344693b 176 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
177 bidi_workaround: Work around buggy terminals without bidirectional text
178 support, using fribidi
a0ddb8a2 179 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 180 include_ads: Download ads as well
04b4d394
PH
181 default_search: Prepend this string if an input url is not valid.
182 'auto' for elaborate guessing
62fec3b2 183 encoding: Use this encoding instead of the system-specified.
e8ee972c 184 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
185 Pass in 'in_playlist' to only show this behavior for
186 playlist items.
4f026faf 187 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
188 * key: The name of the postprocessor. See
189 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
190 as well as any further keyword arguments for the
191 postprocessor.
71b640cc
PH
192 progress_hooks: A list of functions that get called on download
193 progress, with a dictionary with the entries
194 * filename: The final filename
195 * status: One of "downloading" and "finished"
196
197 The dict may also have some of the following entries:
198
199 * downloaded_bytes: Bytes on disk
200 * total_bytes: Size of the whole file, None if unknown
201 * tmpfilename: The filename we're currently writing to
202 * eta: The estimated time in seconds, None if unknown
203 * speed: The download speed in bytes/second, None if
204 unknown
205
206 Progress hooks are guaranteed to be called at least once
207 (with status "finished") if the download is successful.
45598f15 208 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
209 fixup: Automatically correct known faults of the file.
210 One of:
211 - "never": do nothing
212 - "warn": only emit a warning
213 - "detect_or_warn": check whether we can do anything
214 about it, warn otherwise
be4a824d 215 source_address: (Experimental) Client-side IP address to bind to.
8bfa7545
PH
216 call_home: Boolean, true iff we are allowed to contact the
217 youtube-dl servers for debugging.
71b640cc 218
fe7e0c98 219
8222d8de
JMF
220 The following parameters are not used by YoutubeDL itself, they are used by
221 the FileDownloader:
222 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
223 noresizebuffer, retries, continuedl, noprogress, consoletitle
76b1bd67
JMF
224
225 The following options are used by the post processors:
226 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
227 otherwise prefer avconv.
8d31fa3c 228 exec_cmd: Arbitrary command to run after downloading
8222d8de
JMF
229 """
230
231 params = None
232 _ies = []
233 _pps = []
234 _download_retcode = None
235 _num_downloads = None
236 _screen_file = None
237
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params is the options dictionary described in the class docstring;
    None is treated as an empty dictionary.
    If auto_init is true, the debug header is printed and the default
    InfoExtractors are registered.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Any truthy 'logtostderr' selects stderr.  Indexing a two-element list
    # with the raw option value would fail for None or non-bool values.
    self._screen_file = sys.stderr if params.get('logtostderr') else sys.stdout
    self._err_file = sys.stderr
    self.params = params
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = get_term_width()
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Prefer bidiv, fall back to fribidi if it cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # A missing executable only disables the workaround;
            # every other OS error is propagated.
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968']
            and not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if '%(stitle)s' in self.params.get('outtmpl', ''):
        self.report_warning('%(stitle)s is deprecated. Use the %(title)s and the --restrict-filenames flag(which also secures %(uploader)s et al) instead.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    # Instantiate the postprocessors requested through the options; every
    # entry besides 'key' is passed to the postprocessor as a keyword argument.
    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)
308
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn if an argument looks like a short YouTube ID starting with a dash.

    Such arguments consist of a dash followed by exactly ten ID characters.
    The warning shows a corrected command line in which they are moved
    behind a '--' separator.
    """
    short_id_re = r'^-[0-9A-Za-z_-]{10}$'
    idxs = [pos for pos, arg in enumerate(argv) if re.match(short_id_re, arg)]
    if not idxs:
        return

    regular_args = [arg for pos, arg in enumerate(argv) if pos not in idxs]
    suspicious_args = [argv[pos] for pos in idxs]
    correct_argv = ['youtube-dl'] + regular_args + ['--'] + suspicious_args
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(correct_argv))
324
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register the InfoExtractor ie with this downloader.

    The extractor is appended to the ordered extractor list, stored in the
    per-key instance table, and told that this object is its downloader.
    """
    self._ies.append(ie)
    key = ie.ie_key()
    self._ies_instances[key] = ie
    ie.set_downloader(self)
330
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the InfoExtractor instance registered under ie_key.

    If no instance is registered yet, a new one is created, added to the
    extractor list and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
342
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor produced by gen_extractors(), in order.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
349
8222d8de
JMF
def add_post_processor(self, pp):
    """Append the PostProcessor pp to the chain and attach it to this downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
354
933605d7
JMF
def add_progress_hook(self, ph):
    """Register the progress hook ph (currently only for the file downloader)."""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 358
1c088fa8 359 def _bidi_workaround(self, message):
5d681e96 360 if not hasattr(self, '_output_channel'):
1c088fa8
PH
361 return message
362
5d681e96 363 assert hasattr(self, '_output_process')
11b85ce6 364 assert isinstance(message, compat_str)
6febd1c1
PH
365 line_count = message.count('\n') + 1
366 self._output_process.stdin.write((message + '\n').encode('utf-8'))
5d681e96 367 self._output_process.stdin.flush()
6febd1c1 368 res = ''.join(self._output_channel.readline().decode('utf-8')
9e1a5b84 369 for _ in range(line_count))
6febd1c1 370 return res[:-len('\n')]
1c088fa8 371
def to_screen(self, message, skip_eol=False):
    """Print message through to_stdout, honouring the 'quiet' option."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
375
def _write_string(self, s, out=None):
    """Write s to out via write_string, using the 'encoding' option."""
    encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=encoding)
734f90bb 378
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Print message to the screen file, or hand it to the configured logger.

    If a 'logger' option is set, the message goes to its debug() method.
    Otherwise it is written to the screen file, unless check_quiet is set
    and the 'quiet' option is enabled.  A newline is appended unless
    skip_eol is true.
    """
    if self.params.get('logger'):
        self.params['logger'].debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    message = self._bidi_workaround(message)
    terminator = ('\n', '')[skip_eol]
    self._write_string(message + terminator, self._screen_file)
8222d8de
JMF
389
def to_stderr(self, message):
    """Print message to the error file, or report it to the configured logger."""
    assert isinstance(message, compat_str)
    if self.params.get('logger'):
        self.params['logger'].error(message)
        return
    converted = self._bidi_workaround(message)
    self._write_string(converted + '\n', self._err_file)
8222d8de 399
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console window title to message if 'consoletitle' is enabled."""
    if not self.params.get('consoletitle', False):
        return
    if os.name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title)
    elif 'TERM' in os.environ:
        # Terminal escape sequence that sets the window title
        escape_sequence = '\033]0;%s\007' % message
        self._write_string(escape_sequence, self._screen_file)
1e5b9a95 409
bdde425c
PH
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
416
def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Does nothing unless 'consoletitle' is enabled and TERM is set.
    """
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
423
424 def __enter__(self):
425 self.save_console_title()
426 return self
427
428 def __exit__(self, *args):
429 self.restore_console_title()
f89197d7 430
dca08720
PH
431 if self.params.get('cookiefile') is not None:
432 self.cookiejar.save()
bdde425c 433
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message, if any, is printed to stderr.  In verbose mode a traceback
    is printed too: tb if given, otherwise one built from the exception
    currently being handled, or from the current stack if there is none.

    Unless the 'ignoreerrors' option is set, a DownloadError is raised
    afterwards; otherwise the download return code is set to 1.
    """
    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            active = sys.exc_info()
            if active[0]:  # if .trouble has been called from an except block
                tb = ''
                # Some exceptions carry the exc_info of their original cause
                if hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
                    tb += ''.join(traceback.format_exception(*active[1].exc_info))
                tb += compat_str(traceback.format_exc())
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        active = sys.exc_info()
        if active[0] and hasattr(active[1], 'exc_info') and active[1].exc_info[0]:
            exc_info = active[1].exc_info
        else:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)
    self._download_retcode = 1
463
def report_warning(self, message):
    '''
    Emit a warning.

    With a 'logger' option the message goes to its warning() method.
    Otherwise it is printed to stderr prefixed with 'WARNING:' (colored if
    stderr is a tty and the OS is not Windows), unless 'no_warnings' is set.
    '''
    if self.params.get('logger') is not None:
        self.params['logger'].warning(message)
        return
    if self.params.get('no_warnings'):
        return

    use_color = self._err_file.isatty() and os.name != 'nt'
    _msg_header = '\033[0;33mWARNING:\033[0m' if use_color else 'WARNING:'
    self.to_stderr('%s %s' % (_msg_header, message))
8222d8de
JMF
480
def report_error(self, message, tb=None):
    '''
    Call trouble() with the message prefixed by 'ERROR:'.

    The prefix is colored in red if stderr is a tty and the OS is not
    Windows.
    '''
    use_color = self._err_file.isatty() and os.name != 'nt'
    _msg_header = '\033[0;31mERROR:\033[0m' if use_color else 'ERROR:'
    self.trouble('%s %s' % (_msg_header, message), tb)
492
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded.

    If the file name cannot be encoded for output, a generic message
    without the name is printed instead.
    """
    named_message = '[download] %s has already been downloaded' % file_name
    try:
        self.to_screen(named_message)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 499
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename.

    The 'outtmpl' option (DEFAULT_OUTTMPL if unset) is expanded with the
    sanitized fields of info_dict plus the computed fields 'epoch',
    'autonumber', a zero-padded 'playlist_index' and, if missing,
    'resolution'.  Fields that are missing or None expand to 'NA'.

    Returns the filename, or None (after reporting an error) if expanding
    the template raised a ValueError.
    """
    try:
        template_dict = dict(info_dict)

        template_dict['epoch'] = int(time.time())
        autonumber_size = self.params.get('autonumber_size')
        if autonumber_size is None:
            autonumber_size = 5
        autonumber_templ = '%0' + str(autonumber_size) + 'd'
        template_dict['autonumber'] = autonumber_templ % self._num_downloads
        if template_dict.get('playlist_index') is not None:
            # Pad the index to the width of the playlist size.  Without a
            # known size no padding is applied instead of raising KeyError.
            n_entries = template_dict.get('n_entries')
            index_width = len(str(n_entries)) if n_entries is not None else 0
            template_dict['playlist_index'] = '%0*d' % (index_width, template_dict['playlist_index'])
        if template_dict.get('resolution') is None:
            if template_dict.get('width') and template_dict.get('height'):
                template_dict['resolution'] = '%dx%d' % (template_dict['width'], template_dict['height'])
            elif template_dict.get('height'):
                template_dict['resolution'] = '%sp' % template_dict['height']
            elif template_dict.get('width'):
                # Width comes first in WIDTHxHEIGHT; the unknown height is '?'
                template_dict['resolution'] = '%dx?' % template_dict['width']

        sanitize = lambda k, v: sanitize_filename(
            compat_str(v),
            restricted=self.params.get('restrictfilenames'),
            is_id=(k == 'id'))
        template_dict = dict((k, sanitize(k, v))
                             for k, v in template_dict.items()
                             if v is not None)
        # Unknown template fields expand to 'NA' instead of raising KeyError
        template_dict = collections.defaultdict(lambda: 'NA', template_dict)

        outtmpl = self.params.get('outtmpl', DEFAULT_OUTTMPL)
        tmpl = compat_expanduser(outtmpl)
        filename = tmpl % template_dict
        return filename
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
537
def _match_entry(self, info_dict):
    """ Returns None iff the file should be downloaded

    Otherwise returns a message explaining why the entry is skipped.
    The checks cover, in order: the 'matchtitle' and 'rejecttitle'
    patterns, 'daterange', 'min_views' / 'max_views', the age limit and
    the download archive.
    """

    video_title = info_dict.get('title', info_dict.get('id', 'video'))
    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        matchtitle = self.params.get('matchtitle', False)
        if matchtitle:
            if not re.search(matchtitle, title, re.IGNORECASE):
                return '"' + title + '" title did not match pattern "' + matchtitle + '"'
        rejecttitle = self.params.get('rejecttitle', False)
        if rejecttitle:
            if re.search(rejecttitle, title, re.IGNORECASE):
                return '"' + title + '" title matched reject pattern "' + rejecttitle + '"'
    date = info_dict.get('upload_date', None)
    if date is not None:
        dateRange = self.params.get('daterange', DateRange())
        if date not in dateRange:
            return '%s upload date is not in range %s' % (date_from_str(date).isoformat(), dateRange)
    view_count = info_dict.get('view_count', None)
    if view_count is not None:
        min_views = self.params.get('min_views')
        if min_views is not None and view_count < min_views:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, view_count, min_views)
        max_views = self.params.get('max_views')
        if max_views is not None and view_count > max_views:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, view_count, max_views)
    if age_restricted(info_dict.get('age_limit'), self.params.get('age_limit')):
        # video_title is always bound; `title` only exists when the entry
        # carries a 'title' key.
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title
    return None
fe7e0c98 571
b6c45014
JMF
572 @staticmethod
573 def add_extra_info(info_dict, extra_info):
574 '''Set the keys from extra_info in info dict if they are missing'''
575 for key, value in extra_info.items():
576 info_dict.setdefault(key, value)
577
7fc3fa05
PH
def extract_info(self, url, download=True, ie_key=None, extra_info={},
                 process=True):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    If ie_key is given, only that extractor is tried.
    If 'process' is true, the extractor result is passed through
    process_ie_result (which downloads the videos if 'download') and its
    return value is returned; otherwise the raw extractor result is returned.
    extra_info is a dict containing the extra values to add to each result.
    Returns None if the extraction failed and the error was only reported,
    or if the extractor returned no result.
    '''

    candidates = [self.get_info_extractor(ie_key)] if ie_key else self._ies

    for ie in candidates:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if not process:
                return ie_result
            return self.process_ie_result(ie_result, download, extra_info)
        except ExtractorError as de:  # An error we somewhat expected
            self.report_error(compat_str(de), de.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if not self.params.get('ignoreerrors', False):
                raise
            self.report_error(compat_str(e), tb=compat_str(traceback.format_exc()))
            break
    else:
        # The loop ended without any extractor accepting the URL
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 627
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in the extractor and webpage fields that ie_result lacks."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
635
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info={}):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.  For a 'url_transparent' result whose
    embedded URL could not be extracted, the empty value returned by
    extract_info (normally None) is returned instead.
    Raises Exception for an unknown '_type'.
    """

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        # With 'extract_flat', URL references are returned unresolved
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # extract_info returns None when the extraction failed and the
        # error was only reported (e.g. with 'ignoreerrors'); return early
        # instead of crashing on info.copy().
        if not info:
            return info

        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title', None) or ie_result.get('id', None)
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        # 'playliststart' is 1-based, slicing below is 0-based
        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend', None)
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Collected %d video ids (downloading %d of them)" %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            entries = ie_entries.getslice(
                playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            entries = list(itertools.islice(
                ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                "[%s] playlist %s: Downloading %d videos" %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            reason = self._match_entry(entry)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            # Give each old-style entry the extractor fields of its parent
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
770
a9c58ad9
JMF
def select_format(self, format_spec, available_formats):
    """Pick one format dict from available_formats according to format_spec.

    format_spec may be None or 'best' (last entry), 'worst' (first entry),
    'bestaudio'/'worstaudio' (entries whose 'vcodec' is 'none'),
    'bestvideo'/'worstvideo' (entries whose 'acodec' is 'none'), one of
    the known file extensions, or otherwise a format_id.

    Returns the selected dict, or None when nothing matches (including
    when available_formats is empty).
    """
    if format_spec is None:
        format_spec = 'best'

    if format_spec in ('best', 'worst'):
        candidates = available_formats
    elif format_spec in ('bestaudio', 'worstaudio'):
        candidates = [
            f for f in available_formats
            if f.get('vcodec') == 'none']
    elif format_spec in ('bestvideo', 'worstvideo'):
        candidates = [
            f for f in available_formats
            if f.get('acodec') == 'none']
    else:
        extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
        key = 'ext' if format_spec in extensions else 'format_id'
        # .get() so that a format lacking the key simply does not match
        candidates = [
            f for f in available_formats
            if f.get(key) == format_spec]

    if not candidates:
        return None
    # The list is treated as ordered worst-to-best
    if format_spec in ('worst', 'worstaudio', 'worstvideo'):
        return candidates[0]
    return candidates[-1]
810
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalise a single video result, select formats and optionally download.

    Fills in derived fields on info_dict in place ('playlist',
    'playlist_index', 'thumbnail', 'display_id', 'upload_date' and, for
    every format, 'format_id', 'format' and 'ext'), selects the formats
    named by the 'format' parameter and, when download is true, passes a
    copy of info_dict merged with each selected format to process_info().

    Returns info_dict updated with the last selected format. Returns None
    when the 'listformats' parameter is set (formats are only listed) or
    when a 'video+audio' request names an audio-only format first.

    Raises ExtractorError when 'id', 'title' or a format's 'url' is
    missing, when there are no formats, or when none of the requested
    formats is available.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails:
        # Missing dimensions sort first; substituting -1 / '' keeps the key
        # comparable on Python 3, where None cannot be ordered against int
        thumbnails.sort(key=lambda t: (
            t.get('width') if t.get('width') is not None else -1,
            t.get('height') if t.get('height') is not None else -1,
            t.get('url') or ''))
        for t in thumbnails:
            if t.get('width') is not None and t.get('height') is not None:
                t['resolution'] = '%dx%d' % (t['width'], t['height'])

    if thumbnails and 'thumbnail' not in info_dict:
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around negative timestamps in Windows
        # (see http://bugs.python.org/issue1646728)
        if info_dict['timestamp'] < 0 and os.name == 'nt':
            info_dict['timestamp'] = 0
        upload_date = datetime.datetime.utcfromtimestamp(
            info_dict['timestamp'])
        info_dict['upload_date'] = upload_date.strftime('%Y%m%d')

    # These extractors handle format selection themselves
    if info_dict.get('extractor') in ['Youku']:
        if download:
            self.process_info(info_dict)
        return info_dict

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()

    format_limit = self.params.get('format_limit', None)
    if format_limit:
        formats = list(takewhile_inclusive(
            lambda f: f['format_id'] != format_limit, formats
        ))

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' field if the original info_dict lists them,
        # otherwise we end up with a circular reference: the first (and
        # unique) element in the 'formats' field in info_dict is info_dict
        # itself, which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats', None):
        self.list_formats(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        req_format = 'best'
    formats_to_download = []
    # The -1 is for supporting YoutubeIE
    if req_format in ('-1', 'all'):
        formats_to_download = formats
    else:
        for rfstr in req_format.split(','):
            # We can accept formats requested in the format: 34/5/best, we pick
            # the first that is available, starting from left
            req_formats = rfstr.split('/')
            for rf in req_formats:
                if re.match(r'.+?\+.+?', rf) is not None:
                    # Two formats have been requested like '137+139'.
                    # Split once only: a spec with more than one '+' then
                    # fails to match instead of crashing on unpacking.
                    format_1, format_2 = rf.split('+', 1)
                    formats_info = (self.select_format(format_1, formats),
                                    self.select_format(format_2, formats))
                    if all(formats_info):
                        # The first format must contain the video and the
                        # second the audio
                        if formats_info[0].get('vcodec') == 'none':
                            self.report_error('The first format must '
                                              'contain the video, try using '
                                              '"-f %s+%s"' % (format_2, format_1))
                            return
                        output_ext = (
                            formats_info[0]['ext']
                            if self.params.get('merge_output_format') is None
                            else self.params['merge_output_format'])
                        selected_format = {
                            'requested_formats': formats_info,
                            'format': rf,
                            'ext': output_ext,
                            'width': formats_info[0].get('width'),
                            'height': formats_info[0].get('height'),
                            'resolution': formats_info[0].get('resolution'),
                            'fps': formats_info[0].get('fps'),
                            'vcodec': formats_info[0].get('vcodec'),
                            'vbr': formats_info[0].get('vbr'),
                            'stretched_ratio': formats_info[0].get('stretched_ratio'),
                            'acodec': formats_info[1].get('acodec'),
                            'abr': formats_info[1].get('abr'),
                        }
                    else:
                        selected_format = None
                else:
                    selected_format = self.select_format(rf, formats)
                if selected_format is not None:
                    formats_to_download.append(selected_format)
                    break
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
964
8222d8de
JMF
def process_info(self, info_dict):
    """Process a single resolved IE result.

    Applies the max-downloads limit and the _match_entry() filter, emits
    the output requested by the 'force*' parameters, writes the optional
    side files (description, annotations, subtitles, info JSON,
    thumbnail), downloads the media unless 'skip_download' is set, runs
    the postprocessors and finally records the video in the download
    archive.

    info_dict is modified in place ('fulltitle', 'title', 'stitle',
    'format' and possibly '_filename', '__postprocessors',
    '__files_to_merge'). Returns None.

    Raises MaxDownloadsReached when the limit has been hit and
    UnavailableVideoError when the download fails with OSError/IOError.
    """

    assert info_dict.get('_type', 'video') == 'video'

    max_downloads = self.params.get('max_downloads')
    if max_downloads is not None:
        if self._num_downloads >= int(max_downloads):
            raise MaxDownloadsReached()

    info_dict['fulltitle'] = info_dict['title']
    if len(info_dict['title']) > 200:
        info_dict['title'] = info_dict['title'][:197] + '...'

    # Keep for backwards compatibility
    info_dict['stitle'] = info_dict['title']

    if 'format' not in info_dict:
        info_dict['format'] = info_dict['ext']

    reason = self._match_entry(info_dict)
    if reason is not None:
        self.to_screen('[download] ' + reason)
        return

    self._num_downloads += 1

    filename = self.prepare_filename(info_dict)

    # Forced printings
    if self.params.get('forcetitle', False):
        self.to_stdout(info_dict['fulltitle'])
    if self.params.get('forceid', False):
        self.to_stdout(info_dict['id'])
    if self.params.get('forceurl', False):
        if info_dict.get('requested_formats') is not None:
            for f in info_dict['requested_formats']:
                self.to_stdout(f['url'] + f.get('play_path', ''))
        else:
            # For RTMP URLs, also include the playpath
            self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
    if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
        self.to_stdout(info_dict['thumbnail'])
    if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
        self.to_stdout(info_dict['description'])
    if self.params.get('forcefilename', False) and filename is not None:
        self.to_stdout(filename)
    if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
        self.to_stdout(formatSeconds(info_dict['duration']))
    if self.params.get('forceformat', False):
        self.to_stdout(info_dict['format'])
    if self.params.get('forcejson', False):
        info_dict['_filename'] = filename
        self.to_stdout(json.dumps(info_dict))
    if self.params.get('dump_single_json', False):
        info_dict['_filename'] = filename

    # Do nothing else if in simulate mode
    if self.params.get('simulate', False):
        return

    if filename is None:
        return

    try:
        dn = os.path.dirname(encodeFilename(filename))
        if dn and not os.path.exists(dn):
            os.makedirs(dn)
    except (OSError, IOError) as err:
        self.report_error('unable to create directory ' + compat_str(err))
        return

    nooverwrites = self.params.get('nooverwrites', False)

    if self.params.get('writedescription', False):
        descfn = filename + '.description'
        if nooverwrites and os.path.exists(encodeFilename(descfn)):
            self.to_screen('[info] Video description is already present')
        elif info_dict.get('description') is None:
            self.report_warning('There\'s no description to write.')
        else:
            try:
                self.to_screen('[info] Writing video description to: ' + descfn)
                with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
                    descfile.write(info_dict['description'])
            except (OSError, IOError):
                self.report_error('Cannot write description file ' + descfn)
                return

    if self.params.get('writeannotations', False):
        annofn = filename + '.annotations.xml'
        if nooverwrites and os.path.exists(encodeFilename(annofn)):
            self.to_screen('[info] Video annotations are already present')
        elif info_dict.get('annotations') is None:
            # Checked before opening so that no empty file is left behind
            self.report_warning('There are no annotations to write.')
        else:
            try:
                self.to_screen('[info] Writing video annotations to: ' + annofn)
                with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
                    annofile.write(info_dict['annotations'])
            except (KeyError, TypeError):
                self.report_warning('There are no annotations to write.')
            except (OSError, IOError):
                self.report_error('Cannot write annotations file: ' + annofn)
                return

    subtitles_are_requested = any([self.params.get('writesubtitles', False),
                                   self.params.get('writeautomaticsub')])

    if subtitles_are_requested and 'subtitles' in info_dict and info_dict['subtitles']:
        # subtitles download errors are already managed as troubles in relevant IE
        # that way it will silently go on when used with unsupporting IE
        subtitles = info_dict['subtitles']
        sub_format = self.params.get('subtitlesformat', 'srt')
        for sub_lang, sub in subtitles.items():
            if sub is None:
                continue
            # Computed outside the try so the error message below can
            # always refer to it
            sub_filename = subtitles_filename(filename, sub_lang, sub_format)
            try:
                if nooverwrites and os.path.exists(encodeFilename(sub_filename)):
                    self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
                else:
                    self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
                    with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
                        subfile.write(sub)
            except (OSError, IOError):
                self.report_error('Cannot write subtitles file ' + sub_filename)
                return

    if self.params.get('writeinfojson', False):
        infofn = os.path.splitext(filename)[0] + '.info.json'
        if nooverwrites and os.path.exists(encodeFilename(infofn)):
            self.to_screen('[info] Video description metadata is already present')
        else:
            self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
            try:
                write_json_file(info_dict, infofn)
            except (OSError, IOError):
                self.report_error('Cannot write metadata to JSON file ' + infofn)
                return

    if self.params.get('writethumbnail', False):
        if info_dict.get('thumbnail') is not None:
            thumb_format = determine_ext(info_dict['thumbnail'], 'jpg')
            thumb_filename = os.path.splitext(filename)[0] + '.' + thumb_format
            if nooverwrites and os.path.exists(encodeFilename(thumb_filename)):
                self.to_screen('[%s] %s: Thumbnail is already present' %
                               (info_dict['extractor'], info_dict['id']))
            else:
                self.to_screen('[%s] %s: Downloading thumbnail ...' %
                               (info_dict['extractor'], info_dict['id']))
                try:
                    uf = self.urlopen(info_dict['thumbnail'])
                    try:
                        # encodeFilename() for consistency with every other
                        # file written by this method
                        with open(encodeFilename(thumb_filename), 'wb') as thumbf:
                            shutil.copyfileobj(uf, thumbf)
                    finally:
                        uf.close()
                    self.to_screen('[%s] %s: Writing thumbnail to: %s' %
                                   (info_dict['extractor'], info_dict['id'], thumb_filename))
                except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                    self.report_warning('Unable to download thumbnail "%s": %s' %
                                        (info_dict['thumbnail'], compat_str(err)))

    if not self.params.get('skip_download', False):
        if nooverwrites and os.path.exists(encodeFilename(filename)):
            success = True
        else:
            try:
                def dl(name, info):
                    fd = get_suitable_downloader(info)(self, self.params)
                    for ph in self._progress_hooks:
                        fd.add_progress_hook(ph)
                    if self.params.get('verbose'):
                        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
                    return fd.download(name, info)
                if info_dict.get('requested_formats') is not None:
                    downloaded = []
                    success = True
                    merger = FFmpegMergerPP(self, not self.params.get('keepvideo'))
                    if not merger._executable:
                        postprocessors = []
                        self.report_warning('You have requested multiple '
                                            'formats but ffmpeg or avconv are not installed.'
                                            ' The formats won\'t be merged')
                    else:
                        postprocessors = [merger]
                    for f in info_dict['requested_formats']:
                        new_info = dict(info_dict)
                        new_info.update(f)
                        fname = self.prepare_filename(new_info)
                        fname = prepend_extension(fname, 'f%s' % f['format_id'])
                        downloaded.append(fname)
                        partial_success = dl(fname, new_info)
                        success = success and partial_success
                    info_dict['__postprocessors'] = postprocessors
                    info_dict['__files_to_merge'] = downloaded
                else:
                    # Just a single file
                    success = dl(filename, info_dict)
            except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
                self.report_error('unable to download video data: %s' % str(err))
                return
            except (OSError, IOError) as err:
                raise UnavailableVideoError(err)
            except (ContentTooShortError, ) as err:
                self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
                return

        if success:
            # Fixup content
            stretched_ratio = info_dict.get('stretched_ratio')
            if stretched_ratio is not None and stretched_ratio != 1:
                fixup_policy = self.params.get('fixup')
                if fixup_policy is None:
                    fixup_policy = 'detect_or_warn'
                if fixup_policy == 'warn':
                    self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
                        info_dict['id'], stretched_ratio))
                elif fixup_policy == 'detect_or_warn':
                    stretched_pp = FFmpegFixupStretchedPP(self)
                    if stretched_pp.available:
                        info_dict.setdefault('__postprocessors', [])
                        info_dict['__postprocessors'].append(stretched_pp)
                    else:
                        self.report_warning(
                            '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
                                info_dict['id'], stretched_ratio))
                else:
                    assert fixup_policy == 'ignore'

            try:
                self.post_process(filename, info_dict)
            except (PostProcessingError) as err:
                self.report_error('postprocessing: %s' % str(err))
                return
            # NOTE(review): placed inside "if success:" -- the flattened
            # source does not show the nesting level; confirm against the
            # original file.
            self.record_download_archive(info_dict)
8222d8de
JMF
1196
def download(self, url_list):
    """Download a given list of URLs.

    Raises SameFileError when several URLs would be written through an
    output template without any '%' placeholder (unless 'max_downloads'
    is 1). Returns self._download_retcode.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    several_urls = len(url_list) > 1
    if several_urls and '%' not in template and self.params.get('max_downloads') != 1:
        raise SameFileError(template)

    for url in url_list:
        try:
            # extract_info() also performs the actual download
            result = self.extract_info(url)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise

        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(result))

    return self._download_retcode
1219
def download_with_info_file(self, info_filename):
    """Download using the info dict stored as JSON in info_filename.

    If processing the stored info raises DownloadError and the info has a
    'webpage_url', a warning is reported and that URL is downloaded
    instead; without a 'webpage_url' the error is re-raised.
    Returns the download return code.
    """
    with io.open(info_filename, 'r', encoding='utf-8') as info_file:
        info = json.loads(info_file.read())

    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])

    return self._download_retcode
1dcc4c0c 1233
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is ie_info['__postprocessors'] (when present) followed by
    self._pps. Each postprocessor receives the info dict returned by the
    previous one. After each run, the file that was the input of that
    postprocessor is removed when it answered False for "keep" and the
    'keepvideo' parameter is not set.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    extra_pps = ie_info.get('__postprocessors')
    chain = list(extra_pps) if extra_pps is not None else []
    chain += self._pps

    for processor in chain:
        # The decision is taken afresh for every postprocessor; it stays
        # None if the postprocessor fails.
        keep_video = None
        previous_path = info['filepath']
        try:
            keep_video, info = processor.run(info)
        except PostProcessingError as e:
            self.report_error(e.msg)

        if keep_video is not False or self.params.get('keepvideo', False):
            continue
        try:
            self.to_screen('Deleting original file %s (pass -k to keep)' % previous_path)
            os.remove(encodeFilename(previous_path))
        except (IOError, OSError):
            self.report_warning('Unable to remove downloaded video file')
c1c9a79c 1261
5db07df6
PH
1262 def _make_archive_id(self, info_dict):
1263 # Future-proof against any change in case
1264 # and backwards compatibility with prior versions
d31209a1 1265 extractor = info_dict.get('extractor_key')
7012b23c
PH
1266 if extractor is None:
1267 if 'id' in info_dict:
1268 extractor = info_dict.get('ie_key') # key in a playlist
1269 if extractor is None:
5db07df6 1270 return None # Incomplete video information
6febd1c1 1271 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1272
def in_download_archive(self, info_dict):
    """Tell whether info_dict is already listed in the download archive.

    Returns False when no 'download_archive' parameter is set, when the
    video information is too incomplete to build an archive id, or when
    the archive file does not exist. Other IOErrors are re-raised.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            return any(entry.strip() == wanted for entry in archive_file)
    except IOError as ioe:
        # A missing archive simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1291
def record_download_archive(self, info_dict):
    """Append the archive id of info_dict to the download archive file.

    Does nothing when no 'download_archive' parameter is set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
dd82ffea 1300
8c51aa65 1301 @staticmethod
def format_resolution(format, default='unknown'):
    """Return a human-readable resolution string for a format dict.

    Takes no instance argument. In order of precedence: 'audio only' when
    'vcodec' is 'none', the explicit 'resolution' value, 'WxH' when both
    dimensions are known, 'Hp' when only the height is known, 'Wx?' when
    only the width is known, and otherwise default.
    """
    if format.get('vcodec') == 'none':
        return 'audio only'
    if format.get('resolution') is not None:
        return format['resolution']

    width = format.get('width')
    height = format.get('height')
    if height is not None:
        if width is not None:
            return '%sx%s' % (width, height)
        return '%sp' % height
    if width is not None:
        # Width comes first in WxH, so the unknown part is the height
        return '%dx?' % width
    return default
1317
c57f7757
PH
1318 def _format_note(self, fdict):
1319 res = ''
1320 if fdict.get('ext') in ['f4f', 'f4m']:
1321 res += '(unsupported) '
1322 if fdict.get('format_note') is not None:
1323 res += fdict['format_note'] + ' '
1324 if fdict.get('tbr') is not None:
1325 res += '%4dk ' % fdict['tbr']
1326 if fdict.get('container') is not None:
1327 if res:
1328 res += ', '
1329 res += '%s container' % fdict['container']
1330 if (fdict.get('vcodec') is not None and
1331 fdict.get('vcodec') != 'none'):
1332 if res:
1333 res += ', '
1334 res += fdict['vcodec']
91c7271a 1335 if fdict.get('vbr') is not None:
c57f7757
PH
1336 res += '@'
1337 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1338 res += 'video@'
1339 if fdict.get('vbr') is not None:
1340 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1341 if fdict.get('fps') is not None:
1342 res += ', %sfps' % fdict['fps']
c57f7757
PH
1343 if fdict.get('acodec') is not None:
1344 if res:
1345 res += ', '
1346 if fdict['acodec'] == 'none':
1347 res += 'video only'
1348 else:
1349 res += '%-5s' % fdict['acodec']
1350 elif fdict.get('abr') is not None:
1351 if res:
1352 res += ', '
1353 res += 'audio'
1354 if fdict.get('abr') is not None:
1355 res += '@%3dk' % fdict['abr']
1356 if fdict.get('asr') is not None:
1357 res += ' (%5dHz)' % fdict['asr']
1358 if fdict.get('filesize') is not None:
1359 if res:
1360 res += ', '
1361 res += format_bytes(fdict['filesize'])
9732d77e
PH
1362 elif fdict.get('filesize_approx') is not None:
1363 if res:
1364 res += ', '
1365 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1366 return res
91c7271a 1367
def list_formats(self, info_dict):
    """Print a table of the formats of info_dict through to_screen().

    Uses info_dict['formats'], or info_dict itself as the only format
    when that key is absent. Formats whose 'preference' is below -1000
    are left out of the table. When there is more than one format, the
    first and last printed rows are tagged '(worst)' and '(best)'.
    """
    def line(format, idlen=20, note=None):
        if note is None:
            note = self._format_note(format)
        return '%-*s%-10s%-12s%s' % (
            idlen + 1,
            format['format_id'],
            format['ext'],
            self.format_resolution(format),
            note,
        )

    formats = info_dict.get('formats', [info_dict])
    # Column width: the header text or the longest id, whichever is larger
    idlen = max([len('format code')] + [len(f['format_id']) for f in formats])

    visible = [
        f for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    notes = [self._format_note(f) for f in visible]
    formats_s = [line(f, idlen, note) for f, note in zip(visible, notes)]
    if len(formats) > 1 and formats_s:
        # Spacing depends on the note of the row actually being tagged
        formats_s[0] += (' ' if notes[0] else '') + '(worst)'
        formats_s[-1] += (' ' if notes[-1] else '') + '(best)'

    header_line = line({
        'format_id': 'format code', 'ext': 'extension',
        'resolution': 'resolution', 'format_note': 'note'}, idlen=idlen)
    self.to_screen('[info] Available formats for %s:\n%s\n%s' %
                   (info_dict['id'], header_line, '\n'.join(formats_s)))
dca08720
PH
1392
def urlopen(self, req):
    """ Start an HTTP download """

    # According to RFC 3986, URLs can not contain non-ASCII characters;
    # however this is not always respected by websites, and some hand out
    # URLs with non percent-encoded non-ASCII characters (see telemb.py,
    # ard.py [#3412]). urllib chokes on such URLs
    # (see http://bugs.python.org/issue3991), so the request's URL is
    # replaced with its percent-encoded form.
    string_types = basestring if sys.version_info < (3, 0) else compat_str
    req_is_string = isinstance(req, string_types)
    if req_is_string:
        url = req
    else:
        url = req.get_full_url()
    url_escaped = escape_url(url)

    # Only rebuild the request when escaping actually changed something
    if url != url_escaped:
        if req_is_string:
            req = url_escaped
        else:
            req = compat_urllib_request.Request(
                url_escaped,
                data=req.data,
                headers=req.headers,
                origin_req_host=req.origin_req_host,
                unverifiable=req.unverifiable)

    return self._opener.open(req, timeout=self._socket_timeout)
dca08720
PH
1416
def print_debug_header(self):
    """Write the '[debug]' diagnostic header; no-op unless 'verbose' is set.

    Reports encodings, the program version, the git HEAD of the source
    directory (best effort), the Python version, external program
    versions and the proxy map. When the 'call_home' parameter is set it
    additionally fetches the public IP address and the latest released
    version from yt-dl.org and warns if the running version is older.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    stdout_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encoding_str = (
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % (
            locale.getpreferredencoding(),
            sys.getfilesystemencoding(),
            stdout_encoding,
            self.get_encoding()))
    write_string(encoding_str, encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        sp = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=os.path.dirname(os.path.abspath(__file__)))
        out = sp.communicate()[0].decode().strip()
        # Require the whole output to be a hex hash, not just its start
        if re.match(r'[0-9a-f]+$', out):
            self._write_string('[debug] Git HEAD: ' + out + '\n')
    except Exception:
        # Best effort only (git may be missing), but do not swallow
        # KeyboardInterrupt / SystemExit
        try:
            sys.exc_clear()
        except Exception:
            # sys.exc_clear() is not available on every Python version
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions()
    exe_versions['rtmpdump'] = rtmpdump_version()
    exe_str = ', '.join(
        '%s %s' % (exe, v)
        for exe, v in sorted(exe_versions.items())
        if v
    )
    if not exe_str:
        exe_str = 'none'
    self._write_string('[debug] exe versions: %s\n' % exe_str)

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if self.params.get('call_home', False):
        ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
        self._write_string('[debug] Public IP address: %s\n' % ipaddr)
        latest_version = self.urlopen(
            'https://yt-dl.org/latest/version').read().decode('utf-8')
        if version_tuple(latest_version) > version_tuple(__version__):
            self.report_warning(
                'You are using an outdated version (newest version: %s)! '
                'See https://yt-dl.org/update if you need help updating.' %
                latest_version)
1481
def _setup_opener(self):
    """Create the URL opener and store it as self._opener.

    Also sets self._socket_timeout (600 when 'socket_timeout' is unset)
    and self.cookiejar (a MozillaCookieJar bound to 'cookiefile' when
    that parameter is given, otherwise a plain CookieJar).
    """
    configured_timeout = self.params.get('socket_timeout')
    if configured_timeout is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(configured_timeout)

    cookie_path = self.params.get('cookiefile')
    proxy_param = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()
    cookie_processor = compat_urllib_request.HTTPCookieProcessor(
        self.cookiejar)

    if proxy_param is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_param == '':
        # An explicitly empty proxy setting means no proxy mapping at all
        proxies = {}
    else:
        proxies = {'http': proxy_param, 'https': proxy_param}
    proxy_handler = compat_urllib_request.ProxyHandler(proxies)

    debuglevel = 1 if self.params.get('debug_printtraffic') else 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    new_opener = compat_urllib_request.build_opener(
        https_handler, proxy_handler, cookie_processor, ydlh)
    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    new_opener.addheaders = []
    self._opener = new_opener
62fec3b2
PH
1521
def encode(self, s):
    """Return s as bytes, encoded with self.get_encoding().

    bytes input is returned unchanged. On UnicodeEncodeError the error's
    reason is extended with a hint about the --encoding option and the
    error is re-raised.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason = err.reason + '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
1531
def get_encoding(self):
    """Return the 'encoding' parameter, or preferredencoding() when unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured