]> jfr.im git - yt-dlp.git/blame - youtube_dl/YoutubeDL.py
[YoutubeDL] Fix typo in m3u8_native fixup
[yt-dlp.git] / youtube_dl / YoutubeDL.py
CommitLineData
8222d8de
JMF
1#!/usr/bin/env python
2# -*- coding: utf-8 -*-
3
6febd1c1 4from __future__ import absolute_import, unicode_literals
8222d8de 5
26e63931 6import collections
31bd3925 7import contextlib
9d2ecdbc 8import datetime
c1c9a79c 9import errno
31bd3925 10import fileinput
8222d8de 11import io
b82f815f 12import itertools
8694c600 13import json
62fec3b2 14import locale
083c9df9 15import operator
8222d8de 16import os
dca08720 17import platform
8222d8de
JMF
18import re
19import shutil
dca08720 20import subprocess
8222d8de
JMF
21import socket
22import sys
23import time
67134eab 24import tokenize
8222d8de
JMF
25import traceback
26
8c25f81b 27from .compat import (
82d8a8b6 28 compat_basestring,
dca08720 29 compat_cookiejar,
4644ac55 30 compat_expanduser,
003c69a8 31 compat_get_terminal_size,
ce02ed60 32 compat_http_client,
4f026faf 33 compat_kwargs,
e9c0cdd3 34 compat_os_name,
ce02ed60 35 compat_str,
67134eab 36 compat_tokenize_tokenize,
ce02ed60
PH
37 compat_urllib_error,
38 compat_urllib_request,
8b172c2e 39 compat_urllib_request_DataHandler,
8c25f81b
PH
40)
41from .utils import (
ce02ed60
PH
42 ContentTooShortError,
43 date_from_str,
44 DateRange,
acd69589 45 DEFAULT_OUTTMPL,
ce02ed60 46 determine_ext,
b5559424 47 determine_protocol,
ce02ed60 48 DownloadError,
c0384f22 49 encode_compat_str,
ce02ed60 50 encodeFilename,
9b9c5355 51 error_to_compat_str,
ce02ed60 52 ExtractorError,
02dbf93f 53 format_bytes,
525ef922 54 formatSeconds,
ce02ed60 55 locked_file,
dca08720 56 make_HTTPS_handler,
ce02ed60 57 MaxDownloadsReached,
b7ab0590 58 PagedList,
083c9df9 59 parse_filesize,
91410c9b 60 PerRequestProxyHandler,
ce02ed60 61 PostProcessingError,
dca08720 62 platform_name,
ce02ed60 63 preferredencoding,
cfb56d1a 64 render_table,
ce02ed60
PH
65 SameFileError,
66 sanitize_filename,
1bb5c511 67 sanitize_path,
67dda517 68 sanitized_Request,
e5660ee6 69 std_headers,
ce02ed60 70 subtitles_filename,
ce02ed60 71 UnavailableVideoError,
29eb5174 72 url_basename,
58b1f00d 73 version_tuple,
ce02ed60
PH
74 write_json_file,
75 write_string,
6a3f4c3f 76 YoutubeDLCookieProcessor,
dca08720 77 YoutubeDLHandler,
6350728b 78 prepend_extension,
b29e0000 79 replace_extension,
7d4111ed 80 args_to_str,
05900629 81 age_restricted,
ce02ed60 82)
a0e07d31 83from .cache import Cache
023fa8c4 84from .extractor import get_info_extractor, gen_extractors
3bc2ddcc 85from .downloader import get_suitable_downloader
4c83c967 86from .downloader.rtmp import rtmpdump_version
4f026faf 87from .postprocessor import (
f17f8651 88 FFmpegFixupM3u8PP,
62cd676c 89 FFmpegFixupM4aPP,
6271f1ca 90 FFmpegFixupStretchedPP,
4f026faf
PH
91 FFmpegMergerPP,
92 FFmpegPostProcessor,
93 get_postprocessor,
94)
dca08720 95from .version import __version__
8222d8de 96
e9c0cdd3
YCH
97if compat_os_name == 'nt':
98 import ctypes
99
8222d8de
JMF
100
101class YoutubeDL(object):
102 """YoutubeDL class.
103
104 YoutubeDL objects are the ones responsible for downloading the
105 actual video file and writing it to disk if the user has requested
106 it, among some other tasks. In most cases there should be one per
107 program. As, given a video URL, the downloader doesn't know how to
108 extract all the needed information, task that InfoExtractors do, it
109 has to pass the URL to one of them.
110
111 For this, YoutubeDL objects have a method that allows
112 InfoExtractors to be registered in a given order. When it is passed
113 a URL, the YoutubeDL object hands it to the first InfoExtractor it
114 finds that reports being able to handle it. The InfoExtractor extracts
115 all the information about the video or videos the URL refers to, and
116 YoutubeDL process the extracted information, possibly using a File
117 Downloader to download the video.
118
119 YoutubeDL objects accept a lot of parameters. In order not to saturate
120 the object constructor with arguments, it receives a dictionary of
121 options instead. These options are available through the params
122 attribute for the InfoExtractors to use. The YoutubeDL also
123 registers itself as the downloader in charge for the InfoExtractors
124 that are added to it, so this is a "mutual registration".
125
126 Available options:
127
128 username: Username for authentication purposes.
129 password: Password for authentication purposes.
180940e0 130 videopassword: Password for accessing a video.
8222d8de
JMF
131 usenetrc: Use netrc for authentication instead.
132 verbose: Print additional info to stdout.
133 quiet: Do not print messages to stdout.
ad8915b7 134 no_warnings: Do not print out anything for warnings.
8222d8de
JMF
135 forceurl: Force printing final URL.
136 forcetitle: Force printing title.
137 forceid: Force printing ID.
138 forcethumbnail: Force printing thumbnail URL.
139 forcedescription: Force printing description.
140 forcefilename: Force printing final filename.
525ef922 141 forceduration: Force printing duration.
8694c600 142 forcejson: Force printing info_dict as JSON.
63e0be34
PH
143 dump_single_json: Force printing the info_dict of the whole playlist
144 (or video) as a single JSON line.
8222d8de 145 simulate: Do not download the video files.
d8600787 146 format: Video format code. See options.py for more information.
8222d8de
JMF
147 outtmpl: Template for output names.
148 restrictfilenames: Do not allow "&" and spaces in file names
149 ignoreerrors: Do not stop on download errors.
d22dec74 150 force_generic_extractor: Force downloader to use the generic extractor
8222d8de
JMF
151 nooverwrites: Prevent overwriting files.
152 playliststart: Playlist item to start at.
153 playlistend: Playlist item to end at.
c14e88f0 154 playlist_items: Specific indices of playlist to download.
ff815fe6 155 playlistreverse: Download playlist items in reverse order.
8222d8de
JMF
156 matchtitle: Download only matching titles.
157 rejecttitle: Reject downloads for matching titles.
8bf9319e 158 logger: Log messages to a logging.Logger instance.
8222d8de
JMF
159 logtostderr: Log messages to stderr instead of stdout.
160 writedescription: Write the video description to a .description file
161 writeinfojson: Write the video description to a .info.json file
1fb07d10 162 writeannotations: Write the video annotations to a .annotations.xml file
8222d8de 163 writethumbnail: Write the thumbnail image to a file
ec82d85a 164 write_all_thumbnails: Write all thumbnail formats to files
8222d8de 165 writesubtitles: Write the video subtitles to a file
741dd8ea 166 writeautomaticsub: Write the automatically generated subtitles to a file
8222d8de 167 allsubtitles: Downloads all the subtitles of the video
0b7f3118 168 (requires writesubtitles or writeautomaticsub)
8222d8de 169 listsubtitles: Lists all available subtitles for the video
a504ced0 170 subtitlesformat: The format code for subtitles
aa6a10c4 171 subtitleslangs: List of languages of the subtitles to download
8222d8de
JMF
172 keepvideo: Keep the video file after post-processing
173 daterange: A DateRange object, download only if the upload_date is in the range.
174 skip_download: Skip the actual download of the video file
c35f9e72 175 cachedir: Location of the cache files in the filesystem.
a0e07d31 176 False to disable filesystem cache.
47192f92 177 noplaylist: Download single video instead of a playlist if in doubt.
8dbe9899
PH
178 age_limit: An integer representing the user's age in years.
179 Unsuitable videos for the given age are skipped.
5fe18bdb
PH
180 min_views: An integer representing the minimum view count the video
181 must have in order to not be skipped.
182 Videos without view count information are always
183 downloaded. None for no limit.
184 max_views: An integer representing the maximum view count.
185 Videos that are more popular than that are not
186 downloaded.
187 Videos without view count information are always
188 downloaded. None for no limit.
189 download_archive: File name of a file where all downloads are recorded.
c1c9a79c
PH
190 Videos already present in the file are not downloaded
191 again.
dca08720 192 cookiefile: File name where cookies should be read from and dumped to.
a1ee09e8 193 nocheckcertificate:Do not verify SSL certificates
7e8c0af0
PH
194 prefer_insecure: Use HTTP instead of HTTPS to retrieve information.
195 At the moment, this is only supported by YouTube.
a1ee09e8 196 proxy: URL of the proxy server to use
91410c9b
PH
197 cn_verification_proxy: URL of the proxy to use for IP address verification
198 on Chinese sites. (Experimental)
e344693b 199 socket_timeout: Time to wait for unresponsive hosts, in seconds
0783b09b
PH
200 bidi_workaround: Work around buggy terminals without bidirectional text
201 support, using fribidi
a0ddb8a2 202 debug_printtraffic:Print out sent and received HTTP traffic
7b0817e8 203 include_ads: Download ads as well
04b4d394
PH
204 default_search: Prepend this string if an input url is not valid.
205 'auto' for elaborate guessing
62fec3b2 206 encoding: Use this encoding instead of the system-specified.
e8ee972c 207 extract_flat: Do not resolve URLs, return the immediate result.
057a5206
PH
208 Pass in 'in_playlist' to only show this behavior for
209 playlist items.
4f026faf 210 postprocessors: A list of dictionaries, each with an entry
71b640cc
PH
211 * key: The name of the postprocessor. See
212 youtube_dl/postprocessor/__init__.py for a list.
4f026faf
PH
213 as well as any further keyword arguments for the
214 postprocessor.
71b640cc
PH
215 progress_hooks: A list of functions that get called on download
216 progress, with a dictionary with the entries
5cda4eda 217 * status: One of "downloading", "error", or "finished".
ee69b99a 218 Check this first and ignore unknown values.
71b640cc 219
5cda4eda 220 If status is one of "downloading", or "finished", the
ee69b99a
PH
221 following properties may also be present:
222 * filename: The final filename (always present)
5cda4eda 223 * tmpfilename: The filename we're currently writing to
71b640cc
PH
224 * downloaded_bytes: Bytes on disk
225 * total_bytes: Size of the whole file, None if unknown
5cda4eda
PH
226 * total_bytes_estimate: Guess of the eventual file size,
227 None if unavailable.
228 * elapsed: The number of seconds since download started.
71b640cc
PH
229 * eta: The estimated time in seconds, None if unknown
230 * speed: The download speed in bytes/second, None if
231 unknown
5cda4eda
PH
232 * fragment_index: The counter of the currently
233 downloaded video fragment.
234 * fragment_count: The number of fragments (= individual
235 files that will be merged)
71b640cc
PH
236
237 Progress hooks are guaranteed to be called at least once
238 (with status "finished") if the download is successful.
45598f15 239 merge_output_format: Extension to use when merging formats.
6271f1ca
PH
240 fixup: Automatically correct known faults of the file.
241 One of:
242 - "never": do nothing
243 - "warn": only emit a warning
244 - "detect_or_warn": check whether we can do anything
62cd676c 245 about it, warn otherwise (default)
be4a824d 246 source_address: (Experimental) Client-side IP address to bind to.
6ec6cb4e 247 call_home: Boolean, true iff we are allowed to contact the
8bfa7545 248 youtube-dl servers for debugging.
5f0d813d 249 sleep_interval: Number of seconds to sleep before each download.
cfb56d1a
PH
250 listformats: Print an overview of available video formats and exit.
251 list_thumbnails: Print a table of all thumbnails and exit.
347de493
PH
252 match_filter: A function that gets called with the info_dict of
253 every video.
254 If it returns a message, the video is ignored.
255 If it returns None, the video is downloaded.
256 match_filter_func in utils.py is one example for this.
7e5db8c9 257 no_color: Do not emit color codes in output.
71b640cc 258
85729c51
PH
259 The following options determine which downloader is picked:
260 external_downloader: Executable of the external downloader to call.
261 None or unset for standard (built-in) downloader.
262 hls_prefer_native: Use the native HLS downloader instead of ffmpeg/avconv.
fe7e0c98 263
8222d8de 264 The following parameters are not used by YoutubeDL itself, they are used by
c75f0b36 265 the downloader (see youtube_dl/downloader/common.py):
8222d8de 266 nopart, updatetime, buffersize, ratelimit, min_filesize, max_filesize, test,
881e6a1f 267 noresizebuffer, retries, continuedl, noprogress, consoletitle,
7d106a65 268 xattr_set_filesize, external_downloader_args, hls_use_mpegts.
76b1bd67
JMF
269
270 The following options are used by the post processors:
271 prefer_ffmpeg: If True, use ffmpeg instead of avconv if both are available,
272 otherwise prefer avconv.
f72b0a60
S
273 postprocessor_args: A list of additional command-line arguments for the
274 postprocessor.
8222d8de
JMF
275 """
276
277 params = None
278 _ies = []
279 _pps = []
280 _download_retcode = None
281 _num_downloads = None
282 _screen_file = None
283
def __init__(self, params=None, auto_init=True):
    """Create a YoutubeDL object with the given options.

    params:    dictionary of options (see the class docstring); None is
               treated as an empty dictionary.
    auto_init: when true, print the debug header and register the default
               info extractors.
    """
    if params is None:
        params = {}
    self._ies = []
    self._ies_instances = {}
    self._pps = []
    self._progress_hooks = []
    self._download_retcode = 0
    self._num_downloads = 0
    # Select the stream with a conditional rather than by indexing a list
    # with the option value: a value such as None would raise TypeError
    # when used as a list index.
    self._screen_file = sys.stderr if params.get('logtostderr', False) else sys.stdout
    self._err_file = sys.stderr
    self.params = {
        # Default parameters
        'nocheckcertificate': False,
    }
    self.params.update(params)
    self.cache = Cache(self)

    if params.get('bidi_workaround', False):
        try:
            import pty
            master, slave = pty.openpty()
            width = compat_get_terminal_size().columns
            if width is None:
                width_args = []
            else:
                width_args = ['-w', str(width)]
            sp_kwargs = dict(
                stdin=subprocess.PIPE,
                stdout=slave,
                stderr=self._err_file)
            # Try bidiv first; fall back to fribidi if bidiv cannot be started
            try:
                self._output_process = subprocess.Popen(
                    ['bidiv'] + width_args, **sp_kwargs
                )
            except OSError:
                self._output_process = subprocess.Popen(
                    ['fribidi', '-c', 'UTF-8'] + width_args, **sp_kwargs)
            self._output_channel = os.fdopen(master, 'rb')
        except OSError as ose:
            # ENOENT: neither helper executable could be found
            if ose.errno == errno.ENOENT:
                self.report_warning('Could not find fribidi executable, ignoring --bidi-workaround . Make sure that fribidi is an executable file in one of the directories in your $PATH.')
            else:
                raise

    if (sys.version_info >= (3,) and sys.platform != 'win32' and
            sys.getfilesystemencoding() in ['ascii', 'ANSI_X3.4-1968'] and
            not params.get('restrictfilenames', False)):
        # On Python 3, the Unicode filesystem API will throw errors (#1474)
        self.report_warning(
            'Assuming --restrict-filenames since file system encoding '
            'cannot encode all characters. '
            'Set the LC_ALL environment variable to fix this.')
        self.params['restrictfilenames'] = True

    if isinstance(params.get('outtmpl'), bytes):
        self.report_warning(
            'Parameter outtmpl is bytes, but should be a unicode string. '
            'Put  from __future__ import unicode_literals  at the top of your code file or consider switching to Python 3.x.')

    self._setup_opener()

    if auto_init:
        self.print_debug_header()
        self.add_default_info_extractors()

    for pp_def_raw in self.params.get('postprocessors', []):
        pp_class = get_postprocessor(pp_def_raw['key'])
        # Work on a copy so the caller's dictionary keeps its 'key' entry
        pp_def = dict(pp_def_raw)
        del pp_def['key']
        pp = pp_class(self, **compat_kwargs(pp_def))
        self.add_post_processor(pp)

    for ph in self.params.get('progress_hooks', []):
        self.add_progress_hook(ph)
360
7d4111ed
PH
def warn_if_short_id(self, argv):
    """Warn when an argument looks like a short YouTube ID starting with a dash.

    Such an argument is a dash followed by exactly ten characters from
    [0-9A-Za-z_-].  The warning shows the command line rewritten with
    those arguments moved behind a '--' separator.
    """
    suspects = []
    regular = []
    for arg in argv:
        if re.match(r'^-[0-9A-Za-z_-]{10}$', arg):
            suspects.append(arg)
        else:
            regular.append(arg)
    if not suspects:
        return

    suggestion = ['youtube-dl'] + regular + ['--'] + suspects
    self.report_warning(
        'Long argument string detected. '
        'Use -- to separate parameters and URLs, like this:\n%s\n' %
        args_to_str(suggestion))
376
8222d8de
JMF
def add_info_extractor(self, ie):
    """Register ie: append it to the extractor list, index it by its
    ie_key() and make this object its downloader."""
    extractors = self._ies
    extractors.append(ie)
    by_key = self._ies_instances
    by_key[ie.ie_key()] = ie
    ie.set_downloader(self)
382
56c73665
JMF
def get_info_extractor(self, ie_key):
    """
    Return the IE instance registered under ie_key.  If none is
    registered yet, a new one is created, added to the extractor list
    and returned.
    """
    known = self._ies_instances.get(ie_key)
    if known is not None:
        return known
    # Here get_info_extractor is the module-level factory, not this method
    created = get_info_extractor(ie_key)()
    self.add_info_extractor(created)
    return created
394
023fa8c4
JMF
def add_default_info_extractors(self):
    """
    Register every InfoExtractor yielded by gen_extractors(), in order,
    at the end of the extractor list.
    """
    for extractor in gen_extractors():
        self.add_info_extractor(extractor)
401
8222d8de
JMF
def add_post_processor(self, pp):
    """Append pp to the post-processing chain and make this object its
    downloader."""
    chain = self._pps
    chain.append(pp)
    pp.set_downloader(self)
406
933605d7
JMF
def add_progress_hook(self, ph):
    """Register ph as a progress hook (currently only for the file downloader)"""
    hooks = self._progress_hooks
    hooks.append(ph)
8ab470f1 410
def _bidi_workaround(self, message):
    """Run message through the external bidi helper process.

    Returns message unchanged when no helper output channel exists.
    Otherwise the message is written to the helper's stdin and one line
    is read back per line of input.
    """
    if not hasattr(self, '_output_channel'):
        return message

    assert hasattr(self, '_output_process')
    assert isinstance(message, compat_str)
    expected_lines = 1 + message.count('\n')
    helper_stdin = self._output_process.stdin
    helper_stdin.write((message + '\n').encode('utf-8'))
    helper_stdin.flush()
    pieces = []
    for _ in range(expected_lines):
        pieces.append(self._output_channel.readline().decode('utf-8'))
    converted = ''.join(pieces)
    # Drop the final character (the newline appended before sending)
    return converted[:-1]
1c088fa8 423
def to_screen(self, message, skip_eol=False):
    """Print message via to_stdout(), honouring the 'quiet' option."""
    result = self.to_stdout(message, skip_eol, check_quiet=True)
    return result
427
def _write_string(self, s, out=None):
    """Write s to out through write_string(), using the 'encoding' option."""
    chosen_encoding = self.params.get('encoding')
    write_string(s, out=out, encoding=chosen_encoding)
734f90bb 430
def to_stdout(self, message, skip_eol=False, check_quiet=False):
    """Send message to the logger's debug(), or print it to the screen file.

    With a 'logger' option set, nothing is printed.  Otherwise the message
    is printed unless check_quiet is true and the 'quiet' option is set.
    A newline is appended unless skip_eol is true.
    """
    logger = self.params.get('logger')
    if logger:
        logger.debug(message)
        return
    if check_quiet and self.params.get('quiet', False):
        return

    converted = self._bidi_workaround(message)
    line_end = ('\n', '')[skip_eol]
    self._write_string(converted + line_end, self._screen_file)
8222d8de
JMF
441
def to_stderr(self, message):
    """Send message to the logger's error(), or print it to the error file."""
    assert isinstance(message, compat_str)
    logger = self.params.get('logger')
    if logger:
        logger.error(message)
        return
    converted = self._bidi_workaround(message)
    self._write_string(converted + '\n', self._err_file)
8222d8de 451
1e5b9a95
PH
def to_console_title(self, message):
    """Set the console title to message when the 'consoletitle' option is on."""
    enabled = self.params.get('consoletitle', False)
    if not enabled:
        return
    if compat_os_name == 'nt' and ctypes.windll.kernel32.GetConsoleWindow():
        # c_wchar_p() might not be necessary if `message` is
        # already of type unicode()
        title = ctypes.c_wchar_p(message)
        ctypes.windll.kernel32.SetConsoleTitleW(title)
    elif 'TERM' in os.environ:
        escape_sequence = '\033]0;%s\007' % message
        self._write_string(escape_sequence, self._screen_file)
1e5b9a95 461
bdde425c
PH
def save_console_title(self):
    """Push the current console title on the terminal's title stack.

    Does nothing unless the 'consoletitle' option is set and TERM is
    present in the environment.
    """
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Save the title on stack
        self._write_string('\033[22;0t', self._screen_file)
bdde425c
PH
468
def restore_console_title(self):
    """Pop the console title from the terminal's title stack.

    Does nothing unless the 'consoletitle' option is set and TERM is
    present in the environment.
    """
    wanted = self.params.get('consoletitle', False)
    if wanted and 'TERM' in os.environ:
        # Restore the title from stack
        self._write_string('\033[23;0t', self._screen_file)
bdde425c
PH
475
def __enter__(self):
    """Enter the context: save the console title and return this object."""
    save_title = self.save_console_title
    save_title()
    return self
479
def __exit__(self, *args):
    """Leave the context: restore the console title and, when a
    'cookiefile' option is set, save the cookie jar."""
    self.restore_console_title()

    cookie_path = self.params.get('cookiefile')
    if cookie_path is None:
        return
    self.cookiejar.save()
bdde425c 485
8222d8de
JMF
def trouble(self, message=None, tb=None):
    """Determine action to take when a download problem appears.

    The message (if any) is printed to stderr first.  In verbose mode a
    traceback is printed as well: tb when given, otherwise one built from
    the exception being handled or from the current stack.

    Unless the 'ignoreerrors' option is set, DownloadError is raised;
    otherwise the download return code is set to 1.
    """
    def wrapped_exc_info():
        # exc_info carried by the exception being handled, or None
        exc_type, exc_value = sys.exc_info()[:2]
        if exc_type and hasattr(exc_value, 'exc_info') and exc_value.exc_info[0]:
            return exc_value.exc_info
        return None

    if message is not None:
        self.to_stderr(message)

    if self.params.get('verbose'):
        if tb is None:
            if sys.exc_info()[0]:  # if .trouble has been called from an except block
                parts = []
                inner = wrapped_exc_info()
                if inner is not None:
                    parts.append(''.join(traceback.format_exception(*inner)))
                parts.append(encode_compat_str(traceback.format_exc()))
                tb = ''.join(parts)
            else:
                tb = ''.join(traceback.format_list(traceback.extract_stack()))
        self.to_stderr(tb)

    if not self.params.get('ignoreerrors', False):
        exc_info = wrapped_exc_info()
        if exc_info is None:
            exc_info = sys.exc_info()
        raise DownloadError(message, exc_info)

    self._download_retcode = 1
515
def report_warning(self, message):
    '''
    Emit a warning.  With a 'logger' option it goes to logger.warning();
    otherwise it is printed to stderr prefixed with 'WARNING:' (colored
    when stderr is a tty), unless the 'no_warnings' option is set.
    '''
    logger = self.params.get('logger')
    if logger is not None:
        logger.warning(message)
        return
    if self.params.get('no_warnings'):
        return

    colored = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        compat_os_name != 'nt')
    header = '\033[0;33mWARNING:\033[0m' if colored else 'WARNING:'
    self.to_stderr('%s %s' % (header, message))
8222d8de
JMF
532
def report_error(self, message, tb=None):
    '''
    Forward to trouble() with the message prefixed by 'ERROR:', which is
    colored in red when stderr is a tty.
    '''
    colored = (
        not self.params.get('no_color') and
        self._err_file.isatty() and
        compat_os_name != 'nt')
    header = '\033[0;31mERROR:\033[0m' if colored else 'ERROR:'
    self.trouble('%s %s' % (header, message), tb)
544
8222d8de
JMF
def report_file_already_downloaded(self, file_name):
    """Tell the user that file_name has already been fully downloaded.

    Falls back to a message without the file name if printing it raises
    UnicodeEncodeError.
    """
    try:
        notice = '[download] %s has already been downloaded' % file_name
        self.to_screen(notice)
    except UnicodeEncodeError:
        self.to_screen('[download] The file has already been downloaded')
8222d8de 551
8222d8de
JMF
def prepare_filename(self, info_dict):
    """Generate the output filename from the 'outtmpl' template.

    The template is filled from a copy of info_dict extended with
    'epoch', 'autonumber', a zero-padded 'playlist_index' and a derived
    'resolution'.  Values are sanitized; missing fields render as 'NA'.
    Returns None after reporting an error if the template is invalid.
    """
    try:
        fields = dict(info_dict)

        fields['epoch'] = int(time.time())
        digits = self.params.get('autonumber_size')
        if digits is None:
            digits = 5
        fields['autonumber'] = ('%0' + str(digits) + 'd') % self._num_downloads
        if fields.get('playlist_index') is not None:
            # Pad the index to the width of the playlist's entry count
            fields['playlist_index'] = '%0*d' % (
                len(str(fields['n_entries'])), fields['playlist_index'])
        if fields.get('resolution') is None:
            width = fields.get('width')
            height = fields.get('height')
            if width and height:
                fields['resolution'] = '%dx%d' % (width, height)
            elif height:
                fields['resolution'] = '%sp' % height
            elif width:
                fields['resolution'] = '%dx?' % width

        restricted = self.params.get('restrictfilenames')

        def clean(key, value):
            return sanitize_filename(
                compat_str(value), restricted=restricted, is_id=(key == 'id'))

        # Fields absent from the dict (or None) are rendered as 'NA'
        rendered = collections.defaultdict(lambda: 'NA')
        for key, value in fields.items():
            if value is not None:
                rendered[key] = clean(key, value)

        template = compat_expanduser(self.params.get('outtmpl', DEFAULT_OUTTMPL))
        filename = template % rendered
        # Temporary fix for #4787
        # 'Treat' all problem characters by passing filename through preferredencoding
        # to workaround encoding issues with subprocess on python2 @ Windows
        if sys.version_info < (3, 0) and sys.platform == 'win32':
            filename = encodeFilename(filename, True).decode(preferredencoding())
        return sanitize_path(filename)
    except ValueError as err:
        self.report_error('Error in output template: ' + str(err) + ' (encoding: ' + repr(preferredencoding()) + ')')
        return None
594
def _match_entry(self, info_dict, incomplete):
    """ Returns None iff the file should be downloaded

    Otherwise returns a message explaining why the entry is skipped.
    The checks run in order: title patterns, upload date range, view
    count limits, age restriction, download archive and, unless
    incomplete is true, the user-supplied match_filter.
    """
    options = self.params
    video_title = info_dict.get('title', info_dict.get('id', 'video'))

    if 'title' in info_dict:
        # This can happen when we're just evaluating the playlist
        title = info_dict['title']
        wanted = options.get('matchtitle', False)
        if wanted and not re.search(wanted, title, re.IGNORECASE):
            return '"' + title + '" title did not match pattern "' + wanted + '"'
        unwanted = options.get('rejecttitle', False)
        if unwanted and re.search(unwanted, title, re.IGNORECASE):
            return '"' + title + '" title matched reject pattern "' + unwanted + '"'

    upload_date = info_dict.get('upload_date')
    if upload_date is not None:
        allowed_range = options.get('daterange', DateRange())
        if upload_date not in allowed_range:
            return '%s upload date is not in range %s' % (date_from_str(upload_date).isoformat(), allowed_range)

    views = info_dict.get('view_count')
    if views is not None:
        fewest = options.get('min_views')
        if fewest is not None and views < fewest:
            return 'Skipping %s, because it has not reached minimum view count (%d/%d)' % (video_title, views, fewest)
        most = options.get('max_views')
        if most is not None and views > most:
            return 'Skipping %s, because it has exceeded the maximum view count (%d/%d)' % (video_title, views, most)

    if age_restricted(info_dict.get('age_limit'), options.get('age_limit')):
        return 'Skipping "%s" because it is age restricted' % video_title
    if self.in_download_archive(info_dict):
        return '%s has already been recorded in archive' % video_title

    if incomplete:
        return None
    match_filter = options.get('match_filter')
    if match_filter is None:
        return None
    # The filter's own result is the verdict: None or a skip message
    return match_filter(info_dict)
fe7e0c98 636
b6c45014
JMF
637 @staticmethod
638 def add_extra_info(info_dict, extra_info):
639 '''Set the keys from extra_info in info dict if they are missing'''
640 for key, value in extra_info.items():
641 info_dict.setdefault(key, value)
642
def extract_info(self, url, download=True, ie_key=None, extra_info=None,
                 process=True, force_generic_extractor=False):
    '''
    Extract the information for url with the first suitable InfoExtractor.

    Returns the extractor's result: resolved through process_ie_result()
    when process is true, the raw result otherwise.  Returns None when no
    extractor is suitable, when the extractor returns None, or when an
    error was reported instead of raised.
    If 'download', also downloads the videos.
    ie_key restricts extraction to the extractor with that key;
    force_generic_extractor selects 'Generic' when no ie_key is given.
    extra_info is a dict containing the extra values to add to each result
    '''
    # A fresh dict per call: a mutable default would be shared by every
    # call that omits the argument.
    if extra_info is None:
        extra_info = {}

    if not ie_key and force_generic_extractor:
        ie_key = 'Generic'

    if ie_key:
        ies = [self.get_info_extractor(ie_key)]
    else:
        ies = self._ies

    for ie in ies:
        if not ie.suitable(url):
            continue

        if not ie.working():
            self.report_warning('The program functionality for this site has been marked as broken, '
                                'and will probably not work.')

        try:
            ie_result = ie.extract(url)
            if ie_result is None:  # Finished already (backwards compatibility; listformats and friends should be moved here)
                break
            if isinstance(ie_result, list):
                # Backwards compatibility: old IE result format
                ie_result = {
                    '_type': 'compat_list',
                    'entries': ie_result,
                }
            self.add_default_extra_info(ie_result, ie, url)
            if process:
                return self.process_ie_result(ie_result, download, extra_info)
            else:
                return ie_result
        except ExtractorError as e:  # An error we somewhat expected
            self.report_error(compat_str(e), e.format_traceback())
            break
        except MaxDownloadsReached:
            raise
        except Exception as e:
            if self.params.get('ignoreerrors', False):
                self.report_error(error_to_compat_str(e), tb=encode_compat_str(traceback.format_exc()))
                break
            else:
                raise
    else:
        # Only reached when the loop ends without a break
        self.report_error('no suitable InfoExtractor for URL %s' % url)
fe7e0c98 695
ea38e55f
PH
def add_default_extra_info(self, ie_result, ie, url):
    """Fill in, where missing, the extractor name and key, the webpage URL
    and its basename on ie_result."""
    defaults = {
        'extractor': ie.IE_NAME,
        'webpage_url': url,
        'webpage_url_basename': url_basename(url),
        'extractor_key': ie.ie_key(),
    }
    self.add_extra_info(ie_result, defaults)
703
8222d8de
JMF
def process_ie_result(self, ie_result, download=True, extra_info=None):
    """
    Take the result of the ie(may be modified) and resolve all unresolved
    references (URLs, playlist items).

    It will also download the videos if 'download'.
    Returns the resolved ie_result.

    ie_result is dispatched on its '_type' key ('video' when absent):
    'video', 'url', 'url_transparent', 'playlist'/'multi_video' and the
    legacy 'compat_list'. Any other type raises Exception.
    extra_info is a dict of additional fields handed on to the resolved
    results; None (the default) means "no extra info".
    """
    # Avoid a shared mutable default argument.
    if extra_info is None:
        extra_info = {}

    result_type = ie_result.get('_type', 'video')

    if result_type in ('url', 'url_transparent'):
        extract_flat = self.params.get('extract_flat', False)
        if ((extract_flat == 'in_playlist' and 'playlist' in extra_info) or
                extract_flat is True):
            # Flat extraction: do not resolve the reference any further
            if self.params.get('forcejson', False):
                self.to_stdout(json.dumps(ie_result))
            return ie_result

    if result_type == 'video':
        self.add_extra_info(ie_result, extra_info)
        return self.process_video_result(ie_result, download=download)
    elif result_type == 'url':
        # We have to add extra_info to the results because it may be
        # contained in a playlist
        return self.extract_info(ie_result['url'],
                                 download,
                                 ie_key=ie_result.get('ie_key'),
                                 extra_info=extra_info)
    elif result_type == 'url_transparent':
        # Use the information from the embedding page
        info = self.extract_info(
            ie_result['url'], ie_key=ie_result.get('ie_key'),
            extra_info=extra_info, download=False, process=False)

        # NOTE(review): extract_info appears to return None when extraction
        # fails and the error is only reported; do not crash on info.copy()
        # in that case, just hand the empty result back.
        if not info:
            return info

        # Every non-None field of the embedding result overrides the
        # extracted one, except the fields describing the reference itself.
        force_properties = dict(
            (k, v) for k, v in ie_result.items() if v is not None)
        for f in ('_type', 'url', 'ie_key'):
            if f in force_properties:
                del force_properties[f]
        new_result = info.copy()
        new_result.update(force_properties)

        assert new_result.get('_type') != 'url_transparent'

        return self.process_ie_result(
            new_result, download=download, extra_info=extra_info)
    elif result_type == 'playlist' or result_type == 'multi_video':
        # We process each entry in the playlist
        playlist = ie_result.get('title') or ie_result.get('id')
        self.to_screen('[download] Downloading playlist: %s' % playlist)

        playlist_results = []

        playliststart = self.params.get('playliststart', 1) - 1
        playlistend = self.params.get('playlistend')
        # For backwards compatibility, interpret -1 as whole list
        if playlistend == -1:
            playlistend = None

        playlistitems_str = self.params.get('playlist_items')
        playlistitems = None
        if playlistitems_str is not None:
            def iter_playlistitems(spec):
                # 'a-b' segments expand to the inclusive range, anything
                # else is a single 1-based index
                for string_segment in spec.split(','):
                    if '-' in string_segment:
                        start, end = string_segment.split('-')
                        for item in range(int(start), int(end) + 1):
                            yield int(item)
                    else:
                        yield int(string_segment)
            playlistitems = iter_playlistitems(playlistitems_str)

        def select_playlistitems(all_entries):
            # Pick the requested 1-based items, silently dropping the ones
            # that fall outside of the available entries
            n_all = len(all_entries)
            return [
                all_entries[i - 1] for i in playlistitems
                if -n_all <= i - 1 < n_all]

        ie_entries = ie_result['entries']
        if isinstance(ie_entries, list):
            n_all_entries = len(ie_entries)
            if playlistitems:
                entries = select_playlistitems(ie_entries)
            else:
                entries = ie_entries[playliststart:playlistend]
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Collected %d video ids (downloading %d of them)' %
                (ie_result['extractor'], playlist, n_all_entries, n_entries))
        elif isinstance(ie_entries, PagedList):
            if playlistitems:
                entries = []
                for item in playlistitems:
                    entries.extend(ie_entries.getslice(
                        item - 1, item
                    ))
            else:
                entries = ie_entries.getslice(
                    playliststart, playlistend)
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, n_entries))
        else:  # iterable
            if playlistitems:
                # Same bounds handling as for plain lists: an out of range
                # item must not raise IndexError
                entries = select_playlistitems(list(ie_entries))
            else:
                entries = list(itertools.islice(
                    ie_entries, playliststart, playlistend))
            n_entries = len(entries)
            self.to_screen(
                '[%s] playlist %s: Downloading %d videos' %
                (ie_result['extractor'], playlist, n_entries))

        if self.params.get('playlistreverse', False):
            entries = entries[::-1]

        for i, entry in enumerate(entries, 1):
            self.to_screen('[download] Downloading video %s of %s' % (i, n_entries))
            extra = {
                'n_entries': n_entries,
                'playlist': playlist,
                'playlist_id': ie_result.get('id'),
                'playlist_title': ie_result.get('title'),
                'playlist_index': i + playliststart,
                'extractor': ie_result['extractor'],
                'webpage_url': ie_result['webpage_url'],
                'webpage_url_basename': url_basename(ie_result['webpage_url']),
                'extractor_key': ie_result['extractor_key'],
            }

            # Skip entries rejected by the match filters
            reason = self._match_entry(entry, incomplete=True)
            if reason is not None:
                self.to_screen('[download] ' + reason)
                continue

            entry_result = self.process_ie_result(entry,
                                                  download=download,
                                                  extra_info=extra)
            playlist_results.append(entry_result)
        ie_result['entries'] = playlist_results
        self.to_screen('[download] Finished downloading playlist: %s' % playlist)
        return ie_result
    elif result_type == 'compat_list':
        self.report_warning(
            'Extractor %s returned a compat_list result. '
            'It needs to be updated.' % ie_result.get('extractor'))

        def _fixup(r):
            self.add_extra_info(
                r,
                {
                    'extractor': ie_result['extractor'],
                    'webpage_url': ie_result['webpage_url'],
                    'webpage_url_basename': url_basename(ie_result['webpage_url']),
                    'extractor_key': ie_result['extractor_key'],
                }
            )
            return r
        ie_result['entries'] = [
            self.process_ie_result(_fixup(r), download, extra_info)
            for r in ie_result['entries']
        ]
        return ie_result
    else:
        raise Exception('Invalid result type: %s' % result_type)
867
67134eab
JMF
868 def _build_format_filter(self, filter_spec):
869 " Returns a function to filter the formats according to the filter_spec "
083c9df9
PH
870
871 OPERATORS = {
872 '<': operator.lt,
873 '<=': operator.le,
874 '>': operator.gt,
875 '>=': operator.ge,
876 '=': operator.eq,
877 '!=': operator.ne,
878 }
67134eab 879 operator_rex = re.compile(r'''(?x)\s*
2ec19e95 880 (?P<key>width|height|tbr|abr|vbr|asr|filesize|fps)
083c9df9
PH
881 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?\s*
882 (?P<value>[0-9.]+(?:[kKmMgGtTpPeEzZyY]i?[Bb]?)?)
67134eab 883 $
083c9df9 884 ''' % '|'.join(map(re.escape, OPERATORS.keys())))
67134eab 885 m = operator_rex.search(filter_spec)
9ddb6925
S
886 if m:
887 try:
888 comparison_value = int(m.group('value'))
889 except ValueError:
890 comparison_value = parse_filesize(m.group('value'))
891 if comparison_value is None:
892 comparison_value = parse_filesize(m.group('value') + 'B')
893 if comparison_value is None:
894 raise ValueError(
895 'Invalid value %r in format specification %r' % (
67134eab 896 m.group('value'), filter_spec))
9ddb6925
S
897 op = OPERATORS[m.group('op')]
898
083c9df9 899 if not m:
9ddb6925
S
900 STR_OPERATORS = {
901 '=': operator.eq,
902 '!=': operator.ne,
10d33b34
YCH
903 '^=': lambda attr, value: attr.startswith(value),
904 '$=': lambda attr, value: attr.endswith(value),
905 '*=': lambda attr, value: value in attr,
9ddb6925 906 }
67134eab 907 str_operator_rex = re.compile(r'''(?x)
9ddb6925
S
908 \s*(?P<key>ext|acodec|vcodec|container|protocol)
909 \s*(?P<op>%s)(?P<none_inclusive>\s*\?)?
b0df5223 910 \s*(?P<value>[a-zA-Z0-9._-]+)
67134eab 911 \s*$
9ddb6925 912 ''' % '|'.join(map(re.escape, STR_OPERATORS.keys())))
67134eab 913 m = str_operator_rex.search(filter_spec)
9ddb6925
S
914 if m:
915 comparison_value = m.group('value')
916 op = STR_OPERATORS[m.group('op')]
083c9df9 917
9ddb6925 918 if not m:
67134eab 919 raise ValueError('Invalid filter specification %r' % filter_spec)
083c9df9
PH
920
921 def _filter(f):
922 actual_value = f.get(m.group('key'))
923 if actual_value is None:
924 return m.group('none_inclusive')
925 return op(actual_value, comparison_value)
67134eab
JMF
926 return _filter
927
def build_format_selector(self, format_spec):
    """Compile a format selection expression into a selector function.

    format_spec is tokenized with the Python tokenizer, parsed into a tree
    of FormatSelector tuples and turned into a function that takes the
    list of available formats and yields/returns the selected ones.
    Raises SyntaxError (built by syntax_error) for malformed expressions.
    """
    def syntax_error(note, start):
        # start is a tokenizer (row, column) pair; the column is used to
        # place the '^' marker under the offending position
        message = (
            'Invalid format specification: '
            '{0}\n\t{1}\n\t{2}^'.format(note, format_spec, ' ' * start[1]))
        return SyntaxError(message)

    # Node types of the parsed selector tree
    PICKFIRST = 'PICKFIRST'
    MERGE = 'MERGE'
    SINGLE = 'SINGLE'
    GROUP = 'GROUP'
    FormatSelector = collections.namedtuple('FormatSelector', ['type', 'selector', 'filters'])

    def _parse_filter(tokens):
        # Consume tokens up to the closing ']' and return the filter text.
        # Falls off the end (returning None) if no ']' is found.
        filter_parts = []
        for type, string, start, _, _ in tokens:
            if type == tokenize.OP and string == ']':
                return ''.join(filter_parts)
            else:
                filter_parts.append(string)

    def _remove_unused_ops(tokens):
        # Remove operators that we don't use and join them with the surrounding strings
        # for example: 'mp4' '-' 'baseline' '-' '16x9' is converted to 'mp4-baseline-16x9'
        ALLOWED_OPS = ('/', '+', ',', '(', ')')
        last_string, last_start, last_end, last_line = None, None, None, None
        for type, string, start, end, line in tokens:
            if type == tokenize.OP and string == '[':
                # Flush the pending joined name before the filter starts
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
                # everything inside brackets will be handled by _parse_filter
                for type, string, start, end, line in tokens:
                    yield type, string, start, end, line
                    if type == tokenize.OP and string == ']':
                        break
            elif type == tokenize.OP and string in ALLOWED_OPS:
                if last_string:
                    yield tokenize.NAME, last_string, last_start, last_end, last_line
                    last_string = None
                yield type, string, start, end, line
            elif type in [tokenize.NAME, tokenize.NUMBER, tokenize.OP]:
                # Accumulate names, numbers and unused operators into one name
                if not last_string:
                    last_string = string
                    last_start = start
                    last_end = end
                else:
                    last_string += string
        if last_string:
            yield tokenize.NAME, last_string, last_start, last_end, last_line

    def _parse_format_selection(tokens, inside_merge=False, inside_choice=False, inside_group=False):
        # Recursive descent parser; returns a list of FormatSelector nodes.
        # The inside_* flags tell a nested call which operators belong to
        # its caller: those are pushed back with restore_last_token().
        selectors = []
        current_selector = None
        for type, string, start, _, _ in tokens:
            # ENCODING is only defined in python 3.x
            if type == getattr(tokenize, 'ENCODING', None):
                continue
            elif type in [tokenize.NAME, tokenize.NUMBER]:
                current_selector = FormatSelector(SINGLE, string, [])
            elif type == tokenize.OP:
                if string == ')':
                    if not inside_group:
                        # ')' will be handled by the parentheses group
                        tokens.restore_last_token()
                    break
                elif inside_merge and string in ['/', ',']:
                    tokens.restore_last_token()
                    break
                elif inside_choice and string == ',':
                    tokens.restore_last_token()
                    break
                elif string == ',':
                    if not current_selector:
                        raise syntax_error('"," must follow a format selector', start)
                    selectors.append(current_selector)
                    current_selector = None
                elif string == '/':
                    if not current_selector:
                        raise syntax_error('"/" must follow a format selector', start)
                    first_choice = current_selector
                    second_choice = _parse_format_selection(tokens, inside_choice=True)
                    current_selector = FormatSelector(PICKFIRST, (first_choice, second_choice), [])
                elif string == '[':
                    # A filter without a preceding selector applies to 'best'
                    if not current_selector:
                        current_selector = FormatSelector(SINGLE, 'best', [])
                    format_filter = _parse_filter(tokens)
                    current_selector.filters.append(format_filter)
                elif string == '(':
                    if current_selector:
                        raise syntax_error('Unexpected "("', start)
                    group = _parse_format_selection(tokens, inside_group=True)
                    current_selector = FormatSelector(GROUP, group, [])
                elif string == '+':
                    video_selector = current_selector
                    audio_selector = _parse_format_selection(tokens, inside_merge=True)
                    if not video_selector or not audio_selector:
                        raise syntax_error('"+" must be between two format selectors', start)
                    current_selector = FormatSelector(MERGE, (video_selector, audio_selector), [])
                else:
                    raise syntax_error('Operator not recognized: "{0}"'.format(string), start)
            elif type == tokenize.ENDMARKER:
                break
        if current_selector:
            selectors.append(current_selector)
        return selectors

    def _build_selector_function(selector):
        # Turn a parsed selector (a node or a list of nodes) into a
        # function mapping the available formats to the selected ones.
        if isinstance(selector, list):
            fs = [_build_selector_function(s) for s in selector]

            def selector_function(formats):
                # Concatenate the results of every selector in the list
                for f in fs:
                    for format in f(formats):
                        yield format
            return selector_function
        elif selector.type == GROUP:
            selector_function = _build_selector_function(selector.selector)
        elif selector.type == PICKFIRST:
            fs = [_build_selector_function(s) for s in selector.selector]

            def selector_function(formats):
                # The first alternative that selects anything wins
                for f in fs:
                    picked_formats = list(f(formats))
                    if picked_formats:
                        return picked_formats
                return []
        elif selector.type == SINGLE:
            format_spec = selector.selector

            def selector_function(formats):
                formats = list(formats)
                if not formats:
                    return
                if format_spec == 'all':
                    for f in formats:
                        yield f
                elif format_spec in ['best', 'worst', None]:
                    # The last format in the list is treated as the best
                    # one, the first as the worst one
                    format_idx = 0 if format_spec == 'worst' else -1
                    audiovideo_formats = [
                        f for f in formats
                        if f.get('vcodec') != 'none' and f.get('acodec') != 'none']
                    if audiovideo_formats:
                        yield audiovideo_formats[format_idx]
                    # for audio only (soundcloud) or video only (imgur) urls, select the best/worst audio format
                    elif (all(f.get('acodec') != 'none' for f in formats) or
                          all(f.get('vcodec') != 'none' for f in formats)):
                        yield formats[format_idx]
                elif format_spec == 'bestaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[-1]
                elif format_spec == 'worstaudio':
                    audio_formats = [
                        f for f in formats
                        if f.get('vcodec') == 'none']
                    if audio_formats:
                        yield audio_formats[0]
                elif format_spec == 'bestvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[-1]
                elif format_spec == 'worstvideo':
                    video_formats = [
                        f for f in formats
                        if f.get('acodec') == 'none']
                    if video_formats:
                        yield video_formats[0]
                else:
                    # Anything else is either a known file extension or
                    # a literal format_id
                    extensions = ['mp4', 'flv', 'webm', '3gp', 'm4a', 'mp3', 'ogg', 'aac', 'wav']
                    if format_spec in extensions:
                        filter_f = lambda f: f['ext'] == format_spec
                    else:
                        filter_f = lambda f: f['format_id'] == format_spec
                    matches = list(filter(filter_f, formats))
                    if matches:
                        yield matches[-1]
        elif selector.type == MERGE:
            def _merge(formats_info):
                # Build the info dict of a merged (video, audio) pair.
                # NOTE(review): after report_error this returns None, which
                # selector_function below then yields - presumably
                # report_error normally raises; confirm.
                format_1, format_2 = [f['format_id'] for f in formats_info]
                # The first format must contain the video and the
                # second the audio
                if formats_info[0].get('vcodec') == 'none':
                    self.report_error('The first format must '
                                      'contain the video, try using '
                                      '"-f %s+%s"' % (format_2, format_1))
                    return
                # Formats must be opposite (video+audio)
                if formats_info[0].get('acodec') == 'none' and formats_info[1].get('acodec') == 'none':
                    self.report_error(
                        'Both formats %s and %s are video-only, you must specify "-f video+audio"'
                        % (format_1, format_2))
                    return
                # The container is the one of the video format unless
                # 'merge_output_format' is set
                output_ext = (
                    formats_info[0]['ext']
                    if self.params.get('merge_output_format') is None
                    else self.params['merge_output_format'])
                return {
                    'requested_formats': formats_info,
                    'format': '%s+%s' % (formats_info[0].get('format'),
                                         formats_info[1].get('format')),
                    'format_id': '%s+%s' % (formats_info[0].get('format_id'),
                                            formats_info[1].get('format_id')),
                    'width': formats_info[0].get('width'),
                    'height': formats_info[0].get('height'),
                    'resolution': formats_info[0].get('resolution'),
                    'fps': formats_info[0].get('fps'),
                    'vcodec': formats_info[0].get('vcodec'),
                    'vbr': formats_info[0].get('vbr'),
                    'stretched_ratio': formats_info[0].get('stretched_ratio'),
                    'acodec': formats_info[1].get('acodec'),
                    'abr': formats_info[1].get('abr'),
                    'ext': output_ext,
                }
            video_selector, audio_selector = map(_build_selector_function, selector.selector)

            def selector_function(formats):
                # Merge every selected video format with every selected
                # audio format
                formats = list(formats)
                for pair in itertools.product(video_selector(formats), audio_selector(formats)):
                    yield _merge(pair)

        filters = [self._build_format_filter(f) for f in selector.filters]

        def final_selector(formats):
            # Apply the [...] filters first, then the actual selection
            for _filter in filters:
                formats = list(filter(_filter, formats))
            return selector_function(formats)
        return final_selector

    stream = io.BytesIO(format_spec.encode('utf-8'))
    try:
        tokens = list(_remove_unused_ops(compat_tokenize_tokenize(stream.readline)))
    except tokenize.TokenError:
        raise syntax_error('Missing closing/opening brackets or parenthesis', (0, len(format_spec)))

    class TokenIterator(object):
        # Iterator over the token list that can step back by one token,
        # which the parser uses to hand an operator back to its caller
        def __init__(self, tokens):
            self.tokens = tokens
            self.counter = 0

        def __iter__(self):
            return self

        def __next__(self):
            if self.counter >= len(self.tokens):
                raise StopIteration()
            value = self.tokens[self.counter]
            self.counter += 1
            return value

        # Python 2 spelling of the iterator protocol
        next = __next__

        def restore_last_token(self):
            self.counter -= 1

    parsed_selector = _parse_format_selection(iter(TokenIterator(tokens)))
    return _build_selector_function(parsed_selector)
a9c58ad9 1190
e5660ee6
JMF
def _calc_headers(self, info_dict):
    """Return the HTTP headers to use for the given info_dict.

    Starts from a copy of std_headers, overlays the 'http_headers' of
    info_dict (if any) and adds a 'Cookie' header when _calc_cookies
    returns a non-empty value.
    """
    headers = std_headers.copy()
    headers.update(info_dict.get('http_headers') or {})

    cookie_header = self._calc_cookies(info_dict)
    if cookie_header:
        headers['Cookie'] = cookie_header

    return headers
1203
def _calc_cookies(self, info_dict):
    """Return the 'Cookie' header the cookiejar adds for info_dict['url'].

    A throwaway request for the URL is built only so that the cookiejar
    can attach its cookie header to it.
    """
    probe_request = sanitized_Request(info_dict['url'])
    self.cookiejar.add_cookie_header(probe_request)
    return probe_request.get_header('Cookie')
e5660ee6 1208
dd82ffea
JMF
def process_video_result(self, info_dict, download=True):
    """Normalize a single video result, pick its formats and download them.

    info_dict is completed in place (playlist fields, thumbnails,
    display_id, upload_date, chapter/season/episode titles, subtitle
    extensions, per-format defaults and HTTP headers). The requested
    formats are then selected and, if 'download' is true, passed to
    process_info one by one.

    Returns info_dict updated with the last selected format, or None when
    one of the listing options ('listsubtitles', 'listformats',
    'list_thumbnails') is set. Raises ExtractorError when mandatory fields
    are missing, no formats exist or the requested format is unavailable.
    """
    assert info_dict.get('_type', 'video') == 'video'

    if 'id' not in info_dict:
        raise ExtractorError('Missing "id" field in extractor result')
    if 'title' not in info_dict:
        raise ExtractorError('Missing "title" field in extractor result')

    if 'playlist' not in info_dict:
        # It isn't part of a playlist
        info_dict['playlist'] = None
        info_dict['playlist_index'] = None

    thumbnails = info_dict.get('thumbnails')
    if thumbnails is None:
        thumbnail = info_dict.get('thumbnail')
        if thumbnail:
            info_dict['thumbnails'] = thumbnails = [{'url': thumbnail}]
    if thumbnails:
        def _none_first(value):
            # None is not orderable against numbers/strings on Python 3;
            # this key keeps the Python 2 ordering (None sorts first)
            # without ever comparing None to another value.
            return (value is not None, value)

        thumbnails.sort(key=lambda t: (
            _none_first(t.get('preference')),
            _none_first(t.get('width')),
            _none_first(t.get('height')),
            _none_first(t.get('id')),
            _none_first(t.get('url'))))
        for i, t in enumerate(thumbnails):
            if t.get('width') and t.get('height'):
                t['resolution'] = '%dx%d' % (t['width'], t['height'])
            if t.get('id') is None:
                t['id'] = '%d' % i

    if thumbnails and 'thumbnail' not in info_dict:
        # The list is sorted in ascending order, so the last one is used
        info_dict['thumbnail'] = thumbnails[-1]['url']

    if 'display_id' not in info_dict and 'id' in info_dict:
        info_dict['display_id'] = info_dict['id']

    if info_dict.get('upload_date') is None and info_dict.get('timestamp') is not None:
        # Working around out-of-range timestamp values (e.g. negative ones on Windows,
        # see http://bugs.python.org/issue1646728)
        try:
            upload_date = datetime.datetime.utcfromtimestamp(info_dict['timestamp'])
            info_dict['upload_date'] = upload_date.strftime('%Y%m%d')
        except (ValueError, OverflowError, OSError):
            pass

    # Auto generate title fields corresponding to the *_number fields when missing
    # in order to always have clean titles. This is very common for TV series.
    for field in ('chapter', 'season', 'episode'):
        if info_dict.get('%s_number' % field) is not None and not info_dict.get(field):
            info_dict[field] = '%s %d' % (field.capitalize(), info_dict['%s_number' % field])

    subtitles = info_dict.get('subtitles')
    if subtitles:
        for subtitle in subtitles.values():
            for subtitle_format in subtitle:
                if 'ext' not in subtitle_format:
                    subtitle_format['ext'] = determine_ext(subtitle_format['url']).lower()

    if self.params.get('listsubtitles', False):
        if 'automatic_captions' in info_dict:
            self.list_subtitles(info_dict['id'], info_dict.get('automatic_captions'), 'automatic captions')
        self.list_subtitles(info_dict['id'], subtitles, 'subtitles')
        return
    info_dict['requested_subtitles'] = self.process_subtitles(
        info_dict['id'], subtitles,
        info_dict.get('automatic_captions'))

    # We now pick which formats have to be downloaded
    if info_dict.get('formats') is None:
        # There's only one format available
        formats = [info_dict]
    else:
        formats = info_dict['formats']

    if not formats:
        raise ExtractorError('No video formats found!')

    formats_dict = {}

    # We check that all the formats have the format and format_id fields
    for i, fmt in enumerate(formats):
        if 'url' not in fmt:
            raise ExtractorError('Missing "url" key in result (index %d)' % i)

        if fmt.get('format_id') is None:
            fmt['format_id'] = compat_str(i)
        else:
            # Sanitize format_id from characters used in format selector expression
            fmt['format_id'] = re.sub(r'[\s,/+\[\]()]', '_', fmt['format_id'])
        format_id = fmt['format_id']
        if format_id not in formats_dict:
            formats_dict[format_id] = []
        formats_dict[format_id].append(fmt)

    # Make sure all formats have unique format_id
    for format_id, ambiguous_formats in formats_dict.items():
        if len(ambiguous_formats) > 1:
            for i, fmt in enumerate(ambiguous_formats):
                fmt['format_id'] = '%s-%d' % (format_id, i)

    for fmt in formats:
        if fmt.get('format') is None:
            fmt['format'] = '{id} - {res}{note}'.format(
                id=fmt['format_id'],
                res=self.format_resolution(fmt),
                note=' ({0})'.format(fmt['format_note']) if fmt.get('format_note') is not None else '',
            )
        # Automatically determine file extension if missing
        if 'ext' not in fmt:
            fmt['ext'] = determine_ext(fmt['url']).lower()
        # Automatically determine protocol if missing (useful for format
        # selection purposes)
        if 'protocol' not in fmt:
            fmt['protocol'] = determine_protocol(fmt)
        # Add HTTP headers, so that external programs can use them from the
        # json output
        full_format_info = info_dict.copy()
        full_format_info.update(fmt)
        fmt['http_headers'] = self._calc_headers(full_format_info)

    # TODO Central sorting goes here

    if formats[0] is not info_dict:
        # only set the 'formats' fields if the original info_dict list them
        # otherwise we end up with a circular reference, the first (and unique)
        # element in the 'formats' field in info_dict is info_dict itself,
        # which can't be exported to json
        info_dict['formats'] = formats
    if self.params.get('listformats'):
        self.list_formats(info_dict)
        return
    if self.params.get('list_thumbnails'):
        self.list_thumbnails(info_dict)
        return

    req_format = self.params.get('format')
    if req_format is None:
        # Default: prefer merging the best video and audio when that is
        # possible (not writing to stdout, not live, merger usable),
        # falling back to the best single file
        req_format_list = []
        if (self.params.get('outtmpl', DEFAULT_OUTTMPL) != '-' and
                not info_dict.get('is_live')):
            merger = FFmpegMergerPP(self)
            if merger.available and merger.can_merge():
                req_format_list.append('bestvideo+bestaudio')
        req_format_list.append('best')
        req_format = '/'.join(req_format_list)
    format_selector = self.build_format_selector(req_format)
    formats_to_download = list(format_selector(formats))
    if not formats_to_download:
        raise ExtractorError('requested format not available',
                             expected=True)

    if download:
        if len(formats_to_download) > 1:
            self.to_screen('[info] %s: downloading video in %s formats' % (info_dict['id'], len(formats_to_download)))
        for fmt in formats_to_download:
            new_info = dict(info_dict)
            new_info.update(fmt)
            self.process_info(new_info)
    # We update the info dict with the best quality format (backwards compatibility)
    info_dict.update(formats_to_download[-1])
    return info_dict
1368
def process_subtitles(self, video_id, normal_subtitles, automatic_captions):
    """Select the requested subtitles and their format

    normal_subtitles and automatic_captions map a language code to a list
    of subtitle format dicts (each with at least an 'ext' key). Regular
    subtitles take precedence over automatic captions of the same
    language. Returns a dict mapping each selected language to the chosen
    format dict, or None when no subtitles were requested or available.
    """
    available_subs = {}
    if normal_subtitles and self.params.get('writesubtitles'):
        available_subs.update(normal_subtitles)
    if automatic_captions and self.params.get('writeautomaticsub'):
        for lang, cap_info in automatic_captions.items():
            if lang not in available_subs:
                available_subs[lang] = cap_info

    # available_subs is only filled when one of the write* options is set,
    # so this also covers the "nothing requested" case
    if not available_subs:
        return None

    if self.params.get('allsubtitles', False):
        requested_langs = available_subs.keys()
    else:
        if self.params.get('subtitleslangs', False):
            requested_langs = self.params.get('subtitleslangs')
        elif 'en' in available_subs:
            requested_langs = ['en']
        else:
            requested_langs = [list(available_subs.keys())[0]]

    formats_query = self.params.get('subtitlesformat', 'best')
    formats_preference = formats_query.split('/') if formats_query else []
    subs = {}
    for lang in requested_langs:
        formats = available_subs.get(lang)
        # A language without any format is as unusable as a missing one;
        # skipping it also avoids an IndexError on formats[-1] below
        if not formats:
            self.report_warning('%s subtitles not available for %s' % (lang, video_id))
            continue
        for ext in formats_preference:
            if ext == 'best':
                # The last listed format is treated as the best one
                f = formats[-1]
                break
            matches = list(filter(lambda f: f['ext'] == ext, formats))
            if matches:
                f = matches[-1]
                break
        else:
            # None of the preferred formats matched: fall back to the last one
            f = formats[-1]
            self.report_warning(
                'No subtitle format found matching "%s" for language %s, '
                'using %s' % (formats_query, lang, f['ext']))
        subs[lang] = f
    return subs
1417
8222d8de
JMF
1418 def process_info(self, info_dict):
1419 """Process a single resolved IE result."""
1420
1421 assert info_dict.get('_type', 'video') == 'video'
fd288278
PH
1422
1423 max_downloads = self.params.get('max_downloads')
1424 if max_downloads is not None:
1425 if self._num_downloads >= int(max_downloads):
1426 raise MaxDownloadsReached()
8222d8de
JMF
1427
1428 info_dict['fulltitle'] = info_dict['title']
1429 if len(info_dict['title']) > 200:
6febd1c1 1430 info_dict['title'] = info_dict['title'][:197] + '...'
8222d8de 1431
11b85ce6 1432 if 'format' not in info_dict:
8222d8de
JMF
1433 info_dict['format'] = info_dict['ext']
1434
442c37b7 1435 reason = self._match_entry(info_dict, incomplete=False)
8222d8de 1436 if reason is not None:
6febd1c1 1437 self.to_screen('[download] ' + reason)
8222d8de
JMF
1438 return
1439
fd288278 1440 self._num_downloads += 1
8222d8de 1441
e72c7e41 1442 info_dict['_filename'] = filename = self.prepare_filename(info_dict)
8222d8de
JMF
1443
1444 # Forced printings
1445 if self.params.get('forcetitle', False):
0783b09b 1446 self.to_stdout(info_dict['fulltitle'])
8222d8de 1447 if self.params.get('forceid', False):
0783b09b 1448 self.to_stdout(info_dict['id'])
8222d8de 1449 if self.params.get('forceurl', False):
16ae61f6 1450 if info_dict.get('requested_formats') is not None:
1451 for f in info_dict['requested_formats']:
1452 self.to_stdout(f['url'] + f.get('play_path', ''))
1453 else:
1454 # For RTMP URLs, also include the playpath
1455 self.to_stdout(info_dict['url'] + info_dict.get('play_path', ''))
216d71d0 1456 if self.params.get('forcethumbnail', False) and info_dict.get('thumbnail') is not None:
0783b09b 1457 self.to_stdout(info_dict['thumbnail'])
216d71d0 1458 if self.params.get('forcedescription', False) and info_dict.get('description') is not None:
0783b09b 1459 self.to_stdout(info_dict['description'])
8222d8de 1460 if self.params.get('forcefilename', False) and filename is not None:
0783b09b 1461 self.to_stdout(filename)
525ef922
PH
1462 if self.params.get('forceduration', False) and info_dict.get('duration') is not None:
1463 self.to_stdout(formatSeconds(info_dict['duration']))
8222d8de 1464 if self.params.get('forceformat', False):
0783b09b 1465 self.to_stdout(info_dict['format'])
9d153818 1466 if self.params.get('forcejson', False):
0783b09b 1467 self.to_stdout(json.dumps(info_dict))
8222d8de
JMF
1468
1469 # Do nothing else if in simulate mode
1470 if self.params.get('simulate', False):
1471 return
1472
1473 if filename is None:
1474 return
1475
1476 try:
e5a11a22 1477 dn = os.path.dirname(sanitize_path(encodeFilename(filename)))
d26e981d 1478 if dn and not os.path.exists(dn):
8222d8de
JMF
1479 os.makedirs(dn)
1480 except (OSError, IOError) as err:
9b9c5355 1481 self.report_error('unable to create directory ' + error_to_compat_str(err))
8222d8de
JMF
1482 return
1483
1484 if self.params.get('writedescription', False):
2699da80 1485 descfn = replace_extension(filename, 'description', info_dict.get('ext'))
7b6fefc9 1486 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(descfn)):
6febd1c1 1487 self.to_screen('[info] Video description is already present')
f00fd51d
JMF
1488 elif info_dict.get('description') is None:
1489 self.report_warning('There\'s no description to write.')
7b6fefc9
PH
1490 else:
1491 try:
6febd1c1 1492 self.to_screen('[info] Writing video description to: ' + descfn)
7b6fefc9
PH
1493 with io.open(encodeFilename(descfn), 'w', encoding='utf-8') as descfile:
1494 descfile.write(info_dict['description'])
7b6fefc9 1495 except (OSError, IOError):
6febd1c1 1496 self.report_error('Cannot write description file ' + descfn)
7b6fefc9 1497 return
8222d8de 1498
1fb07d10 1499 if self.params.get('writeannotations', False):
98727e12 1500 annofn = replace_extension(filename, 'annotations.xml', info_dict.get('ext'))
7b6fefc9 1501 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(annofn)):
6febd1c1 1502 self.to_screen('[info] Video annotations are already present')
7b6fefc9
PH
1503 else:
1504 try:
6febd1c1 1505 self.to_screen('[info] Writing video annotations to: ' + annofn)
7b6fefc9
PH
1506 with io.open(encodeFilename(annofn), 'w', encoding='utf-8') as annofile:
1507 annofile.write(info_dict['annotations'])
1508 except (KeyError, TypeError):
6febd1c1 1509 self.report_warning('There are no annotations to write.')
7b6fefc9 1510 except (OSError, IOError):
6febd1c1 1511 self.report_error('Cannot write annotations file: ' + annofn)
7b6fefc9 1512 return
1fb07d10 1513
c4a91be7 1514 subtitles_are_requested = any([self.params.get('writesubtitles', False),
0b7f3118 1515 self.params.get('writeautomaticsub')])
c4a91be7 1516
c84dd8a9 1517 if subtitles_are_requested and info_dict.get('requested_subtitles'):
8222d8de
JMF
1518 # subtitles download errors are already managed as troubles in relevant IE
1519 # that way it will silently go on when used with unsupporting IE
c84dd8a9 1520 subtitles = info_dict['requested_subtitles']
0f2c0d33 1521 ie = self.get_info_extractor(info_dict['extractor_key'])
a504ced0
JMF
1522 for sub_lang, sub_info in subtitles.items():
1523 sub_format = sub_info['ext']
1524 if sub_info.get('data') is not None:
1525 sub_data = sub_info['data']
1526 else:
1527 try:
0f2c0d33
JMF
1528 sub_data = ie._download_webpage(
1529 sub_info['url'], info_dict['id'], note=False)
1530 except ExtractorError as err:
a504ced0 1531 self.report_warning('Unable to download subtitle for "%s": %s' %
9b9c5355 1532 (sub_lang, error_to_compat_str(err.cause)))
a504ced0 1533 continue
8222d8de 1534 try:
d4051a8e 1535 sub_filename = subtitles_filename(filename, sub_lang, sub_format)
7b6fefc9 1536 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(sub_filename)):
6febd1c1 1537 self.to_screen('[info] Video subtitle %s.%s is already_present' % (sub_lang, sub_format))
7b6fefc9 1538 else:
6febd1c1 1539 self.to_screen('[info] Writing video subtitles to: ' + sub_filename)
7b6fefc9 1540 with io.open(encodeFilename(sub_filename), 'w', encoding='utf-8') as subfile:
a504ced0 1541 subfile.write(sub_data)
8222d8de 1542 except (OSError, IOError):
e4db1951 1543 self.report_error('Cannot write subtitles file ' + sub_filename)
8222d8de
JMF
1544 return
1545
8222d8de 1546 if self.params.get('writeinfojson', False):
b29e0000 1547 infofn = replace_extension(filename, 'info.json', info_dict.get('ext'))
7b6fefc9 1548 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(infofn)):
6febd1c1 1549 self.to_screen('[info] Video description metadata is already present')
7b6fefc9 1550 else:
6febd1c1 1551 self.to_screen('[info] Writing video description metadata as JSON to: ' + infofn)
7b6fefc9 1552 try:
cb202fd2 1553 write_json_file(self.filter_requested_info(info_dict), infofn)
7b6fefc9 1554 except (OSError, IOError):
6febd1c1 1555 self.report_error('Cannot write metadata to JSON file ' + infofn)
7b6fefc9 1556 return
8222d8de 1557
ec82d85a 1558 self._write_thumbnails(info_dict, filename)
8222d8de
JMF
1559
1560 if not self.params.get('skip_download', False):
4340deca
P
1561 try:
def dl(name, info):
    """Download *info* into the file *name*.

    Picks the downloader class suited to *info*, attaches every
    registered progress hook and returns the result of its
    ``download`` call.  ``self`` comes from the enclosing scope.
    """
    downloader_cls = get_suitable_downloader(info, self.params)
    downloader = downloader_cls(self, self.params)
    for hook in self._progress_hooks:
        downloader.add_progress_hook(hook)
    verbose = self.params.get('verbose')
    if verbose:
        self.to_stdout('[debug] Invoking downloader on %r' % info.get('url'))
    return downloader.download(name, info)
ee69b99a 1569
4340deca
P
1570 if info_dict.get('requested_formats') is not None:
1571 downloaded = []
1572 success = True
d47aeb22 1573 merger = FFmpegMergerPP(self)
f740fae2 1574 if not merger.available:
4340deca
P
1575 postprocessors = []
1576 self.report_warning('You have requested multiple '
1577 'formats but ffmpeg or avconv are not installed.'
4a5a898a 1578 ' The formats won\'t be merged.')
6350728b 1579 else:
4340deca 1580 postprocessors = [merger]
81cd954a
S
1581
def compatible_formats(formats):
    """Tell whether a video/audio format pair can be merged as-is.

    *formats* is a two-item sequence: (video format dict, audio format
    dict).  Returns True only when both carry an 'ext' and both
    extensions belong to the same group of compatible extensions.
    """
    video, audio = formats
    # Check extension
    video_ext, audio_ext = video.get('ext'), audio.get('ext')
    if video_ext and audio_ext:
        COMPATIBLE_EXTS = (
            ('mp3', 'mp4', 'm4a', 'm4p', 'm4b', 'm4r', 'm4v'),
            # The trailing comma is required: a bare ('webm') is just a
            # string, and `in` would then match substrings such as 'web'.
            ('webm',),
        )
        for exts in COMPATIBLE_EXTS:
            if video_ext in exts and audio_ext in exts:
                return True
    # TODO: Check acodec/vcodec
    return False
1596
38c6902b
S
1597 filename_real_ext = os.path.splitext(filename)[1][1:]
1598 filename_wo_ext = (
1599 os.path.splitext(filename)[0]
1600 if filename_real_ext == info_dict['ext']
1601 else filename)
81cd954a 1602 requested_formats = info_dict['requested_formats']
c0dea0a7 1603 if self.params.get('merge_output_format') is None and not compatible_formats(requested_formats):
38c6902b 1604 info_dict['ext'] = 'mkv'
4a5a898a
S
1605 self.report_warning(
1606 'Requested formats are incompatible for merge and will be merged into mkv.')
38c6902b
S
1607 # Ensure filename always has a correct extension for successful merge
1608 filename = '%s.%s' % (filename_wo_ext, info_dict['ext'])
5b5fbc08
JMF
1609 if os.path.exists(encodeFilename(filename)):
1610 self.to_screen(
1611 '[download] %s has already been downloaded and '
1612 'merged' % filename)
1613 else:
81cd954a 1614 for f in requested_formats:
5b5fbc08
JMF
1615 new_info = dict(info_dict)
1616 new_info.update(f)
1617 fname = self.prepare_filename(new_info)
666a9a2b 1618 fname = prepend_extension(fname, 'f%s' % f['format_id'], new_info['ext'])
5b5fbc08
JMF
1619 downloaded.append(fname)
1620 partial_success = dl(fname, new_info)
1621 success = success and partial_success
1622 info_dict['__postprocessors'] = postprocessors
1623 info_dict['__files_to_merge'] = downloaded
4340deca
P
1624 else:
1625 # Just a single file
1626 success = dl(filename, info_dict)
1627 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
1628 self.report_error('unable to download video data: %s' % str(err))
1629 return
1630 except (OSError, IOError) as err:
1631 raise UnavailableVideoError(err)
1632 except (ContentTooShortError, ) as err:
1633 self.report_error('content too short (expected %s bytes and served %s)' % (err.expected, err.downloaded))
1634 return
8222d8de 1635
e38cafe9 1636 if success and filename != '-':
6271f1ca 1637 # Fixup content
62cd676c
PH
1638 fixup_policy = self.params.get('fixup')
1639 if fixup_policy is None:
1640 fixup_policy = 'detect_or_warn'
1641
6271f1ca
PH
1642 stretched_ratio = info_dict.get('stretched_ratio')
1643 if stretched_ratio is not None and stretched_ratio != 1:
6271f1ca
PH
1644 if fixup_policy == 'warn':
1645 self.report_warning('%s: Non-uniform pixel ratio (%s)' % (
1646 info_dict['id'], stretched_ratio))
1647 elif fixup_policy == 'detect_or_warn':
1648 stretched_pp = FFmpegFixupStretchedPP(self)
1649 if stretched_pp.available:
1650 info_dict.setdefault('__postprocessors', [])
1651 info_dict['__postprocessors'].append(stretched_pp)
1652 else:
1653 self.report_warning(
1654 '%s: Non-uniform pixel ratio (%s). Install ffmpeg or avconv to fix this automatically.' % (
1655 info_dict['id'], stretched_ratio))
1656 else:
62cd676c
PH
1657 assert fixup_policy in ('ignore', 'never')
1658
1659 if info_dict.get('requested_formats') is None and info_dict.get('container') == 'm4a_dash':
1660 if fixup_policy == 'warn':
1661 self.report_warning('%s: writing DASH m4a. Only some players support this container.' % (
1662 info_dict['id']))
1663 elif fixup_policy == 'detect_or_warn':
1664 fixup_pp = FFmpegFixupM4aPP(self)
1665 if fixup_pp.available:
1666 info_dict.setdefault('__postprocessors', [])
1667 info_dict['__postprocessors'].append(fixup_pp)
1668 else:
1669 self.report_warning(
1670 '%s: writing DASH m4a. Only some players support this container. Install ffmpeg or avconv to fix this automatically.' % (
1671 info_dict['id']))
1672 else:
1673 assert fixup_policy in ('ignore', 'never')
6271f1ca 1674
ff059017 1675 if info_dict.get('protocol') == 'm3u8_native' or info_dict.get('protocol') == 'm3u8' and self.params.get('hls_prefer_native', False):
f17f8651 1676 if fixup_policy == 'warn':
1677 self.report_warning('%s: malformated aac bitstream.' % (
1678 info_dict['id']))
1679 elif fixup_policy == 'detect_or_warn':
1680 fixup_pp = FFmpegFixupM3u8PP(self)
1681 if fixup_pp.available:
1682 info_dict.setdefault('__postprocessors', [])
1683 info_dict['__postprocessors'].append(fixup_pp)
1684 else:
1685 self.report_warning(
1686 '%s: malformated aac bitstream. Install ffmpeg or avconv to fix this automatically.' % (
1687 info_dict['id']))
1688 else:
1689 assert fixup_policy in ('ignore', 'never')
1690
8222d8de
JMF
1691 try:
1692 self.post_process(filename, info_dict)
1693 except (PostProcessingError) as err:
6febd1c1 1694 self.report_error('postprocessing: %s' % str(err))
8222d8de 1695 return
cd58dc3e 1696 self.record_download_archive(info_dict)
8222d8de
JMF
1697
def download(self, url_list):
    """Download every URL in *url_list* and return the download retcode.

    Raises SameFileError when several URLs would be written to one fixed
    output template (no '%' placeholder) and 'max_downloads' is not 1.
    MaxDownloadsReached is announced on screen and re-raised.
    """
    template = self.params.get('outtmpl', DEFAULT_OUTTMPL)
    shared_target = len(url_list) > 1 and '%' not in template
    if shared_target and self.params.get('max_downloads') != 1:
        raise SameFileError(template)

    for url in url_list:
        try:
            # extract_info also downloads the videos
            force_generic = self.params.get('force_generic_extractor', False)
            res = self.extract_info(url, force_generic_extractor=force_generic)
        except UnavailableVideoError:
            self.report_error('unable to download video')
            continue
        except MaxDownloadsReached:
            self.to_screen('[info] Maximum number of downloaded files reached.')
            raise
        if self.params.get('dump_single_json', False):
            self.to_stdout(json.dumps(res))

    return self._download_retcode
1721
def download_with_info_file(self, info_filename):
    """Process a previously written info JSON file.

    The file is read as UTF-8, stripped of the requested_* entries via
    filter_requested_info and handed to process_ie_result.  If that
    raises DownloadError and the info has a 'webpage_url', the URL is
    downloaded instead; without one the error is re-raised.
    """
    reader = fileinput.FileInput(
        [info_filename], mode='r',
        openhook=fileinput.hook_encoded('utf-8'))
    with contextlib.closing(reader) as lines:
        # FileInput doesn't have a read method, we can't call json.load
        raw = '\n'.join(lines)
        info = self.filter_requested_info(json.loads(raw))
    try:
        self.process_ie_result(info, download=True)
    except DownloadError:
        fallback_url = info.get('webpage_url')
        if fallback_url is None:
            raise
        self.report_warning('The info failed to download, trying with "%s"' % fallback_url)
        return self.download([fallback_url])
    return self._download_retcode
1dcc4c0c 1738
cb202fd2
S
1739 @staticmethod
1740 def filter_requested_info(info_dict):
1741 return dict(
1742 (k, v) for k, v in info_dict.items()
1743 if k not in ['requested_formats', 'requested_subtitles'])
1744
8222d8de
JMF
def post_process(self, filename, ie_info):
    """Run all the postprocessors on the given file.

    The chain is the info's own '__postprocessors' (if any) followed by
    self._pps.  Each postprocessor receives the info returned by the
    previous one; files it reports for deletion are removed unless the
    'keepvideo' param is set.  A PostProcessingError is reported and the
    chain continues with the next postprocessor.
    """
    info = dict(ie_info)
    info['filepath'] = filename

    own_pps = ie_info.get('__postprocessors')
    chain = []
    if own_pps is not None:
        chain.extend(own_pps)
    chain.extend(self._pps)

    for processor in chain:
        try:
            obsolete, info = processor.run(info)
        except PostProcessingError as err:
            self.report_error(err.msg)
            obsolete = []
        if not obsolete or self.params.get('keepvideo', False):
            continue
        for stale in obsolete:
            self.to_screen('Deleting original file %s (pass -k to keep)' % stale)
            try:
                os.remove(encodeFilename(stale))
            except (IOError, OSError):
                self.report_warning('Unable to remove downloaded original file')
c1c9a79c 1766
5db07df6
PH
1767 def _make_archive_id(self, info_dict):
1768 # Future-proof against any change in case
1769 # and backwards compatibility with prior versions
d31209a1 1770 extractor = info_dict.get('extractor_key')
7012b23c
PH
1771 if extractor is None:
1772 if 'id' in info_dict:
1773 extractor = info_dict.get('ie_key') # key in a playlist
1774 if extractor is None:
5db07df6 1775 return None # Incomplete video information
6febd1c1 1776 return extractor.lower() + ' ' + info_dict['id']
5db07df6
PH
1777
def in_download_archive(self, info_dict):
    """Return True if the video is already listed in the download archive.

    False is returned when no 'download_archive' param is set, when no
    archive id can be built for *info_dict*, or when the archive file
    does not exist.  Other IOErrors are propagated.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is None:
        return False

    wanted = self._make_archive_id(info_dict)
    if wanted is None:
        return False  # Incomplete video information

    try:
        with locked_file(archive_path, 'r', encoding='utf-8') as archive_file:
            if any(entry.strip() == wanted for entry in archive_file):
                return True
    except IOError as ioe:
        # A missing archive simply means nothing was recorded yet
        if ioe.errno != errno.ENOENT:
            raise
    return False
1796
def record_download_archive(self, info_dict):
    """Append the video's archive id to the download archive file.

    Does nothing when the 'download_archive' param is not set.
    """
    archive_path = self.params.get('download_archive')
    if archive_path is not None:
        entry = self._make_archive_id(info_dict)
        assert entry
        with locked_file(archive_path, 'a', encoding='utf-8') as archive_file:
            archive_file.write(entry + '\n')
dd82ffea 1805
8c51aa65 1806 @staticmethod
8abeeb94 1807 def format_resolution(format, default='unknown'):
fb04e403
PH
1808 if format.get('vcodec') == 'none':
1809 return 'audio only'
f49d89ee
PH
1810 if format.get('resolution') is not None:
1811 return format['resolution']
8c51aa65
JMF
1812 if format.get('height') is not None:
1813 if format.get('width') is not None:
6febd1c1 1814 res = '%sx%s' % (format['width'], format['height'])
8c51aa65 1815 else:
6febd1c1 1816 res = '%sp' % format['height']
f49d89ee 1817 elif format.get('width') is not None:
388ae76b 1818 res = '%dx?' % format['width']
8c51aa65 1819 else:
8abeeb94 1820 res = default
8c51aa65
JMF
1821 return res
1822
c57f7757
PH
1823 def _format_note(self, fdict):
1824 res = ''
1825 if fdict.get('ext') in ['f4f', 'f4m']:
1826 res += '(unsupported) '
32f90364
PH
1827 if fdict.get('language'):
1828 if res:
1829 res += ' '
1830 res += '[%s]' % fdict['language']
c57f7757
PH
1831 if fdict.get('format_note') is not None:
1832 res += fdict['format_note'] + ' '
1833 if fdict.get('tbr') is not None:
1834 res += '%4dk ' % fdict['tbr']
1835 if fdict.get('container') is not None:
1836 if res:
1837 res += ', '
1838 res += '%s container' % fdict['container']
1839 if (fdict.get('vcodec') is not None and
1840 fdict.get('vcodec') != 'none'):
1841 if res:
1842 res += ', '
1843 res += fdict['vcodec']
91c7271a 1844 if fdict.get('vbr') is not None:
c57f7757
PH
1845 res += '@'
1846 elif fdict.get('vbr') is not None and fdict.get('abr') is not None:
1847 res += 'video@'
1848 if fdict.get('vbr') is not None:
1849 res += '%4dk' % fdict['vbr']
fbb21cf5
PH
1850 if fdict.get('fps') is not None:
1851 res += ', %sfps' % fdict['fps']
c57f7757
PH
1852 if fdict.get('acodec') is not None:
1853 if res:
1854 res += ', '
1855 if fdict['acodec'] == 'none':
1856 res += 'video only'
1857 else:
1858 res += '%-5s' % fdict['acodec']
1859 elif fdict.get('abr') is not None:
1860 if res:
1861 res += ', '
1862 res += 'audio'
1863 if fdict.get('abr') is not None:
1864 res += '@%3dk' % fdict['abr']
1865 if fdict.get('asr') is not None:
1866 res += ' (%5dHz)' % fdict['asr']
1867 if fdict.get('filesize') is not None:
1868 if res:
1869 res += ', '
1870 res += format_bytes(fdict['filesize'])
9732d77e
PH
1871 elif fdict.get('filesize_approx') is not None:
1872 if res:
1873 res += ', '
1874 res += '~' + format_bytes(fdict['filesize_approx'])
c57f7757 1875 return res
91c7271a 1876
def list_formats(self, info_dict):
    """Print a table of the available formats for *info_dict*.

    Uses info_dict['formats'], or the info dict itself as the single
    format when that key is absent.  Formats with a preference below
    -1000 are left out.  When there are several formats, the last
    listed row is tagged '(best)'.
    """
    formats = info_dict.get('formats', [info_dict])
    table = [
        [f['format_id'], f['ext'], self.format_resolution(f), self._format_note(f)]
        for f in formats
        if f.get('preference') is None or f['preference'] >= -1000]
    # `table` can be empty even with several formats if the preference
    # filter dropped all of them; indexing it then would raise IndexError.
    if table and len(formats) > 1:
        table[-1][-1] += (' ' if table[-1][-1] else '') + '(best)'

    header_line = ['format code', 'extension', 'resolution', 'note']
    self.to_screen(
        '[info] Available formats for %s:\n%s' %
        (info_dict['id'], render_table(header_line, table)))
cfb56d1a
PH
1890
def list_thumbnails(self, info_dict):
    """Print a table of the thumbnails known for *info_dict*.

    Falls back to the single 'thumbnail' URL (listed with id '0') when
    there is no 'thumbnails' list, and prints a notice when neither is
    present.
    """
    entries = info_dict.get('thumbnails')
    if not entries:
        single_url = info_dict.get('thumbnail')
        if not single_url:
            self.to_screen(
                '[info] No thumbnails present for %s' % info_dict['id'])
            return
        entries = [{'id': '0', 'url': single_url}]

    self.to_screen(
        '[info] Thumbnails for %s:' % info_dict['id'])
    rows = []
    for thumb in entries:
        rows.append([
            thumb['id'],
            thumb.get('width', 'unknown'),
            thumb.get('height', 'unknown'),
            thumb['url'],
        ])
    self.to_screen(render_table(['ID', 'width', 'height', 'URL'], rows))
dca08720 1907
def list_subtitles(self, video_id, subtitles, name='subtitles'):
    """Print the languages in *subtitles* with their formats.

    *subtitles* maps a language to a list of format dicts; the 'ext' of
    each is listed in reverse order.  *name* is only used in the
    messages.  A notice is printed instead when *subtitles* is empty.
    """
    if subtitles:
        self.to_screen(
            'Available %s for %s:' % (name, video_id))
        rows = []
        for lang, formats in subtitles.items():
            exts = [f['ext'] for f in reversed(formats)]
            rows.append([lang, ', '.join(exts)])
        self.to_screen(render_table(['Language', 'formats'], rows))
    else:
        self.to_screen('%s has no %s' % (video_id, name))
a504ced0 1918
dca08720
PH
def urlopen(self, req):
    """ Start an HTTP download

    *req* may be a URL string, which is first wrapped with
    sanitized_Request, or an already built request object.
    """
    request = sanitized_Request(req) if isinstance(req, compat_basestring) else req
    return self._opener.open(request, timeout=self._socket_timeout)
dca08720
PH
1924
def print_debug_header(self):
    """Emit the '[debug]' preamble when the 'verbose' param is set.

    Writes the active encodings, the youtube-dl version (plus the git
    HEAD when it can be determined), the Python version, the versions of
    the external executables and the proxy map.  With the 'call_home'
    param it also fetches the public IP address and the latest version
    from yt-dl.org and warns when the running version is older.
    """
    if not self.params.get('verbose'):
        return

    if type('') is not compat_str:
        # Python 2.6 on SLES11 SP1 (https://github.com/rg3/youtube-dl/issues/3326)
        self.report_warning(
            'Your Python is broken! Update to a newer and supported version')

    out_encoding = getattr(
        sys.stdout, 'encoding', 'missing (%s)' % type(sys.stdout).__name__)
    encodings = (
        locale.getpreferredencoding(),
        sys.getfilesystemencoding(),
        out_encoding,
        self.get_encoding(),
    )
    write_string(
        '[debug] Encodings: locale %s, fs %s, out %s, pref %s\n' % encodings,
        encoding=None)

    self._write_string('[debug] youtube-dl version ' + __version__ + '\n')
    try:
        module_dir = os.path.dirname(os.path.abspath(__file__))
        git = subprocess.Popen(
            ['git', 'rev-parse', '--short', 'HEAD'],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            cwd=module_dir)
        head = git.communicate()[0].decode().strip()
        if re.match('[0-9a-f]+', head):
            self._write_string('[debug] Git HEAD: ' + head + '\n')
    except Exception:
        # Best effort only: any failure to query git is ignored
        try:
            sys.exc_clear()
        except Exception:
            pass
    self._write_string('[debug] Python version %s - %s\n' % (
        platform.python_version(), platform_name()))

    exe_versions = FFmpegPostProcessor.get_versions(self)
    exe_versions['rtmpdump'] = rtmpdump_version()
    found = [
        '%s %s' % (exe, ver)
        for exe, ver in sorted(exe_versions.items())
        if ver]
    self._write_string(
        '[debug] exe versions: %s\n' % (', '.join(found) or 'none'))

    proxy_map = {}
    for handler in self._opener.handlers:
        if hasattr(handler, 'proxies'):
            proxy_map.update(handler.proxies)
    self._write_string('[debug] Proxy map: ' + compat_str(proxy_map) + '\n')

    if not self.params.get('call_home', False):
        return
    ipaddr = self.urlopen('https://yt-dl.org/ip').read().decode('utf-8')
    self._write_string('[debug] Public IP address: %s\n' % ipaddr)
    latest_version = self.urlopen(
        'https://yt-dl.org/latest/version').read().decode('utf-8')
    if version_tuple(latest_version) > version_tuple(__version__):
        self.report_warning(
            'You are using an outdated version (newest version: %s)! '
            'See https://yt-dl.org/update if you need help updating.' %
            latest_version)
1989
def _setup_opener(self):
    """Create the URL opener and store it as self._opener.

    Also sets self._socket_timeout (600 unless 'socket_timeout' is
    given) and self.cookiejar (loaded from 'cookiefile' when that file
    is readable).  Proxies come from the 'proxy' param, where an empty
    string disables them, or from the environment otherwise.
    """
    timeout_val = self.params.get('socket_timeout')
    if timeout_val is None:
        self._socket_timeout = 600
    else:
        self._socket_timeout = float(timeout_val)

    cookie_path = self.params.get('cookiefile')
    proxy_opt = self.params.get('proxy')

    if cookie_path is not None:
        self.cookiejar = compat_cookiejar.MozillaCookieJar(cookie_path)
        if os.access(cookie_path, os.R_OK):
            self.cookiejar.load()
    else:
        self.cookiejar = compat_cookiejar.CookieJar()

    cookie_processor = YoutubeDLCookieProcessor(self.cookiejar)
    if proxy_opt is None:
        proxies = compat_urllib_request.getproxies()
        # Set HTTPS proxy to HTTP one if given (https://github.com/rg3/youtube-dl/issues/805)
        if 'http' in proxies and 'https' not in proxies:
            proxies['https'] = proxies['http']
    elif proxy_opt == '':
        proxies = {}
    else:
        proxies = {'http': proxy_opt, 'https': proxy_opt}
    proxy_handler = PerRequestProxyHandler(proxies)

    if self.params.get('debug_printtraffic'):
        debuglevel = 1
    else:
        debuglevel = 0
    https_handler = make_HTTPS_handler(self.params, debuglevel=debuglevel)
    ydlh = YoutubeDLHandler(self.params, debuglevel=debuglevel)
    data_handler = compat_urllib_request_DataHandler()

    # When passing our own FileHandler instance, build_opener won't add the
    # default FileHandler and allows us to disable the file protocol, which
    # can be used for malicious purposes (see
    # https://github.com/rg3/youtube-dl/issues/8227)
    file_handler = compat_urllib_request.FileHandler()

    def file_open(*args, **kwargs):
        raise compat_urllib_error.URLError('file:// scheme is explicitly disabled in youtube-dl for security reasons')
    file_handler.file_open = file_open

    handlers = (
        proxy_handler, https_handler, cookie_processor,
        ydlh, data_handler, file_handler)
    opener = compat_urllib_request.build_opener(*handlers)

    # Delete the default user-agent header, which would otherwise apply in
    # cases where our custom HTTP handler doesn't come into play
    # (See https://github.com/rg3/youtube-dl/issues/1309 for details)
    opener.addheaders = []
    self._opener = opener
62fec3b2
PH
2041
def encode(self, s):
    """Return *s* as bytes in the encoding given by get_encoding().

    Bytes are passed through unchanged.  On UnicodeEncodeError the
    error's reason gets a configuration hint appended before it is
    re-raised.
    """
    if not isinstance(s, bytes):
        try:
            s = s.encode(self.get_encoding())
        except UnicodeEncodeError as err:
            err.reason += '. Check your system encoding configuration or use the --encoding option.'
            raise
    return s
2051
def get_encoding(self):
    """Return the 'encoding' param, or preferredencoding() when unset."""
    configured = self.params.get('encoding')
    return preferredencoding() if configured is None else configured
ec82d85a
PH
2057
2058 def _write_thumbnails(self, info_dict, filename):
2059 if self.params.get('writethumbnail', False):
2060 thumbnails = info_dict.get('thumbnails')
2061 if thumbnails:
2062 thumbnails = [thumbnails[-1]]
2063 elif self.params.get('write_all_thumbnails', False):
2064 thumbnails = info_dict.get('thumbnails')
2065 else:
2066 return
2067
2068 if not thumbnails:
2069 # No thumbnails present, so return immediately
2070 return
2071
2072 for t in thumbnails:
2073 thumb_ext = determine_ext(t['url'], 'jpg')
2074 suffix = '_%s' % t['id'] if len(thumbnails) > 1 else ''
2075 thumb_display_id = '%s ' % t['id'] if len(thumbnails) > 1 else ''
82245a6d 2076 t['filename'] = thumb_filename = os.path.splitext(filename)[0] + suffix + '.' + thumb_ext
ec82d85a
PH
2077
2078 if self.params.get('nooverwrites', False) and os.path.exists(encodeFilename(thumb_filename)):
2079 self.to_screen('[%s] %s: Thumbnail %sis already present' %
2080 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2081 else:
2082 self.to_screen('[%s] %s: Downloading thumbnail %s...' %
2083 (info_dict['extractor'], info_dict['id'], thumb_display_id))
2084 try:
2085 uf = self.urlopen(t['url'])
d3d89c32 2086 with open(encodeFilename(thumb_filename), 'wb') as thumbf:
ec82d85a
PH
2087 shutil.copyfileobj(uf, thumbf)
2088 self.to_screen('[%s] %s: Writing thumbnail %sto: %s' %
2089 (info_dict['extractor'], info_dict['id'], thumb_display_id, thumb_filename))
2090 except (compat_urllib_error.URLError, compat_http_client.HTTPException, socket.error) as err:
2091 self.report_warning('Unable to download thumbnail "%s": %s' %
9b9c5355 2092 (t['url'], error_to_compat_str(err)))